-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Added cargo-fuzz tests for Rlp crate #6145
Changes from 16 commits
1bb6244
2ae9599
5d00521
d059d69
e2799de
40dc411
17bd401
5373ba4
34fb2bc
3766434
fbc3436
728e3e1
c3d80a8
5b84221
25b74cd
e3151fb
3786e22
a7f2279
0d23592
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
|
||
target | ||
corpus | ||
artifacts |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
|
||
[package] | ||
name = "rlp-fuzz" | ||
version = "0.0.1" | ||
authors = ["Parity Technologies <admin@parity.io>"] | ||
publish = false | ||
build = "build.rs" | ||
|
||
[package.metadata] | ||
cargo-fuzz = true | ||
|
||
[features] | ||
nightly = [] | ||
|
||
[dependencies] | ||
ethcore-bigint = { path = "../../bigint" } | ||
rand = "0.3" | ||
|
||
[build-dependencies] | ||
rustc_version = "0.2.1" | ||
|
||
[dependencies.rlp] | ||
path = ".." | ||
[dependencies.libfuzzer-sys] | ||
git = "https://github.com/rust-fuzz/libfuzzer-sys.git" | ||
|
||
# Prevent this from interfering with workspaces | ||
[workspace] | ||
members = ["."] | ||
|
||
[[bin]] | ||
name = "untrusted" | ||
path = "fuzz_targets/untrusted.rs" | ||
|
||
[[bin]] | ||
name = "append_raw" | ||
path = "fuzz_targets/append_raw.rs" | ||
|
||
[[bin]] | ||
name = "untrusted_data" | ||
path = "fuzz_targets/untrusted_data.rs" | ||
|
||
[[bin]] | ||
name = "gen_untrusted_corpus" | ||
path = "gen_untrusted_corpus.rs" | ||
|
||
[[bin]] | ||
name = "untrusted_std" | ||
path = "fuzz_targets/untrusted_std.rs" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#Fuzzing crates with cargo-fuzz | ||
|
||
##Installation | ||
Install cargo-fuzz using cargo: | ||
- `cargo install cargo-fuzz` | ||
|
||
##Initializing fuzz tests | ||
Change into the target crate's base directory, e.g.: | ||
- `cd parity/util/rlp` | ||
|
||
Initialize the fuzz directory: | ||
- `cargo fuzz init` | ||
- `cargo fuzz init -t named_first_test` | ||
|
||
Fuzz tests stored under `./fuzz/fuzz_targets/*.rs` | ||
|
||
##Adding tests | ||
Add a test named `anyname` under `fuzz/fuzz_targets/anyname.rs`: | ||
- `cargo fuzz add anyname` | ||
|
||
This also adds an entry to `fuzz/Cargo.toml` for creating a binary target | ||
with the test's name, e.g. | ||
``` | ||
[[bin]] | ||
name = "anyname" | ||
path = "fuzz_targets/anyname.rs" | ||
``` | ||
|
||
##Editing tests | ||
Make changes and import any necessary libraries to test target features | ||
- `vim ./fuzz/fuzz_targets/untrusted.rs` | ||
|
||
##Running tests | ||
While still in the crate's root directory, run the following: | ||
- `cargo fuzz run <test-name>` | ||
- `cargo fuzz run -j <num-jobs> <test-name>` for parallell jobs | ||
|
||
Examples and detailed information on libfuzzer output can be found [here](https://rust-fuzz.github.io/book/cargo-fuzz/tutorial.html) | ||
|
||
Efficient fuzzing tips from Google: [efficient fuzzing](https://chromium.googlesource.com/chromium/src/+/master/testing/libfuzzer/efficient_fuzzer.md) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
extern crate rustc_version; | ||
use rustc_version::{version, version_meta, Channel, Version}; | ||
|
||
fn main() { | ||
assert!(version().unwrap().major >= 1); | ||
|
||
match version_meta().unwrap().channel { | ||
Channel::Stable => { | ||
} | ||
Channel::Beta => { | ||
} | ||
Channel::Nightly => { | ||
println!("cargo:rustc-cfg=feature=\"nightly\"") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if let Channel::Nighlty = version_meta().unwrap().channel {
println!("cargo:rustc-cfg=feature=\"nightly\"")
} |
||
} | ||
Channel::Dev => { | ||
} | ||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
"\x83" | ||
"\xc0" | ||
"\xc8\x83\x41\x41\x41\x83\x41\x41\x41" | ||
"\x0f" | ||
"\x04\x00" | ||
"\xc7\xc0\xc1\xc0\xc3\xc0\xc1\xc0" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#![no_main] | ||
#![cfg(feature = "nightly")] | ||
#[macro_use] extern crate libfuzzer_sys; | ||
extern crate rlp; | ||
extern crate rand; | ||
|
||
use rlp::RlpStream; | ||
use rand::Rng; | ||
|
||
fuzz_target!(|data: &[u8]| { | ||
let mut rls = RlpStream::new(); | ||
|
||
// Generate random item count to further fuzz append_raw's parsing | ||
let mut rng = rand::thread_rng(); | ||
let rand_item_count = rng.gen_range::<usize>(1 as usize, 4096 as usize); | ||
|
||
let _ = rls.append_raw(data, rand_item_count); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
what do you expect to find by fuzz-testing this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. trying to get full fuzz-coverage of the classes in the rlp crate, and I hadn't considered where the data may be coming from. I agree it makes more sense to fully fuzz the UntrustedRlp struct, and am focusing more on that now that the foundational stuff is fixed. Also still becoming more familiar with the code base, another expectation/goal of writing fuzz tests for all the code. If for some reason or another, a developer decides to use a normally internal function/struct, it would be beneficial to know if that feature is buggy. |
||
}); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#![no_main] | ||
#![cfg(feature = "nightly")] | ||
#[macro_use] extern crate libfuzzer_sys; | ||
extern crate rlp; | ||
|
||
use rlp::UntrustedRlp; | ||
|
||
fuzz_target!(|data: &[u8]| { | ||
let _ = UntrustedRlp::new(data); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. creating There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, what do you mean by all the lists? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. RLP is a data format consisting mostly of nested lists, so you could try iterating over all of them recursively. This might give the fuzzer more to work with in terms of exploring code paths There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added another fuzz test that creates a list (prepending
So now I'm thinking of creating lists of various types from the fuzz data, and recursively calling What about taking variable length slices from the raw fuzz data, then using those as list items, prepended by their length? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
fn iter_recursive(rlp: UntrustedRlp) {
for x in rlp.iter() {
iter_recursive(x)
}
} this would essentially walk the whole RLP structure recursively There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this sounds like it requires building a minimally well-formed list in the RLP structure. is this what you are talking about building from the fuzz data? i don't fully understand your thinking on this. i get the code coverage, just trying to figure out guiding the fuzzer without constraining it too much. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. after reading through
Do you think throwing fuzz data into these list items will get the right code/crash coverage (using your recursive function to traverse the RLP structure)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So the interesting thing about RLP is that actually, each item is either one of a few things:
This is why it's called "recursive length prefix". What I'm suggesting is that we have the fuzzer try and generate data which we treat as a bunch of nested lists of bytes (and try to walk accordingly). This will allow the fuzzer to attempt to explore almost all code paths in the decoder, possibly finding some well-formed RLP that leads to panic. Most of the data it generates will end up being invalid RLP, but it may find some valid RLPs which are not handled correctly. Isn't this the usual strategy with fuzz testing? e.g. if you have a PNG decoder, you try to have the fuzzer generate data that breaks the decoder, not to make the fuzzer generate a picture of a horse. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Currently, I'm using the Thanks for the link to the spec, I'll go over it again to get more ideas for what to throw at this thing. I'm understanding better how RLP really works now. Hopefully this leads to some really exhaustive fuzz code.
Absolutely agree. I'm trying to add more to the fuzzer corpus, so that the fuzzer's mutation algorithms can be more efficient in breaking the RLP decoder. Like sending in a PNG of a horse with a trojan encoded inside :) |
||
}); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#![no_main] | ||
#![cfg(feature = "nightly")] | ||
#[macro_use] extern crate libfuzzer_sys; | ||
extern crate rlp; | ||
|
||
use rlp::UntrustedRlp; | ||
|
||
fuzz_target!(|data: &[u8]| { | ||
let dlen = data.len(); | ||
let dvec = Vec::with_capacity(dlen + 1); | ||
|
||
// Initialize rlpstream data as a list | ||
dvec.push(0xc + dlen as u8); | ||
|
||
// Push data stream onto raw rlpstream data | ||
for d in data.iter() { | ||
dvec.push(d); | ||
} | ||
let urlp = UntrustedRlp::new(dvec.as_bytes()); | ||
|
||
// Internally calls BasicDecoder::payload_info(self.bytes) | ||
// Which will hit the code-path we're interested in fuzzing | ||
let _ = urlp.data(); | ||
}); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#![no_main] | ||
#![cfg(feature = "nightly")] | ||
#[macro_use] extern crate libfuzzer_sys; | ||
extern crate rlp; | ||
extern crate ethcore_bigint; | ||
|
||
use rlp::{DecoderError, UntrustedRlp}; | ||
use ethcore_bigint::prelude::{U128, U256, H64, H128, H160, H256, H512, H520, H2048}; | ||
|
||
fuzz_target!(|data: &[u8]| { | ||
// Create UntrustedRlp to build BasicDecoder | ||
let urlp = UntrustedRlp::new(&data); | ||
|
||
// Attempt to create panic by decoding | ||
let _: Result<u8, DecoderError> = urlp.as_val(); | ||
let _: Result<u16, DecoderError> = urlp.as_val(); | ||
let _: Result<u32, DecoderError> = urlp.as_val(); | ||
let _: Result<u64, DecoderError> = urlp.as_val(); | ||
let _: Result<U128, DecoderError> = urlp.as_val(); | ||
let _: Result<U256, DecoderError> = urlp.as_val(); | ||
|
||
let _: Result<H64, DecoderError> = urlp.as_val(); | ||
let _: Result<H128, DecoderError> = urlp.as_val(); | ||
let _: Result<H160, DecoderError> = urlp.as_val(); | ||
let _: Result<H256, DecoderError> = urlp.as_val(); | ||
let _: Result<H512, DecoderError> = urlp.as_val(); | ||
let _: Result<H520, DecoderError> = urlp.as_val(); | ||
let _: Result<H2048, DecoderError> = urlp.as_val(); | ||
|
||
let _: Result<Vec<u8>, DecoderError> = urlp.as_val(); | ||
let _: Result<Vec<u8>, DecoderError> = urlp.as_list(); | ||
}); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
extern crate rlp; | ||
extern crate ethcore_bigint; | ||
|
||
use std::env; | ||
use std::error::Error; | ||
use std::io::prelude::*; | ||
use std::path::PathBuf; | ||
use std::fs::File; | ||
use rlp::RlpStream; | ||
use ethcore_bigint::prelude::{U128, U256, H64, H128, H160, H256, H512, H520, H2048}; | ||
|
||
fn create_file(path: &PathBuf) -> File { | ||
match File::create(path.as_path()) { | ||
Err(why) => panic!("couldn't create {}: {}", | ||
path.display(), | ||
why.description()), | ||
Ok(file) => file | ||
} | ||
} | ||
|
||
fn write_to_file(f: &File, rlp: &RlpStream) { | ||
let mut g = f.clone(); | ||
match g.write_all(rlp.as_raw()) { | ||
Err(why) => { | ||
panic!("couldn't write to file: {}", why.description()) | ||
}, | ||
Ok(_) => println!("successfully wrote to file") | ||
} | ||
|
||
} | ||
|
||
fn write_corpus(corpdir: &str, rlp: &RlpStream) { | ||
// Read in base path to fuzzing corpus directory from `RLPCORPUS` environment var | ||
let corp = env::var("RLPCORPUS").unwrap(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is fuzzing corpus? does it need to be persistent? also:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
the fuzzing corpus starts as an initial set of valid input data, and cargo-fuzz mutates the initial data into a larger, potentially crash-causing set of input data. It needs to be somewhat persistent, but you could generate the corpus from scratch each time. The simplest way is to just run generating from scratch is computationally more expensive (depends on size of corpus), and some amount of state is lost in code penetration. cargo-fuzz also has a command
True, I wasn't sure on the preferred way to do this. I'll change the path to a constant in the test, and use relative path so its all maintained in the
|
||
let mut path = PathBuf::from(corp); | ||
path.push(corpdir); | ||
|
||
// Write RLP Stream to corpus file | ||
let f = create_file(&path); | ||
write_to_file(&f, rlp); | ||
} | ||
|
||
fn create_uint_stream() { | ||
// Create RLP Stream to encode values | ||
let mut rlp = RlpStream::new(); | ||
// U8, U16, U32, U64, U128, U256 bytes | ||
let u8b = 8 as u8; | ||
let u16b = 16 as u16; | ||
let u32b = 32 as u32; | ||
let u64b = 64 as u64; | ||
let u128b = U128::from(128); | ||
let u256b = U256::from(256); | ||
|
||
rlp.append(&u8b); | ||
rlp.append(&u16b); | ||
rlp.append(&u32b); | ||
rlp.append(&u64b); | ||
rlp.append(&u128b); | ||
rlp.append(&u256b); | ||
|
||
write_corpus("untrusted_data/well-formed-list-uint", &rlp); | ||
} | ||
|
||
fn create_uint_slice_stream() { | ||
let s0 = vec![0xc8, 0x83, b'c', b'a', b't', 0x83, b'd', b'o', b'g']; | ||
let s1 = String::from("second string"); | ||
|
||
let mut rlp = RlpStream::new(); | ||
rlp.append(&s0); | ||
rlp.append(&s1.as_bytes()); | ||
|
||
write_corpus("untrusted_data/well-formed-list-byte-slice", &rlp); | ||
} | ||
|
||
fn create_u8_vec_stream() { | ||
let v8: Vec<u8> = vec![41 as u8]; | ||
|
||
let mut rlp = RlpStream::new(); | ||
rlp.append(&v8); | ||
|
||
write_corpus("untrusted_data/well-formed-list-byte-vector", &rlp); | ||
} | ||
|
||
fn create_hash_stream() { | ||
// Create RLP Stream to encode values | ||
let mut rlp = RlpStream::new(); | ||
// H64, H128, H160, H256, H512, H520, H2048 list | ||
let h64 = H64::random(); | ||
let h128 = H128::random(); | ||
let h160 = H160::random(); | ||
let h256 = H256::random(); | ||
let h512 = H512::random(); | ||
let h520 = H520::random(); | ||
let h2048 = H2048::random(); | ||
|
||
|
||
rlp.append(&h64); | ||
rlp.append(&h128); | ||
rlp.append(&h160); | ||
rlp.append(&h256); | ||
rlp.append(&h512); | ||
rlp.append(&h520); | ||
rlp.append(&h2048); | ||
|
||
write_corpus(&"untrusted_data/well-formed-list-hash", &rlp); | ||
} | ||
|
||
fn main() { | ||
create_uint_stream(); | ||
create_uint_slice_stream(); | ||
create_hash_stream(); | ||
create_u8_vec_stream(); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what's the purpose of all the
RUSTC_IS_*
? can't you justif let Channel::Nightly = version_meta().unwrap() { /* set feature flag */ }
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you're right, I took this code from an example on using rustc_version (not super familiar with conditional compilation).
I'll remove the code from the other cases, and only set the feature flag for nightly