-
-
Notifications
You must be signed in to change notification settings - Fork 369
Dynamic deserialization proposal #861
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1727b00
d3ba8ae
e3f6098
d3521e1
7155437
cda248b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,20 @@ | ||
//! The `BytesInput` is the "normal" input, a map of bytes, that can be sent directly to the client | ||
//! (As opposed to other, more abstract, inputs, like a Grammar-Based AST Input) | ||
|
||
use alloc::{borrow::ToOwned, rc::Rc, string::String, vec::Vec}; | ||
use alloc::{borrow::ToOwned, boxed::Box, rc::Rc, string::String, vec::Vec}; | ||
use core::{cell::RefCell, convert::From, hash::Hasher}; | ||
#[cfg(feature = "std")] | ||
use std::{fs::File, io::Read, path::Path}; | ||
|
||
use ahash::AHasher; | ||
#[cfg(feature = "input_conversion")] | ||
use postcard::{de_flavors::Slice, Deserializer}; | ||
use serde::{Deserialize, Serialize}; | ||
|
||
#[cfg(feature = "input_conversion")] | ||
use crate::inputs::ConvertibleInput; | ||
#[cfg(feature = "std")] | ||
use crate::{bolts::fs::write_file_atomic, Error}; | ||
use crate::{bolts::fs::write_file_atomic, bolts::AsSlice, Error}; | ||
use crate::{ | ||
bolts::{ownedref::OwnedSlice, HasLen}, | ||
inputs::{HasBytesVec, HasTargetBytes, Input}, | ||
|
@@ -24,6 +28,8 @@ pub struct BytesInput { | |
} | ||
|
||
impl Input for BytesInput { | ||
const NAME: &'static str = "BytesInput"; | ||
|
||
#[cfg(feature = "std")] | ||
/// Write this input to the file | ||
fn to_file<P>(&self, path: P) -> Result<(), Error> | ||
|
@@ -53,6 +59,24 @@ impl Input for BytesInput { | |
} | ||
} | ||
|
||
/// Dynamic deserialisation of any input type that has target bytes | ||
/// | ||
/// Decodes `buf` with `postcard` as an `I`, then copies that value's target bytes into a | ||
/// freshly allocated, boxed [`BytesInput`]. | ||
/// | ||
/// # Errors | ||
/// Returns the deserializer error if `buf` cannot be decoded as an `I`. | ||
#[cfg(feature = "input_conversion")] | ||
pub fn target_bytes_to_bytes<I: HasTargetBytes + for<'a> Deserialize<'a>>( | ||
buf: &[u8], | ||
) -> Result<Box<dyn ConvertibleInput>, <&mut Deserializer<Slice> as serde::de::Deserializer>::Error> | ||
{ | ||
let orig: I = postcard::from_bytes(buf)?; | ||
Ok(Box::new(BytesInput { | ||
// `to_vec` copies the bytes, so the returned input owns its data. | ||
bytes: orig.target_bytes().as_slice().to_vec(), | ||
})) | ||
} | ||
|
||
// Registers a conversion named from `GeneralizedInput::NAME` to `BytesInput::NAME`, | ||
// implemented by `target_bytes_to_bytes::<GeneralizedInput>`. | ||
#[cfg(feature = "input_conversion")] | ||
inventory::submit! { | ||
use crate::inputs::{GeneralizedInput, InputConversion}; | ||
InputConversion::new(GeneralizedInput::NAME, BytesInput::NAME, target_bytes_to_bytes::<GeneralizedInput>) | ||
} | ||
|
||
/// Rc Ref-cell from Input | ||
impl From<BytesInput> for Rc<RefCell<BytesInput>> { | ||
fn from(input: BytesInput) -> Self { | ||
|
@@ -105,3 +129,33 @@ impl BytesInput { | |
Self { bytes } | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod test { | ||
use alloc::vec::Vec; | ||
|
||
use crate::{ | ||
bolts::AsSlice, | ||
inputs::{BytesInput, GeneralizedInput, HasTargetBytes, Input, NopInput}, | ||
}; | ||
|
||
// A dynamically serialized `GeneralizedInput` must be readable as a `BytesInput` that | ||
// exposes the same target bytes. | ||
#[test] | ||
fn deserialize_generalised_to_bytes() { | ||
let generalised = GeneralizedInput::new(b"hello".to_vec()); | ||
let mut buf = Vec::new(); | ||
generalised.serialize_dynamic(&mut buf).unwrap(); | ||
// First unwrap: no deserialization error. Second unwrap: a conversion was found. | ||
let bytes = BytesInput::deserialize_dynamic(&buf).unwrap().unwrap(); | ||
assert_eq!(bytes.target_bytes().as_slice(), b"hello"); | ||
} | ||
Comment on lines
+143
to
+149
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Example usage of the dynamic deserialisation. #858 |
||
|
||
// A missing conversion must surface as `Ok(None)`, not as an error. | ||
#[test] | ||
fn failed_deserialize_from_nop() { | ||
// note that NopInput implements HasTargetBytes, but because we have not submitted the | ||
// conversion BytesInput cannot be converted from NopInput | ||
|
||
let nop = NopInput {}; | ||
let mut buf = Vec::new(); | ||
nop.serialize_dynamic(&mut buf).unwrap(); | ||
assert!(BytesInput::deserialize_dynamic(&buf).unwrap().is_none()); | ||
} | ||
Comment on lines
+152
to
+160
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See comments here: if the deserialisation for this type cannot occur, it is skipped and not deserialised. |
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,15 +15,22 @@ pub use generalized::*; | |
#[cfg(feature = "nautilus")] | ||
pub mod nautilus; | ||
use alloc::{ | ||
boxed::Box, | ||
string::{String, ToString}, | ||
vec::Vec, | ||
}; | ||
use core::{clone::Clone, fmt::Debug}; | ||
use core::{ | ||
clone::Clone, | ||
fmt::{Debug, Formatter}, | ||
}; | ||
#[cfg(feature = "std")] | ||
use std::{fs::File, hash::Hash, io::Read, path::Path}; | ||
|
||
#[cfg(feature = "input_conversion")] | ||
use downcast_rs::{impl_downcast, Downcast}; | ||
#[cfg(feature = "nautilus")] | ||
pub use nautilus::*; | ||
use postcard::{de_flavors::Slice, Deserializer}; | ||
use serde::{Deserialize, Serialize}; | ||
|
||
#[cfg(feature = "std")] | ||
|
@@ -33,6 +40,9 @@ use crate::{bolts::ownedref::OwnedSlice, Error}; | |
/// An input for the target | ||
#[cfg(not(feature = "std"))] | ||
pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug { | ||
/// Name for this input type | ||
const NAME: &'static str; | ||
|
||
/// Write this input to the file | ||
fn to_file<P>(&self, _path: P) -> Result<(), Error> { | ||
Err(Error::not_implemented("Not supported in no_std")) | ||
|
@@ -52,7 +62,12 @@ pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug { | |
|
||
/// An input for the target | ||
#[cfg(feature = "std")] | ||
pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug { | ||
pub trait Input: | ||
Clone + ConvertibleInput + Serialize + serde::de::DeserializeOwned + Debug | ||
{ | ||
/// Name for this input type | ||
const NAME: &'static str; | ||
|
||
/// Write this input to the file | ||
fn to_file<P>(&self, path: P) -> Result<(), Error> | ||
where | ||
|
@@ -72,17 +87,106 @@ pub trait Input: Clone + Serialize + serde::de::DeserializeOwned + Debug { | |
Ok(postcard::from_bytes(&bytes)?) | ||
} | ||
|
||
/// Serializes this input to the dynamic serialisation format to pass between different fuzzers | ||
/// | ||
/// Appends to `buf` the `postcard` encoding of `Self::NAME`, immediately followed by the | ||
/// `postcard` encoding of `self`. Bytes already present in `buf` are kept. | ||
/// | ||
/// # Errors | ||
/// Returns the `postcard` error if encoding the name or the input fails. If encoding the input | ||
/// fails, the encoded name has already been appended to `buf`. | ||
fn serialize_dynamic(&self, buf: &mut Vec<u8>) -> Result<(), postcard::Error> { | ||
buf.extend_from_slice(postcard::to_allocvec(Self::NAME)?.as_slice()); | ||
buf.extend_from_slice(postcard::to_allocvec(self)?.as_slice()); | ||
Ok(()) | ||
} | ||
|
||
/// Deserializes this input type from the dynamic serialization format, if possible | ||
/// | ||
/// Forwards `buf` to `convert_named` and returns its result unchanged. | ||
/// | ||
/// NOTE(review): `convert_named` is only compiled with the `input_conversion` feature, while no | ||
/// feature gate is visible on this method -- confirm that builds without the feature still work. | ||
fn deserialize_dynamic( | ||
buf: &[u8], | ||
) -> Result<Option<Self>, <&mut Deserializer<Slice> as serde::de::Deserializer>::Error> { | ||
convert_named(buf) | ||
} | ||
Comment on lines
+91
to
+102
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These two methods can be overriden, so it's possible to serialize context and pass it at the end of buf here; just append the context to the end of the message. If the target already has it, it can opt to simply not deserialize it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure how big context is, but if it's reasonably sized this could be effective. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. context doesn't need to be serialized, but it is needed to do the serialization as it contains some info to convert the AST to bytes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How does that work with LLMP now? I don't see its unparse getting invoked except in specific places. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It doesn't. To serialize with serde there is no need of it, while it is needed to convert the AST stored in the input to bytes. You see it only in the harness. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also EncodedInput has something similar, as it is an array of u32 id representing tokens and the mapping id -> token is ofc not stored in the input itself so to convert to bytes you need to call https://github.com/AFLplusplus/LibAFL/blob/main/libafl/src/inputs/encoded.rs#L75 There was a problem hiding this comment. 
Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So the conversion would have to happen at the sending client, not the receiving? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But I see why it is convenient to do in the receiver, we can reuse the NewTestcase event and avoid double memory usage There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have to think about it, but I don't see other solutions, the tokens map in the EncodedInput decoder can be large even gigabytes when using a huge initial corpus |
||
|
||
/// Generate a name for this input | ||
fn generate_name(&self, idx: usize) -> String; | ||
|
||
/// A hook executed if the input is stored as `Testcase`; the default implementation does nothing | ||
fn wrapped_as_testcase(&mut self) {} | ||
} | ||
|
||
/// Utility trait for downcasting inputs for conversion | ||
/// | ||
/// Declares no methods of its own; it only requires `Downcast`. | ||
#[cfg(feature = "input_conversion")] | ||
pub trait ConvertibleInput: Downcast {} | ||
|
||
// NOTE(review): presumably this generates the `downcast` helpers for `dyn ConvertibleInput` | ||
// that `convert_named` calls -- confirm against the `downcast_rs` documentation. | ||
#[cfg(feature = "input_conversion")] | ||
impl_downcast!(ConvertibleInput); | ||
|
||
// Blanket implementation: every type implementing `Input` is a `ConvertibleInput`. | ||
#[cfg(feature = "input_conversion")] | ||
impl<I: Input> ConvertibleInput for I {} | ||
|
||
/// Function signature for conversion methods | ||
/// | ||
/// A converter takes a byte slice and returns either a boxed [`ConvertibleInput`] or the error | ||
/// type of the `postcard` deserializer. | ||
#[cfg(feature = "input_conversion")] | ||
pub type InputConversionFn = fn( | ||
&[u8], | ||
) -> Result< | ||
Box<dyn ConvertibleInput>, | ||
<&mut Deserializer<Slice> as serde::de::Deserializer>::Error, | ||
>; | ||
|
||
/// Struct for converting between input types at deserialisation time | ||
#[cfg(feature = "input_conversion")] | ||
pub struct InputConversion { | ||
// Name of the source input type; `convert_named` compares it with the name read from the buffer. | ||
from: &'static str, | ||
// Name of the target input type; `convert_named` compares it with the requested `T::NAME`. | ||
to: &'static str, | ||
// Function that performs the conversion. | ||
converter: InputConversionFn, | ||
} | ||
|
||
// Manual `Debug` implementation that prints only `from` and `to`; the `converter` function | ||
// pointer is left out of the output. | ||
#[cfg(feature = "input_conversion")] | ||
impl Debug for InputConversion { | ||
fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { | ||
f.debug_struct("InputConversion") | ||
.field("from", &self.from) | ||
.field("to", &self.to) | ||
.finish() | ||
} | ||
} | ||
|
||
#[cfg(feature = "input_conversion")] | ||
impl InputConversion { | ||
/// Create a new input conversion to be registered | ||
/// | ||
/// `from` and `to` are the type names of the source and target input types, and `converter` | ||
/// is the function that turns serialized bytes into the target type. | ||
// NOTE(review): presumably `const` so it can be called inside `inventory::submit!` -- confirm. | ||
pub const fn new(from: &'static str, to: &'static str, converter: InputConversionFn) -> Self { | ||
Self { | ||
from, | ||
to, | ||
converter, | ||
} | ||
} | ||
} | ||
|
||
// Makes `InputConversion` a type collected by `inventory`; `convert_named` iterates over the | ||
// collected values via `inventory::iter`. | ||
#[cfg(feature = "input_conversion")] | ||
inventory::collect!(InputConversion); | ||
|
||
/// Converts from a serialisation-specified type to the intended type, if such a conversion exists | ||
/// | ||
/// `bytes` must start with a `postcard`-encoded type name, followed by the serialized input. | ||
/// | ||
/// Returns: | ||
/// - `Ok(Some(input))` if the name equals `T::NAME` and the rest deserializes as `T`, or if a | ||
/// registered [`InputConversion`] from that name to `T::NAME` produces a `T`; | ||
/// - `Ok(None)` if no matching conversion is registered, or if the matching converter's result | ||
/// cannot be downcast to `T`; | ||
/// - `Err(_)` if reading the name, deserializing the input, or running the converter fails. | ||
#[cfg(feature = "input_conversion")] | ||
pub fn convert_named<T: Input>( | ||
bytes: &[u8], | ||
) -> Result<Option<T>, <&mut Deserializer<Slice> as serde::de::Deserializer>::Error> { | ||
let mut deser = Deserializer::from_bytes(bytes); | ||
// Read the type name that prefixes the serialized input. | ||
let from = String::deserialize(&mut deser)?; | ||
// Same type: deserialize directly from the same deserializer. | ||
if from == T::NAME { | ||
return Ok(Some(T::deserialize(&mut deser)?)); | ||
} | ||
// Otherwise look for a registered conversion; only the first match is used. | ||
for conversion in inventory::iter::<InputConversion> { | ||
if conversion.from == from && conversion.to == T::NAME { | ||
// NOTE(review): `finalize` presumably yields the bytes not yet consumed -- confirm | ||
// against the postcard documentation. A failed downcast becomes `None`. | ||
return Ok((conversion.converter)(deser.finalize()?)? | ||
.downcast() | ||
.ok() | ||
.map(|boxed| *boxed)); | ||
} | ||
} | ||
Ok(None) | ||
} | ||
|
||
/// An input mainly for tests; it has little real use otherwise. | ||
#[derive(Copy, Clone, Serialize, Deserialize, Debug, Hash)] | ||
pub struct NopInput {} | ||
impl Input for NopInput { | ||
const NAME: &'static str = "NopInput"; | ||
|
||
fn generate_name(&self, _idx: usize) -> String { | ||
"nop-input".to_string() | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think here type_name can be used instead of an associated const https://doc.rust-lang.org/std/any/fn.type_name.html
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, awesome -- didn't know about that. I'll patch it out.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, this needs to be an associated const because type_name is not const stable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it will be stabilized soon, see rust-lang/rust#63084
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So keep the const for now but put a comment to remind us to switch to type_name once it is stable
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could be an issue, probably a NAME field is safer
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe it should be called `TYPE`, else it's confusing with our `named` trait(?)