Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wip] sort keys #1666

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,18 @@ rust-version = "1.75"
[dependencies]
# TODO it would be very nice to remove the "py-clone" feature as it can panic,
# but needs a bit of work to make sure it's not used in the codebase
pyo3 = { version = "0.23.5", features = ["generate-import-lib", "num-bigint", "py-clone"] }
pyo3 = { version = "0.23.5", features = [
"generate-import-lib",
"num-bigint",
"py-clone",
] }
Comment on lines +32 to +36
Copy link
Author

@zzstoatzz zzstoatzz Mar 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

prob IDE idiosyncrasy, can revert

regex = "1.11.1"
strum = { version = "0.26.3", features = ["derive"] }
strum_macros = "0.26.4"
serde_json = {version = "1.0.138", features = ["arbitrary_precision", "preserve_order"]}
serde_json = { version = "1.0.138", features = [
"arbitrary_precision",
"preserve_order",
] }
enum_dispatch = "0.3.13"
serde = { version = "1.0.218", features = ["derive"] }
speedate = "0.15.0"
Expand Down
5 changes: 4 additions & 1 deletion python/pydantic_core/_pydantic_core.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ def to_json(
fallback: Callable[[Any], Any] | None = None,
serialize_as_any: bool = False,
context: Any | None = None,
sort_keys: bool = False,
) -> bytes:
"""
Serialize a Python object to JSON including transforming and filtering data.
Expand All @@ -426,6 +427,7 @@ def to_json(
serialize_as_any: Whether to serialize fields with duck-typing serialization behavior.
context: The context to use for serialization, this is passed to functional serializers as
[`info.context`][pydantic_core.core_schema.SerializationInfo.context].
sort_keys: Whether to sort the keys of the serialized object.

Raises:
PydanticSerializationError: If serialization fails and no `fallback` function is provided.
Expand Down Expand Up @@ -478,6 +480,7 @@ def to_jsonable_python(
fallback: Callable[[Any], Any] | None = None,
serialize_as_any: bool = False,
context: Any | None = None,
sort_keys: bool = False,
) -> Any:
"""
Serialize/marshal a Python object to a JSON-serializable Python object including transforming and filtering data.
Expand All @@ -502,7 +505,7 @@ def to_jsonable_python(
serialize_as_any: Whether to serialize fields with duck-typing serialization behavior.
context: The context to use for serialization, this is passed to functional serializers as
[`info.context`][pydantic_core.core_schema.SerializationInfo.context].

sort_keys: Whether to sort the keys of the serialized object.
Raises:
PydanticSerializationError: If serialization fails and no `fallback` function is provided.

Expand Down
1 change: 1 addition & 0 deletions src/errors/validation_exception.rs
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,7 @@ impl ValidationError {
None,
DuckTypingSerMode::SchemaBased,
None,
false,
);
let serializer = ValidationErrorSerializer {
py,
Expand Down
12 changes: 9 additions & 3 deletions src/serializers/extra.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,10 @@ impl SerializationState {
exclude_none: bool,
round_trip: bool,
serialize_unknown: bool,
fallback: Option<&'py Bound<'_, PyAny>>,
fallback: Option<&'py Bound<'py, PyAny>>,
duck_typing_ser_mode: DuckTypingSerMode,
context: Option<&'py Bound<'_, PyAny>>,
context: Option<&'py Bound<'py, PyAny>>,
sort_keys: bool,
) -> Extra<'py> {
Extra::new(
py,
Expand All @@ -106,6 +107,7 @@ impl SerializationState {
fallback,
duck_typing_ser_mode,
context,
sort_keys,
)
}

Expand Down Expand Up @@ -139,6 +141,7 @@ pub(crate) struct Extra<'a> {
pub fallback: Option<&'a Bound<'a, PyAny>>,
pub duck_typing_ser_mode: DuckTypingSerMode,
pub context: Option<&'a Bound<'a, PyAny>>,
pub sort_keys: bool,
}

impl<'a> Extra<'a> {
Expand All @@ -158,6 +161,7 @@ impl<'a> Extra<'a> {
fallback: Option<&'a Bound<'a, PyAny>>,
duck_typing_ser_mode: DuckTypingSerMode,
context: Option<&'a Bound<'a, PyAny>>,
sort_keys: bool,
) -> Self {
Self {
mode,
Expand All @@ -177,6 +181,7 @@ impl<'a> Extra<'a> {
fallback,
duck_typing_ser_mode,
context,
sort_keys,
}
}

Expand Down Expand Up @@ -288,11 +293,12 @@ impl ExtraOwned {
fallback: self.fallback.as_ref().map(|m| m.bind(py)),
duck_typing_ser_mode: self.duck_typing_ser_mode,
context: self.context.as_ref().map(|m| m.bind(py)),
sort_keys: false,
}
}
}

#[derive(Clone)]
#[derive(Clone, PartialEq)]
#[cfg_attr(debug_assertions, derive(Debug))]
pub(crate) enum SerMode {
Python,
Expand Down
56 changes: 48 additions & 8 deletions src/serializers/infer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ pub(crate) fn infer_to_python_known(
extra.fallback,
extra.duck_typing_ser_mode,
extra.context,
extra.sort_keys,
);
serializer.serializer.to_python(value, include, exclude, &extra)
};
Expand Down Expand Up @@ -265,10 +266,16 @@ pub(crate) fn infer_to_python_known(
}
ObType::Dict => {
let dict = value.downcast::<PyDict>()?;
serialize_pairs_python(py, dict.iter().map(Ok), include, exclude, extra, Ok)?
serialize_pairs_python(py, dict.iter().map(Ok), include, exclude, extra, |k| {
Ok(PyString::new(py, &infer_json_key(&k, extra)?).into_any())
})?
}
ObType::PydanticSerializable => serialize_with_serializer()?,
ObType::Dataclass => serialize_pairs_python(py, any_dataclass_iter(value)?.0, include, exclude, extra, Ok)?,
ObType::Dataclass => {
serialize_pairs_python(py, any_dataclass_iter(value)?.0, include, exclude, extra, |k| {
Ok(PyString::new(py, &infer_json_key(&k, extra)?).into_any())
})?
}
ObType::Generator => {
let iter = super::type_serializers::generator::SerializationIterator::new(
value.downcast()?,
Expand Down Expand Up @@ -497,6 +504,7 @@ pub(crate) fn infer_serialize_known<S: Serializer>(
extra.fallback,
extra.duck_typing_ser_mode,
extra.context,
extra.sort_keys,
);
let pydantic_serializer =
PydanticSerializer::new(value, &extracted_serializer.serializer, include, exclude, &extra);
Expand Down Expand Up @@ -708,15 +716,36 @@ fn serialize_pairs_python<'py>(
let new_dict = PyDict::new(py);
let filter = AnyFilter::new();

// Collect pairs if we need to sort
let mut pairs = Vec::new();
for result in pairs_iter {
let (k, v) = result?;
let op_next = filter.key_filter(&k, include, exclude)?;
if let Some((next_include, next_exclude)) = op_next {
let k = key_transform(k)?;
let k = if *extra.mode == SerMode::Json {
key_transform(k)?
} else {
k
};
let v = infer_to_python(&v, next_include.as_ref(), next_exclude.as_ref(), extra)?;
new_dict.set_item(k, v)?;
pairs.push((k, v));
}
}

// Sort if requested and in JSON mode
if extra.sort_keys && *extra.mode == SerMode::Json {
pairs.sort_by(|(a, _), (b, _)| {
a.str()
.ok()
.and_then(|s| s.to_str().ok().map(ToString::to_string))
.cmp(&b.str().ok().and_then(|s| s.to_str().ok().map(ToString::to_string)))
});
}

// Add to dictionary
for (k, v) in pairs {
new_dict.set_item(k, v)?;
}
Ok(new_dict.into())
}

Expand All @@ -731,15 +760,26 @@ fn serialize_pairs_json<'py, S: Serializer>(
let mut map = serializer.serialize_map(Some(iter_size))?;
let filter = AnyFilter::new();

// If sort_keys is true, collect and sort the pairs first
let mut pairs: Vec<_> = Vec::new();
for result in pairs_iter {
let (key, value) = result.map_err(py_err_se_err)?;

let op_next = filter.key_filter(&key, include, exclude).map_err(py_err_se_err)?;
if let Some((next_include, next_exclude)) = op_next {
let key = infer_json_key(&key, extra).map_err(py_err_se_err)?;
let value_serializer = SerializeInfer::new(&value, next_include.as_ref(), next_exclude.as_ref(), extra);
map.serialize_entry(&key, &value_serializer)?;
let key_str = infer_json_key(&key, extra).map_err(py_err_se_err)?.into_owned();
pairs.push((key_str, (value, next_include, next_exclude)));
}
}

if extra.sort_keys {
pairs.sort_by(|(a, _), (b, _)| a.cmp(b));
}

// Serialize the pairs in order
for (key, (value, next_include, next_exclude)) in pairs {
let value_serializer = SerializeInfer::new(&value, next_include.as_ref(), next_exclude.as_ref(), extra);
map.serialize_entry(&key, &value_serializer)?;
}

map.end()
}
15 changes: 12 additions & 3 deletions src/serializers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ impl SchemaSerializer {
fallback: Option<&'a Bound<'a, PyAny>>,
duck_typing_ser_mode: DuckTypingSerMode,
context: Option<&'a Bound<'a, PyAny>>,
sort_keys: bool,
) -> Extra<'b> {
Extra::new(
py,
Expand All @@ -81,6 +82,7 @@ impl SchemaSerializer {
fallback,
duck_typing_ser_mode,
context,
sort_keys,
)
}
}
Expand Down Expand Up @@ -148,6 +150,7 @@ impl SchemaSerializer {
fallback,
duck_typing_ser_mode,
context,
false,
);
let v = self.serializer.to_python(value, include, exclude, &extra)?;
warnings.final_check(py)?;
Expand All @@ -157,7 +160,7 @@ impl SchemaSerializer {
#[allow(clippy::too_many_arguments)]
#[pyo3(signature = (value, *, indent = None, include = None, exclude = None, by_alias = None,
exclude_unset = false, exclude_defaults = false, exclude_none = false, round_trip = false, warnings = WarningsArg::Bool(true),
fallback = None, serialize_as_any = false, context = None))]
fallback = None, serialize_as_any = false, context = None, sort_keys = false))]
pub fn to_json(
&self,
py: Python,
Expand All @@ -174,6 +177,7 @@ impl SchemaSerializer {
fallback: Option<&Bound<'_, PyAny>>,
serialize_as_any: bool,
context: Option<&Bound<'_, PyAny>>,
sort_keys: bool,
) -> PyResult<PyObject> {
let warnings_mode = match warnings {
WarningsArg::Bool(b) => b.into(),
Expand All @@ -196,6 +200,7 @@ impl SchemaSerializer {
fallback,
duck_typing_ser_mode,
context,
sort_keys,
);
let bytes = to_json_bytes(
value,
Expand Down Expand Up @@ -242,7 +247,7 @@ impl SchemaSerializer {
#[pyo3(signature = (value, *, indent = None, include = None, exclude = None, by_alias = None,
exclude_none = false, round_trip = false, timedelta_mode = "iso8601", bytes_mode = "utf8",
inf_nan_mode = "constants", serialize_unknown = false, fallback = None, serialize_as_any = false,
context = None))]
context = None, sort_keys = false))]
pub fn to_json(
py: Python,
value: &Bound<'_, PyAny>,
Expand All @@ -259,6 +264,7 @@ pub fn to_json(
fallback: Option<&Bound<'_, PyAny>>,
serialize_as_any: bool,
context: Option<&Bound<'_, PyAny>>,
sort_keys: bool,
) -> PyResult<PyObject> {
let state = SerializationState::new(timedelta_mode, bytes_mode, inf_nan_mode)?;
let duck_typing_ser_mode = DuckTypingSerMode::from_bool(serialize_as_any);
Expand All @@ -272,6 +278,7 @@ pub fn to_json(
fallback,
duck_typing_ser_mode,
context,
sort_keys,
);
let serializer = type_serializers::any::AnySerializer.into();
let bytes = to_json_bytes(value, &serializer, include, exclude, &extra, indent, 1024)?;
Expand All @@ -284,7 +291,7 @@ pub fn to_json(
#[pyfunction]
#[pyo3(signature = (value, *, include = None, exclude = None, by_alias = None, exclude_none = false, round_trip = false,
timedelta_mode = "iso8601", bytes_mode = "utf8", inf_nan_mode = "constants", serialize_unknown = false, fallback = None,
serialize_as_any = false, context = None))]
serialize_as_any = false, context = None, sort_keys = false))]
pub fn to_jsonable_python(
py: Python,
value: &Bound<'_, PyAny>,
Expand All @@ -300,6 +307,7 @@ pub fn to_jsonable_python(
fallback: Option<&Bound<'_, PyAny>>,
serialize_as_any: bool,
context: Option<&Bound<'_, PyAny>>,
sort_keys: bool,
) -> PyResult<PyObject> {
let state = SerializationState::new(timedelta_mode, bytes_mode, inf_nan_mode)?;
let duck_typing_ser_mode = DuckTypingSerMode::from_bool(serialize_as_any);
Expand All @@ -313,6 +321,7 @@ pub fn to_jsonable_python(
fallback,
duck_typing_ser_mode,
context,
sort_keys,
);
let v = infer::infer_to_python(value, include, exclude, &extra)?;
state.final_check(py)?;
Expand Down
3 changes: 3 additions & 0 deletions tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ a = A()
None,
false,
None,
false,
)
.unwrap();
let serialized: &[u8] = serialized.extract(py).unwrap();
Expand Down Expand Up @@ -212,6 +213,7 @@ dump_json_input_2 = {'a': 'something'}
None,
false,
None,
false,
)
.unwrap();
let repr = format!("{}", serialization_result.bind(py).repr().unwrap());
Expand All @@ -233,6 +235,7 @@ dump_json_input_2 = {'a': 'something'}
None,
false,
None,
false,
)
.unwrap();
let repr = format!("{}", serialization_result.bind(py).repr().unwrap());
Expand Down
Loading
Loading