Skip to content

Add UNSET sentinel #1711

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ classifiers = [
'Typing :: Typed',
]
dependencies = [
'typing-extensions>=4.13.0',
'typing-extensions@git+https://github.com/HexDecimal/typing_extensions@conforming-sentinel',
]
dynamic = ['license', 'readme', 'version']

Expand Down
28 changes: 28 additions & 0 deletions python/pydantic_core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import sys as _sys
from typing import Any as _Any

from typing_extensions import Sentinel

from ._pydantic_core import (
ArgsKwargs,
MultiHostUrl,
Expand Down Expand Up @@ -40,6 +42,7 @@

__all__ = [
'__version__',
'UNSET',
'CoreConfig',
'CoreSchema',
'CoreSchemaType',
Expand Down Expand Up @@ -140,3 +143,28 @@ class MultiHostHost(_TypedDict):
"""The host part of this host, or `None`."""
port: int | None
"""The port part of this host, or `None`."""


UNSET = Sentinel('UNSET', module_name='pydantic_core')
"""A singleton indicating a field value was not set during validation.

This singleton can be used as a default value, as an alternative to `None` when `None` already has
an explicit meaning. During serialization, any field with `UNSET` as a value is excluded
from the output.

Example:
```python
from pydantic import BaseModel
from pydantic.experimental.unset import UNSET


class Configuration(BaseModel):
timeout: int | None | UNSET = UNSET


# configuration defaults, stored somewhere else:
defaults = {'timeout': 200}

conf = Configuration.model_validate({...})
timeout = conf.timeout if conf.timeout is not UNSET else defaults['timeout']
```
"""
12 changes: 12 additions & 0 deletions python/pydantic_core/core_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1341,6 +1341,16 @@ class Color(Enum):
)


class UnsetSentinelSchema(TypedDict, total=False):
    """Core schema dict for the `UNSET` sentinel; its only key is the required `type` tag."""

    type: Required[Literal['unset-sentinel']]


def unset_sentinel_schema() -> UnsetSentinelSchema:
    """Build the core schema matching the [`UNSET`][pydantic_core.UNSET] sentinel.

    Returns:
        A schema dict whose only key is `type`, set to `'unset-sentinel'`.
    """
    schema: UnsetSentinelSchema = {'type': 'unset-sentinel'}
    return schema


# must match input/parse_json.rs::JsonType::try_from
JsonType = Literal['null', 'bool', 'int', 'float', 'str', 'list', 'dict']

Expand Down Expand Up @@ -4065,6 +4075,7 @@ def definition_reference_schema(
DatetimeSchema,
TimedeltaSchema,
LiteralSchema,
UnsetSentinelSchema,
EnumSchema,
IsInstanceSchema,
IsSubclassSchema,
Expand Down Expand Up @@ -4123,6 +4134,7 @@ def definition_reference_schema(
'datetime',
'timedelta',
'literal',
'unset-sentinel',
'enum',
'is-instance',
'is-subclass',
Expand Down
1 change: 1 addition & 0 deletions src/common/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub(crate) mod prebuilt;
pub(crate) mod union;
pub(crate) mod unset_sentinel;
16 changes: 16 additions & 0 deletions src/common/unset_sentinel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::sync::GILOnceCell;

static UNSET_SENTINEL_OBJECT: GILOnceCell<Py<PyAny>> = GILOnceCell::new();

/// Returns the `pydantic_core.UNSET` sentinel object, bound to `py`.
///
/// On first use the object is looked up by importing the `pydantic_core` Python
/// module and reading its `UNSET` attribute; the result is stored in
/// `UNSET_SENTINEL_OBJECT` and reused afterwards.
///
/// # Panics
///
/// Panics if `pydantic_core` cannot be imported or does not expose an `UNSET`
/// attribute. Both indicate a broken installation rather than a recoverable
/// runtime condition.
pub fn get_unset_sentinel_object(py: Python) -> &Bound<'_, PyAny> {
    UNSET_SENTINEL_OBJECT
        .get_or_init(py, || {
            py.import(intern!(py, "pydantic_core"))
                .and_then(|core_module| core_module.getattr(intern!(py, "UNSET")))
                // State the invariant so a failure here is diagnosable from the panic message.
                .expect("`pydantic_core.UNSET` must be importable from the Python package")
                .into()
        })
        .bind(py)
}
3 changes: 3 additions & 0 deletions src/errors/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,8 @@ error_types! {
expected: {ctx_type: String, ctx_fn: field_from_context},
},
// ---------------------
// unset sentinel
UnsetSentinelError {},
// date errors
DateType {},
DateParsing {
Expand Down Expand Up @@ -531,6 +533,7 @@ impl ErrorType {
Self::AssertionError {..} => "Assertion failed, {error}",
Self::CustomError {..} => "", // custom errors are handled separately
Self::LiteralError {..} => "Input should be {expected}",
Self::UnsetSentinelError { .. } => "Input should be the 'UNSET' sentinel",
Self::DateType {..} => "Input should be a valid date",
Self::DateParsing {..} => "Input should be a valid date in the format YYYY-MM-DD, {error}",
Self::DateFromDatetimeParsing {..} => "Input should be a valid date or datetime, {error}",
Expand Down
5 changes: 5 additions & 0 deletions src/serializers/computed_fields.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use pyo3::{intern, PyTraverseError, PyVisit};
use serde::ser::SerializeMap;

use crate::build_tools::py_schema_error_type;
use crate::common::unset_sentinel::get_unset_sentinel_object;
use crate::definitions::DefinitionsBuilder;
use crate::py_gc::PyGcTraverse;
use crate::serializers::filter::SchemaFilter;
Expand Down Expand Up @@ -148,6 +149,10 @@ impl ComputedFields {
if extra.exclude_none && value.is_none() {
continue;
}
let unset_obj = get_unset_sentinel_object(model.py());
if value.is(unset_obj) {
continue;
}

let field_extra = Extra {
field_name: Some(&computed_field.property_name),
Expand Down
22 changes: 19 additions & 3 deletions src/serializers/fields.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use ahash::AHashMap;
use serde::ser::SerializeMap;
use smallvec::SmallVec;

use crate::common::unset_sentinel::get_unset_sentinel_object;
use crate::serializers::extra::SerCheck;
use crate::PydanticSerializationUnexpectedValue;

Expand All @@ -15,8 +16,7 @@ use super::errors::py_err_se_err;
use super::extra::Extra;
use super::filter::SchemaFilter;
use super::infer::{infer_json_key, infer_serialize, infer_to_python, SerializeInfer};
use super::shared::PydanticSerializer;
use super::shared::{CombinedSerializer, TypeSerializer};
use super::shared::{CombinedSerializer, PydanticSerializer, TypeSerializer};

/// representation of a field for serialization
#[derive(Debug)]
Expand Down Expand Up @@ -154,6 +154,7 @@ impl GeneralFieldsSerializer {
) -> PyResult<Bound<'py, PyDict>> {
let output_dict = PyDict::new(py);
let mut used_req_fields: usize = 0;
let unset_obj = get_unset_sentinel_object(py);

// NOTE! we maintain the order of the input dict assuming that's right
for result in main_iter {
Expand All @@ -163,6 +164,10 @@ impl GeneralFieldsSerializer {
if extra.exclude_none && value.is_none() {
continue;
}
if value.is(unset_obj) {
continue;
}

let field_extra = Extra {
field_name: Some(key_str),
..extra
Expand Down Expand Up @@ -238,9 +243,13 @@ impl GeneralFieldsSerializer {

for result in main_iter {
let (key, value) = result.map_err(py_err_se_err)?;
let unset_obj = get_unset_sentinel_object(value.py());
if extra.exclude_none && value.is_none() {
continue;
}
if value.is(unset_obj) {
continue;
}
let key_str = key_str(&key).map_err(py_err_se_err)?;
let field_extra = Extra {
field_name: Some(key_str),
Expand Down Expand Up @@ -326,6 +335,7 @@ impl TypeSerializer for GeneralFieldsSerializer {
extra: &Extra,
) -> PyResult<PyObject> {
let py = value.py();
let unset_obj = get_unset_sentinel_object(py);
// If there is already a model registered (from a dataclass, BaseModel)
// then do not touch it
// If there is no model, we (a TypedDict) are the model
Expand All @@ -347,6 +357,9 @@ impl TypeSerializer for GeneralFieldsSerializer {
if extra.exclude_none && value.is_none() {
continue;
}
if value.is(unset_obj) {
continue;
}
if let Some((next_include, next_exclude)) = self.filter.key_filter(&key, include, exclude)? {
let value = match &self.extra_serializer {
Some(serializer) => {
Expand Down Expand Up @@ -380,7 +393,7 @@ impl TypeSerializer for GeneralFieldsSerializer {
extra.warnings.on_fallback_ser::<S>(self.get_name(), value, extra)?;
return infer_serialize(value, serializer, include, exclude, extra);
};

let unset_obj = get_unset_sentinel_object(value.py());
// If there is already a model registered (from a dataclass, BaseModel)
// then do not touch it
// If there is no model, we (a TypedDict) are the model
Expand All @@ -407,6 +420,9 @@ impl TypeSerializer for GeneralFieldsSerializer {
if extra.exclude_none && value.is_none() {
continue;
}
if value.is(unset_obj) {
continue;
}
let filter = self.filter.key_filter(&key, include, exclude).map_err(py_err_se_err)?;
if let Some((next_include, next_exclude)) = filter {
let output_key = infer_json_key(&key, extra).map_err(py_err_se_err)?;
Expand Down
2 changes: 2 additions & 0 deletions src/serializers/shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ combined_serializer! {
Union: super::type_serializers::union::UnionSerializer;
TaggedUnion: super::type_serializers::union::TaggedUnionSerializer;
Literal: super::type_serializers::literal::LiteralSerializer;
UnsetSentinel: super::type_serializers::unset_sentinel::UnsetSentinelSerializer;
Enum: super::type_serializers::enum_::EnumSerializer;
Recursive: super::type_serializers::definitions::DefinitionRefSerializer;
Tuple: super::type_serializers::tuple::TupleSerializer;
Expand Down Expand Up @@ -343,6 +344,7 @@ impl PyGcTraverse for CombinedSerializer {
CombinedSerializer::Union(inner) => inner.py_gc_traverse(visit),
CombinedSerializer::TaggedUnion(inner) => inner.py_gc_traverse(visit),
CombinedSerializer::Literal(inner) => inner.py_gc_traverse(visit),
CombinedSerializer::UnsetSentinel(inner) => inner.py_gc_traverse(visit),
CombinedSerializer::Enum(inner) => inner.py_gc_traverse(visit),
CombinedSerializer::Recursive(inner) => inner.py_gc_traverse(visit),
CombinedSerializer::Tuple(inner) => inner.py_gc_traverse(visit),
Expand Down
1 change: 1 addition & 0 deletions src/serializers/type_serializers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ pub mod timedelta;
pub mod tuple;
pub mod typed_dict;
pub mod union;
pub mod unset_sentinel;
pub mod url;
pub mod uuid;
pub mod with_default;
Expand Down
76 changes: 76 additions & 0 deletions src/serializers/type_serializers/unset_sentinel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// This serializer is defined so that building a schema serializer containing an
// 'unset-sentinel' core schema doesn't crash. In practice, the serializer isn't
// used for model-like classes, as the 'fields' serializer takes care of omitting
// the fields from the output. The serializer can still be used if the 'unset-sentinel'
// core schema is used standalone (e.g. with a Pydantic type adapter), but this isn't
// something we explicitly support.

use std::borrow::Cow;

use pyo3::prelude::*;
use pyo3::types::PyDict;

use serde::ser::Error;

use crate::common::unset_sentinel::get_unset_sentinel_object;
use crate::definitions::DefinitionsBuilder;
use crate::PydanticSerializationUnexpectedValue;

use super::{BuildSerializer, CombinedSerializer, Extra, TypeSerializer};

/// Serializer for the 'unset-sentinel' core schema.
///
/// The struct has no fields; the sentinel object itself is fetched on demand
/// when a value is serialized.
#[derive(Debug)]
pub struct UnsetSentinelSerializer {}

impl BuildSerializer for UnsetSentinelSerializer {
    /// Value of the schema's `type` key that selects this serializer.
    const EXPECTED_TYPE: &'static str = "unset-sentinel";

    /// Builds the serializer.
    ///
    /// None of the arguments are read: the schema, config and definitions have
    /// no influence on the resulting (field-less) serializer.
    fn build(
        _schema: &Bound<'_, PyDict>,
        _config: Option<&Bound<'_, PyDict>>,
        _definitions: &mut DefinitionsBuilder<CombinedSerializer>,
    ) -> PyResult<CombinedSerializer> {
        let serializer = Self {};
        Ok(serializer.into())
    }
}

// The field list is empty because `UnsetSentinelSerializer` has no fields.
// NOTE(review): presumably this macro generates the `py_gc_traverse` impl that
// `CombinedSerializer` dispatches to — confirm against the macro definition.
impl_py_gc_traverse!(UnsetSentinelSerializer {});

impl TypeSerializer for UnsetSentinelSerializer {
fn to_python(
&self,
value: &Bound<'_, PyAny>,
_include: Option<&Bound<'_, PyAny>>,
_exclude: Option<&Bound<'_, PyAny>>,
_extra: &Extra,
) -> PyResult<PyObject> {
let unset_obj = get_unset_sentinel_object(value.py());

if value.is(unset_obj) {
Ok(unset_obj.to_owned().into())
} else {
Err(
PydanticSerializationUnexpectedValue::new_from_msg(Some("Expected 'UNSET' sentinel".to_string()))
.to_py_err(),
)
}
}

fn json_key<'a>(&self, key: &'a Bound<'_, PyAny>, extra: &Extra) -> PyResult<Cow<'a, str>> {
self.invalid_as_json_key(key, extra, Self::EXPECTED_TYPE)
}

fn serde_serialize<S: serde::ser::Serializer>(
&self,
_value: &Bound<'_, PyAny>,
_serializer: S,
_include: Option<&Bound<'_, PyAny>>,
_exclude: Option<&Bound<'_, PyAny>>,
_extra: &Extra,
) -> Result<S::Ok, S::Error> {
Err(Error::custom("'UNSET' can't be serialized to JSON".to_string()))
}

fn get_name(&self) -> &str {
Self::EXPECTED_TYPE
}
}
5 changes: 5 additions & 0 deletions src/validators/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ mod timedelta;
mod tuple;
mod typed_dict;
mod union;
mod unset_sentinel;
mod url;
mod uuid;
mod validation_state;
Expand Down Expand Up @@ -574,6 +575,8 @@ fn build_validator_inner(
call::CallValidator,
// literals
literal::LiteralValidator,
// unset sentinel
unset_sentinel::UnsetSentinelValidator,
// enums
enum_::BuildEnumValidator,
// any
Expand Down Expand Up @@ -741,6 +744,8 @@ pub enum CombinedValidator {
FunctionCall(call::CallValidator),
// literals
Literal(literal::LiteralValidator),
// Unset sentinel
UnsetSentinel(unset_sentinel::UnsetSentinelValidator),
// enums
IntEnum(enum_::EnumValidator<enum_::IntEnumValidator>),
StrEnum(enum_::EnumValidator<enum_::StrEnumValidator>),
Expand Down
Loading
Loading