Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .changelog/5269.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
`opentelemetry-sdk`: declarative config loader now recursively converts parsed dicts into typed dataclass instances, including nested dataclasses, lists of dataclasses, and enum values. End-to-end YAML/JSON → SDK configuration now works via the factory functions.
2 changes: 1 addition & 1 deletion .codespellrc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[codespell]
# skipping auto generated folders
skip = ./.tox,./.mypy_cache,./docs/_build,./target,*/LICENSE,./venv,.git,./opentelemetry-semantic-conventions,*-requirements*.txt
ignore-words-list = ans,ue,ot,hist,ro
ignore-words-list = ans,ue,ot,hist,ro,astroid
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Copyright The OpenTelemetry Authors
# SPDX-License-Identifier: Apache-2.0

"""Recursive dict-to-dataclass conversion for parsed config data.

The YAML/JSON loader produces nested dicts. Factory functions expect typed
dataclass instances (e.g. ``TracerProvider``, ``SpanProcessor``). This module
walks each field's type annotation and converts nested dicts into their
corresponding dataclass types.
"""

from __future__ import annotations

import dataclasses
import enum
import types
import typing
from collections.abc import Mapping
from typing import Any, TypeVar, Union, get_args, get_origin

_T = TypeVar("_T")


def _unwrap_optional(type_hint: Any) -> Any:
"""Strip ``None`` from a ``X | None`` / ``Optional[X]`` annotation.

Returns the unwrapped type, or the original hint if not a Union with None.
"""
origin = get_origin(type_hint)
if origin is Union or origin is types.UnionType:
non_none = [t for t in get_args(type_hint) if t is not type(None)]
if len(non_none) == 1:
return non_none[0]
return type_hint


def _convert_value(value: Any, type_hint: Any) -> Any:
    """Convert ``value`` into the shape described by ``type_hint``.

    Rules, applied after stripping ``None`` from an optional hint:

    - ``None`` is returned as-is, whatever the hint.
    - ``list[X]`` with a list value: every element is converted against
      ``X``. An unparameterised list hint leaves the list untouched.
    - Dataclass hint with a mapping value: built via
      ``_dict_to_dataclass``. Any ``Mapping`` qualifies, matching what
      ``_dict_to_dataclass`` itself accepts, not just ``dict``.
    - ``Enum`` hint with a value that is not already a member: coerced by
      calling the enum class with the value.
    - Everything else (primitives, ``dict[str, Any]``, unions of several
      types, ...) passes through unchanged.

    Raises:
        ValueError: If an enum-typed value matches no member of the enum.
    """
    if value is None:
        return None

    unwrapped = _unwrap_optional(type_hint)

    # list[X] — recurse on each element.
    if get_origin(unwrapped) is list and isinstance(value, list):
        args = get_args(unwrapped)
        if not args:
            return value
        item_type = args[0]
        return [_convert_value(item, item_type) for item in value]

    # Both remaining conversions need an actual class to inspect.
    if not isinstance(unwrapped, type):
        return value

    # Direct dataclass type — recurse.
    if dataclasses.is_dataclass(unwrapped) and isinstance(value, Mapping):
        return _dict_to_dataclass(value, unwrapped)

    # Enum type — coerce string/value to the Enum member.
    if issubclass(unwrapped, enum.Enum) and not isinstance(value, unwrapped):
        return unwrapped(value)

    return value
Comment thread
MikeGoldsmith marked this conversation as resolved.


def _dict_to_dataclass(data: Mapping[str, Any], cls: type[_T]) -> _T:
    """Recursively build an instance of dataclass ``cls`` from ``data``.

    Each key of ``data`` is handled as follows:

    - A key naming a dataclass field has its value converted against that
      field's resolved type annotation (recursing for nested dataclasses).
    - Any other key is forwarded to the constructor untouched; classes
      decorated with ``@_additional_properties`` will capture them on the
      instance's ``additional_properties`` attribute.

    ``ClassVar`` annotations (e.g. ``additional_properties`` on decorated
    dataclasses) are not reported by ``dataclasses.fields`` and are
    therefore never treated as known fields.

    Args:
        data: Mapping of field names to raw (parsed) values.
        cls: The dataclass type to instantiate.

    Returns:
        A new ``cls`` instance populated from ``data``.

    Raises:
        TypeError: If ``cls`` is not a dataclass type, or if the ``cls``
            constructor rejects the resulting keyword arguments.
    """
    # ``dataclasses.is_dataclass`` is also true for dataclass *instances*,
    # and arbitrary objects need not have ``__name__``. Check for a class
    # first so the documented TypeError is what callers actually get.
    if not isinstance(cls, type) or not dataclasses.is_dataclass(cls):
        label = cls.__name__ if isinstance(cls, type) else repr(cls)
        raise TypeError(f"{label} is not a dataclass")

    # Annotated as ``dict[str, Any]`` so astroid stops tracing into
    # ``typing.get_type_hints`` — under pylint 3.x that path leads into
    # Python 3.14's ``annotationlib`` (which uses t-strings) and crashes.
    hints: dict[str, Any] = dict(
        typing.get_type_hints(cls, include_extras=False)
    )
    known_fields = {f.name for f in dataclasses.fields(cls)}
    kwargs: dict[str, Any] = {}

    for key, value in data.items():
        if key in known_fields:
            kwargs[key] = _convert_value(value, hints.get(key))
        else:
            # Unknown key — @_additional_properties decorator will capture it.
            kwargs[key] = value

    return cls(**kwargs)
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pathlib import Path
from typing import Any

from opentelemetry.sdk._configuration._conversion import _dict_to_dataclass
from opentelemetry.sdk._configuration._exceptions import ConfigurationError
from opentelemetry.sdk._configuration.file._env_substitution import (
substitute_env_vars,
Expand Down Expand Up @@ -172,10 +173,13 @@ def _validate_schema(data: dict) -> None:


def _dict_to_model(data: dict[str, Any]) -> OpenTelemetryConfiguration:
"""Convert dictionary to OpenTelemetryConfiguration model.
"""Convert a parsed config dictionary to the full typed model tree.

Uses the generated dataclass from models.py. This provides basic
validation through dataclass field types.
Walks each field's type annotation, recursively converting nested
dicts to their corresponding dataclass types. The resulting
``OpenTelemetryConfiguration`` is fully typed end-to-end, so factory
functions can rely on typed attribute access (e.g. ``config.sampler``,
``config.processors[0].batch.exporter``).

Args:
data: Parsed configuration dictionary.
Expand All @@ -187,15 +191,9 @@ def _dict_to_model(data: dict[str, Any]) -> OpenTelemetryConfiguration:
TypeError: If data doesn't match expected structure.
ValueError: If values are invalid.
"""
# Construct the top-level model from the validated dict. Nested fields
# are stored as dicts rather than their dataclass types; factory functions
# in later PRs will handle the full recursive conversion when building
# SDK objects.
try:
config = OpenTelemetryConfiguration(**data)
return config
return _dict_to_dataclass(data, OpenTelemetryConfiguration)
except TypeError as exc:
# Provide more helpful error message
raise TypeError(
f"Configuration structure is invalid. "
f"Check that all required fields are present and correctly typed: {exc}"
Expand Down
89 changes: 89 additions & 0 deletions opentelemetry-sdk/tests/_configuration/file/test_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,32 @@
from pathlib import Path
from unittest.mock import patch

from opentelemetry.sdk._configuration._tracer_provider import (
create_tracer_provider,
)
from opentelemetry.sdk._configuration.file import (
ConfigurationError,
load_config_file,
)
from opentelemetry.sdk._configuration.models import (
BatchSpanProcessor as BatchSpanProcessorConfig,
)
from opentelemetry.sdk._configuration.models import OpenTelemetryConfiguration
from opentelemetry.sdk._configuration.models import (
ParentBasedSampler as ParentBasedSamplerConfig,
)
from opentelemetry.sdk._configuration.models import (
SpanProcessor as SpanProcessorConfig,
)
from opentelemetry.sdk._configuration.models import (
TracerProvider as TracerProviderConfig,
)
from opentelemetry.sdk.trace import TracerProvider as SdkTracerProvider
from opentelemetry.sdk.trace.export import (
BatchSpanProcessor,
ConsoleSpanExporter,
)
from opentelemetry.sdk.trace.sampling import ParentBased, TraceIdRatioBased


class TestConfigLoader(unittest.TestCase):
Expand Down Expand Up @@ -231,3 +252,71 @@ def test_schema_validation_invalid_enum(self):
self.assertIn("schema", str(ctx.exception).lower())
finally:
os.unlink(temp_path)


class TestConfigLoaderEndToEnd(unittest.TestCase):
    """Smoke-test the full YAML -> typed config -> SDK object pipeline.

    Unit tests in test_conversion.py exercise the dict-to-dataclass
    conversion in isolation; these tests verify it composes with the
    real loader and downstream factory functions on a representative
    nested configuration.
    """

    # The nesting is significant: ``processors`` and ``sampler`` belong to
    # ``tracer_provider``, the exporter belongs to the ``batch`` processor,
    # and the ratio sampler is the ``root`` of ``parent_based``. The tests
    # below assert exactly this structure.
    _YAML = """
file_format: '1.0-rc.1'
tracer_provider:
  processors:
    - batch:
        exporter:
          console: {}
  sampler:
    parent_based:
      root:
        trace_id_ratio_based: {ratio: 0.5}
"""

    def _load(self) -> OpenTelemetryConfiguration:
        """Write ``_YAML`` to a temporary file and load it with the real loader.

        The file is created with ``delete=False`` so it can be reopened by
        path after the handle is closed, and is removed in ``finally``
        whether or not loading succeeds.
        """
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".yaml", delete=False
        ) as fh:
            fh.write(self._YAML)
            path = fh.name
        try:
            return load_config_file(path)
        finally:
            os.unlink(path)

    def test_nested_fields_are_typed_dataclasses(self):
        config = self._load()

        self.assertIsInstance(config.tracer_provider, TracerProviderConfig)
        self.assertIsInstance(
            config.tracer_provider.sampler.parent_based,
            ParentBasedSamplerConfig,
        )
        # Lists of dataclasses are converted element-wise.
        self.assertIsInstance(
            config.tracer_provider.processors[0], SpanProcessorConfig
        )
        self.assertIsInstance(
            config.tracer_provider.processors[0].batch,
            BatchSpanProcessorConfig,
        )

    # pylint: disable=protected-access
    def test_typed_config_feeds_factory_function(self):
        config = self._load()

        provider = create_tracer_provider(config.tracer_provider)

        self.assertIsInstance(provider, SdkTracerProvider)
        # Sampler wiring from the YAML: parent_based(trace_id_ratio_based(0.5)).
        self.assertIsInstance(provider.sampler, ParentBased)
        self.assertIsInstance(provider.sampler._root, TraceIdRatioBased)
        self.assertEqual(provider.sampler._root.rate, 0.5)
        # Span processor wiring from the YAML: batch(console).
        processors = provider._active_span_processor._span_processors
        self.assertEqual(len(processors), 1)
        self.assertIsInstance(processors[0], BatchSpanProcessor)
        self.assertIsInstance(processors[0].span_exporter, ConsoleSpanExporter)
110 changes: 110 additions & 0 deletions opentelemetry-sdk/tests/_configuration/test_conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Copyright The OpenTelemetry Authors
# SPDX-License-Identifier: Apache-2.0

# Tests access private members of SDK classes to assert correct configuration.
# pylint: disable=protected-access

import unittest
from dataclasses import dataclass
from typing import Any, ClassVar

from opentelemetry.sdk._configuration._common import _additional_properties
from opentelemetry.sdk._configuration._conversion import _dict_to_dataclass
from opentelemetry.sdk._configuration.models import ExemplarFilter


# Leaf fixture: a single optional primitive field.
@dataclass
class _Inner:
    value: int | None = None


# Mid-level fixture: holds ``_Inner`` both directly and as list elements,
# covering the nested-dataclass and list-of-dataclass conversion paths.
@dataclass
class _Middle:
    inner: _Inner | None = None
    items: list[_Inner] | None = None


# Top-level fixture: a nested dataclass field next to a plain string field.
@dataclass
class _Outer:
    middle: _Middle | None = None
    name: str | None = None


# Fixture for unknown-key handling. The tests below expect keys other than
# ``known`` to end up in ``additional_properties``; presumably the
# ``_additional_properties`` decorator is what collects them — its
# implementation is not visible here.
@_additional_properties
@dataclass
class _WithExtras:
    known: str | None = None
    # Declared as ClassVar, so it is not a dataclass field.
    additional_properties: ClassVar[dict[str, Any]]


# Fixture with an enum-typed field, used for the enum coercion tests.
@dataclass
class _WithEnum:
    filter: ExemplarFilter | None = None


class TestDictToDataclass(unittest.TestCase):
    """Unit tests for ``_dict_to_dataclass`` against small local fixtures."""

    def test_raises_on_non_dataclass(self):
        # ``dict`` is a class but not a dataclass, so it must be refused.
        with self.assertRaises(TypeError) as raised:
            _dict_to_dataclass({"x": 1}, dict)
        message = str(raised.exception)
        self.assertIn("not a dataclass", message)

    def test_converts_flat_dict(self):
        converted = _dict_to_dataclass({"value": 42}, _Inner)
        self.assertIsInstance(converted, _Inner)
        self.assertEqual(converted.value, 42)

    def test_converts_nested_dataclass(self):
        payload = {"middle": {"inner": {"value": 7}}}
        outer = _dict_to_dataclass(payload, _Outer)
        self.assertIsInstance(outer, _Outer)
        # Every level of nesting must come out as its own dataclass type.
        self.assertIsInstance(outer.middle, _Middle)
        self.assertIsInstance(outer.middle.inner, _Inner)
        self.assertEqual(outer.middle.inner.value, 7)

    def test_converts_list_of_dataclasses(self):
        payload = {"middle": {"items": [{"value": 1}, {"value": 2}]}}
        outer = _dict_to_dataclass(payload, _Outer)
        self.assertEqual(len(outer.middle.items), 2)
        self.assertIsInstance(outer.middle.items[0], _Inner)
        self.assertEqual(outer.middle.items[0].value, 1)
        self.assertEqual(outer.middle.items[1].value, 2)

    def test_none_value_preserved(self):
        payload = {"middle": None, "name": "test"}
        outer = _dict_to_dataclass(payload, _Outer)
        self.assertIsNone(outer.middle)
        self.assertEqual(outer.name, "test")

    def test_missing_optional_fields_default_to_none(self):
        outer = _dict_to_dataclass({}, _Outer)
        self.assertIsNone(outer.middle)
        self.assertIsNone(outer.name)

    def test_unknown_keys_routed_to_additional_properties(self):
        payload = {"known": "yes", "my_plugin": {"opt": True}}
        converted = _dict_to_dataclass(payload, _WithExtras)
        self.assertEqual(converted.known, "yes")
        # The unknown key is kept verbatim, nested dict included.
        self.assertEqual(
            converted.additional_properties, {"my_plugin": {"opt": True}}
        )

    def test_primitive_values_pass_through(self):
        outer = _dict_to_dataclass({"name": "hello"}, _Outer)
        self.assertEqual(outer.name, "hello")

    def test_empty_list_converted(self):
        payload = {"middle": {"items": []}}
        outer = _dict_to_dataclass(payload, _Outer)
        self.assertEqual(outer.middle.items, [])

    def test_enum_value_coerced_from_string(self):
        converted = _dict_to_dataclass({"filter": "always_on"}, _WithEnum)
        self.assertIs(converted.filter, ExemplarFilter.always_on)

    def test_enum_value_already_enum_passes_through(self):
        payload = {"filter": ExemplarFilter.trace_based}
        converted = _dict_to_dataclass(payload, _WithEnum)
        self.assertIs(converted.filter, ExemplarFilter.trace_based)
Loading