Skip to content

More json encodings #513

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Feb 28, 2025
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:

strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13-dev", "pypy3.9", "pypy3.10"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "pypy3.9", "pypy3.10"]

steps:
- uses: "actions/checkout@v3"
Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Eliot supports a range of use cases and 3rd party libraries:

Eliot is only used to generate your logs; you might need tools like Logstash and ElasticSearch to aggregate and store logs if you are using multiple processes across multiple machines.

Eliot supports Python 3.8-3.12, as well as PyPy3.
Eliot supports Python 3.9-3.13, as well as PyPy3.
It is maintained by Itamar Turner-Trauring, and released under the Apache 2.0 License.

* `Read the documentation <https://eliot.readthedocs.io>`_.
Expand Down
41 changes: 41 additions & 0 deletions benchmarks/serialization_complex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
Benchmark of message serialization.

The goal here is to mostly focus on performance of serialization, in a vaguely
realistic manner. That is, messages are logged in context of a message with a
small number of fields.
"""

import time
import polars as pl
from eliot import start_action, to_file

# Ensure JSON serialization is part of benchmark:
# (output goes to /dev/null, so only the cost of producing it is measured;
# the file object is intentionally left open for the life of the process.)
to_file(open("/dev/null", "w"))

# Number of iterations of the benchmark loop in run().
N = 100_000

# Field values attached to every logged message in run(). Both are types
# that need a custom JSON encoding rather than being plain JSON primitives.
MY_SET = {1, 2, 3, 4}
SERIES = pl.Series([1, 2, 3])


def run():
    """
    Run the benchmark and print timing results.

    Performs ``N`` iterations, each of which starts two nested actions and
    logs one message carrying ``SERIES`` and ``MY_SET`` as fields, then
    prints the average time per message and the message throughput.
    """
    # perf_counter() is monotonic and high-resolution, so it is better suited
    # to measuring an interval than wall-clock time.time().
    start = time.perf_counter()
    for _ in range(N):
        with start_action(action_type="my_action"):
            with start_action(action_type="my_action2") as ctx:
                ctx.log(
                    message_type="my_message",
                    series=SERIES,
                    my_set=MY_SET,
                )
    elapsed = time.perf_counter() - start

    # Each iteration has 5 messages: start/end of my_action, start/end of
    # my_action2, and my_message.
    total_messages = N * 5
    print("%.6f per message" % (elapsed / total_messages,))
    # Throughput must count messages, not iterations, to agree with the
    # per-message figure above.
    print("%s messages/sec" % (int(total_messages / elapsed),))


if __name__ == "__main__":
run()
11 changes: 11 additions & 0 deletions docs/source/news.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
What's New
==========

1.17.0
^^^^^^

Enhancements:

* Eliot's JSON output can now serialize pathlib.Path, Pandas objects, Polars objects, times, dates, Pydantic objects, sets, and complex numbers. Thanks to Anton Kulaga for the patch.

Deprecations and removals:

* Dropped support for Python 3.8.

1.16.0
^^^^^^

Expand Down
79 changes: 78 additions & 1 deletion eliot/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
from typing import Callable
import json
import sys
from pathlib import Path
from datetime import date, time
import platform


class EliotJSONEncoder(json.JSONEncoder):
Expand All @@ -19,7 +22,8 @@ def default(self, o):
def json_default(o: object) -> object:
"""
JSON object encoder for non-standard types. In particular, supports NumPy
types. If you are wrappnig it, call it last, as it will raise a
types, Path objects, Pydantic models, dataclasses, Pandas and Polars
objects. If you are wrapping it, call it last, as it will raise a
``TypeError`` on unsupported types.
"""
numpy = sys.modules.get("numpy", None)
Expand All @@ -39,9 +43,82 @@ def json_default(o: object) -> object:
}
else:
return o.tolist()

# Add Pydantic support
pydantic = sys.modules.get("pydantic", None)
if pydantic is not None and isinstance(o, pydantic.BaseModel):
return o.model_dump()

if isinstance(o, Path):
return str(o)

if isinstance(o, date):
return o.isoformat()

if isinstance(o, time):
return o.isoformat()

if isinstance(o, set):
return list(o)

if isinstance(o, complex):
return {"real": o.real, "imag": o.imag}

# Add Pandas support
pandas = sys.modules.get("pandas", None)
if pandas is not None:
if isinstance(o, pandas.Timestamp):
return o.isoformat()
if isinstance(o, pandas.Series):
return o.to_list()
if isinstance(o, pandas.DataFrame):
return o.to_dict(orient="records")
if isinstance(o, pandas.Interval):
return {"left": o.left, "right": o.right, "closed": o.closed}
if isinstance(o, pandas.Period):
return str(o)

# Add Polars support
polars = sys.modules.get("polars", None)
if polars is not None:
if isinstance(o, polars.Series):
return o.to_list()
if isinstance(o, polars.DataFrame):
return o.to_dicts()
if isinstance(o, polars.Datetime):
return o.isoformat()

raise TypeError("Unsupported type")


if platform.python_implementation() == "PyPy":
    # We're not using orjson, so need to serialize a few more types.

    original_json_default = json_default

    def json_default(o: object, original_json_default=original_json_default) -> object:
        # The previous implementation is bound as a default argument so it
        # stays reachable after the module-level name is deleted below.
        # Imports are local because only this fallback needs them.
        from dataclasses import fields, is_dataclass
        from datetime import datetime
        from enum import Enum
        from uuid import UUID

        # Add dataclass support. Only instances are handled: is_dataclass()
        # is also true for the dataclass type itself, whose fields are not
        # necessarily readable as attributes.
        if is_dataclass(o) and not isinstance(o, type):
            # fields() yields real fields only; __dataclass_fields__ also
            # lists ClassVar/InitVar pseudo-fields, which may not exist as
            # instance attributes and are not part of the instance's data.
            return {f.name: getattr(o, f.name) for f in fields(o)}

        if isinstance(o, datetime):
            return o.isoformat()

        if isinstance(o, UUID):
            return str(o)

        if isinstance(o, Enum):
            return o.value

        # Anything else is delegated to the original implementation.
        return original_json_default(o)

    json_default.__doc__ = original_json_default.__doc__
    del original_json_default


def _encoder_to_default_function(
encoder: json.JSONEncoder,
) -> Callable[[object], object]:
Expand Down
123 changes: 123 additions & 0 deletions eliot/tests/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from unittest import TestCase, skipUnless, skipIf
from json import loads
from importlib.metadata import PackageNotFoundError, version as package_version

try:
import numpy as np
Expand All @@ -18,6 +19,15 @@
)


def package_installed(name: str) -> bool:
    """Tell whether installed distribution metadata exists for ``name``."""
    try:
        package_version(name)
    except PackageNotFoundError:
        # No metadata found for this distribution name.
        return False
    else:
        return True


class EliotJSONEncoderTests(TestCase):
"""Tests for L{EliotJSONEncoder} and L{json_default}."""

Expand Down Expand Up @@ -83,3 +93,116 @@ def test_large_numpy_array(self):
loads(dumps(a1002, default=json_default)),
{"array_start": a1002.flat[:10000].tolist(), "original_shape": [2, 5001]},
)

def test_basic_types(self):
    """
    Standard-library types (paths, dates and times, UUIDs, sets, dict
    subclasses, complex numbers and enums) are serialized to the expected
    JSON values.
    """
    from pathlib import Path
    from datetime import datetime, date, time
    from uuid import UUID
    from collections import defaultdict, OrderedDict, Counter
    from enum import Enum

    class TestEnum(Enum):
        A = 1
        B = "test"

    test_data = {
        "path": Path("/tmp/test"),
        "datetime": datetime(2024, 1, 1, 12, 0),
        "date": date(2024, 1, 1),
        "time": time(12, 0),
        "uuid": UUID("12345678-1234-5678-1234-567812345678"),
        "set": {1, 2, 3},
        "defaultdict": defaultdict(list, {"a": [1, 2]}),
        "ordered_dict": OrderedDict([("a", 1), ("b", 2)]),
        "counter": Counter(["a", "a", "b"]),
        "complex": 1 + 2j,
        "enum": TestEnum.A,
        "enum2": TestEnum.B,
    }

    serialized = loads(dumps(test_data, default=json_default))

    self.assertEqual(serialized["path"], "/tmp/test")
    self.assertEqual(serialized["datetime"], "2024-01-01T12:00:00")
    self.assertEqual(serialized["date"], "2024-01-01")
    self.assertEqual(serialized["time"], "12:00:00")
    self.assertEqual(serialized["uuid"], "12345678-1234-5678-1234-567812345678")
    # A set has no defined iteration order, so compare the sorted elements
    # rather than relying on the order the encoder happened to emit.
    self.assertEqual(sorted(serialized["set"]), [1, 2, 3])
    self.assertEqual(serialized["defaultdict"], {"a": [1, 2]})
    self.assertEqual(serialized["ordered_dict"], {"a": 1, "b": 2})
    self.assertEqual(serialized["counter"], {"a": 2, "b": 1})
    self.assertEqual(serialized["complex"], {"real": 1.0, "imag": 2.0})
    self.assertEqual(serialized["enum"], 1)
    self.assertEqual(serialized["enum2"], "test")

@skipUnless(package_installed("pydantic"), "Pydantic not installed.")
def test_pydantic(self):
    """A Pydantic model is serialized as a JSON object of its fields."""
    import pydantic

    class TestModel(pydantic.BaseModel):
        name: str
        value: int

    encoded = dumps(TestModel(name="test", value=42), default=json_default)
    self.assertEqual(loads(encoded), {"name": "test", "value": 42})

@skipUnless(package_installed("pandas"), "Pandas not installed.")
def test_pandas(self):
    """Each supported Pandas type is serialized to the expected JSON value."""
    import pandas as pd

    def roundtrip(value):
        # Encode with json_default, then decode back to plain Python.
        return loads(dumps(value, default=json_default))

    # Timestamp -> ISO 8601 string
    timestamp = pd.Timestamp("2024-01-01 12:00:00")
    self.assertEqual(roundtrip(timestamp), "2024-01-01T12:00:00")

    # Series -> list of values
    self.assertEqual(roundtrip(pd.Series([1, 2, 3])), [1, 2, 3])

    # DataFrame -> list of row records
    frame = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
    expected_rows = [{"a": 1, "b": 3}, {"a": 2, "b": 4}]
    self.assertEqual(roundtrip(frame), expected_rows)

    # Interval -> object with its bounds and closedness
    expected_interval = {"left": 0, "right": 1, "closed": "both"}
    self.assertEqual(
        roundtrip(pd.Interval(0, 1, closed="both")), expected_interval
    )

    # Period -> its string form
    self.assertEqual(roundtrip(pd.Period("2024-01")), "2024-01")

@skipUnless(package_installed("polars"), "Polars not installed.")
def test_polars(self):
    """Polars Series and DataFrame objects are serialized to JSON values."""
    import polars as pl

    def roundtrip(value):
        # Encode with json_default, then decode back to plain Python.
        return loads(dumps(value, default=json_default))

    # Series -> list of values
    self.assertEqual(roundtrip(pl.Series("a", [1, 2, 3])), [1, 2, 3])

    # DataFrame -> list of row dictionaries
    frame = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
    expected_rows = [{"a": 1, "b": 3}, {"a": 2, "b": 4}]
    self.assertEqual(roundtrip(frame), expected_rows)

def test_dataclass(self):
    """A dataclass instance is serialized as a JSON object of its fields."""
    import dataclasses

    @dataclasses.dataclass
    class TestDataClass:
        name: str
        value: int

    encoded = dumps(TestDataClass(name="test", value=42), default=json_default)
    self.assertEqual(loads(encoded), {"name": "test", "value": 42})
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def read(path):
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
Expand All @@ -32,7 +31,7 @@ def read(path):
version=versioneer.get_version(),
cmdclass=versioneer.get_cmdclass(),
description="Logging library that tells you why it happened",
python_requires=">=3.8.0",
python_requires=">=3.9.0",
install_requires=[
# Internal code documentation:
"zope.interface",
Expand Down
Loading