|
3 | 3 | import socket
|
4 | 4 | import sys
|
5 | 5 | import time
|
| 6 | +import warnings |
6 | 7 | from abc import ABCMeta, abstractmethod
|
| 8 | +from contextlib import nullcontext |
7 | 9 | from math import floor
|
8 | 10 | from pathlib import Path
|
9 | 11 | from queue import Empty, Queue
|
10 | 12 | from signal import SIGINT, signal, SIGTERM
|
11 | 13 | from threading import Thread, Timer
|
12 | 14 | from types import FrameType
|
13 |
| -from typing import Callable, ClassVar, Dict, IO, Optional, Tuple, Union |
| 15 | +from typing import Any, Callable, ClassVar, Dict, IO, Optional, Tuple, Union |
14 | 16 |
|
15 | 17 | import tzlocal
|
16 |
| -from clp_ffi_py.ir import FourByteEncoder |
| 18 | +from clp_ffi_py.ir import FourByteEncoder, Serializer |
| 19 | +from clp_ffi_py.utils import serialize_dict_to_msgpack |
17 | 20 | from zstandard import FLUSH_FRAME, ZstdCompressionWriter, ZstdCompressor
|
18 | 21 |
|
| 22 | +from clp_logging.auto_generated_kv_pairs_utils import AutoGeneratedKeyValuePairsBuffer |
19 | 23 | from clp_logging.protocol import (
|
20 | 24 | BYTE_ORDER,
|
21 | 25 | EOF_CHAR,
|
|
25 | 29 | UINT_MAX,
|
26 | 30 | ULONG_MAX,
|
27 | 31 | )
|
| 32 | +from clp_logging.utils import Timestamp |
28 | 33 |
|
29 | 34 | # TODO: lock writes to zstream if GIL ever goes away
|
30 | 35 | # Note: no need to quote "Queue[Tuple[int, bytes]]" in python 3.9
|
31 | 36 |
|
32 | 37 | DEFAULT_LOG_FORMAT: str = " %(levelname)s %(name)s %(message)s"
|
33 | 38 | WARN_PREFIX: str = " [WARN][clp_logging]"
|
| 39 | +AUTO_GENERATED_KV_PAIRS_KEY: str = "auto_generated_kv_pairs" |
| 40 | +USER_GENERATED_KV_PAIRS_KEY: str = "user_generated_kv_pairs" |
34 | 41 |
|
35 | 42 |
|
36 | 43 | def _init_timeinfo(fmt: Optional[str], tz: Optional[str]) -> Tuple[str, str]:
|
@@ -129,9 +136,9 @@ def _write(self, loglevel: int, msg: str) -> None:
|
129 | 136 | # override
|
130 | 137 | def emit(self, record: logging.LogRecord) -> None:
|
131 | 138 | """
|
132 |
| - Override `logging.Handler.emit` in base class to ensure |
133 |
| - `logging.Handler.handleError` is always called and avoid requiring a |
134 |
| - `logging.LogRecord` to call internal writing functions. |
| 139 | + Implements `logging.Handler.emit` to ensure |
| 140 | + `logging.Handler.handleError` is always called and so derived classes |
| 141 | + only need to implement `_write` instead of implementing this method. |
135 | 142 | """
|
136 | 143 | msg: str = self.format(record) + "\n"
|
137 | 144 | try:
|
@@ -792,3 +799,169 @@ def __init__(
|
792 | 799 | super().__init__(
|
793 | 800 | open(fpath, mode), enable_compression, timestamp_format, timezone, loglevel_timeout
|
794 | 801 | )
|
| 802 | + |
| 803 | + |
class ClpKeyValuePairStreamHandler(logging.Handler):
    """
    A custom logging handler that serializes key-value pair log events into the
    CLP key-value pair IR format.

    Differences from `logging.StreamHandler`:

    - Log events (`logging.LogRecord`) should contain the key-value pairs that a user wants to log
      as a Python dictionary.
      - As a result, the key-value pairs will not be formatted into a string before being written.
    - The key-value pairs will be serialized into the CLP key-value pair IR format before writing to
      the stream.

    Key-value pairs in the log event must abide by the following rules:
    - Keys must be of type `str`.
    - Values must be one of the following types:
      - Primitives: `int`, `float`, `str`, `bool`, or `None`.
      - Arrays, where each array:
        - may contain primitive values, dictionaries, or nested arrays.
        - can be empty.
      - Dictionaries, where each dictionary:
        - must adhere to the aforementioned rules for keys and values.
        - can be empty.

    :param stream: A writable byte output stream to which the handler will write the serialized IR
        byte sequences.
    :param enable_compression: Whether to compress the serialized IR byte sequences using Zstandard.
    """

    def __init__(
        self,
        stream: IO[bytes],
        enable_compression: bool = True,
    ) -> None:
        """
        Registers the handler and opens a serializer on top of `stream`.

        :param stream: A writable byte output stream that receives the serialized IR.
        :param enable_compression: Whether to wrap `stream` in a Zstandard stream writer.
        """
        super().__init__()

        self._enable_compression: bool = enable_compression
        # `None` means "closed"; see `_is_closed`.
        self._serializer: Optional[Serializer] = None
        # Only recorded by `setFormatter`; nothing in this class reads it.
        self._formatter: Optional[logging.Formatter] = None
        self._ostream: IO[bytes] = stream

        self._auto_gen_kv_pairs_buf: AutoGeneratedKeyValuePairsBuffer = (
            AutoGeneratedKeyValuePairsBuffer()
        )

        self._init_new_serializer(stream)

    # override
    def setFormatter(self, fmt: Optional[logging.Formatter]) -> None:
        """
        Records the given formatter and warns that formatters aren't supported.

        Passing `None` is a no-op.

        :param fmt: The formatter to record, or `None`.
        """
        if fmt is None:
            return
        warnings.warn(
            f"{self.__class__.__name__} doesn't currently support Formatters",
            category=RuntimeWarning,
            # Attribute the warning to the caller of `setFormatter` rather than to this line.
            stacklevel=2,
        )
        self._formatter = fmt

    # override
    def emit(self, record: logging.LogRecord) -> None:
        """
        Implements `logging.Handler.emit` to encode the given record into CLP's
        IR format before it's written to the underlying stream.

        Any exception raised while writing is routed to `logging.Handler.handleError`.

        :param record: The log event to serialize.
        """
        try:
            self._write(record)
        except Exception:
            self.handleError(record)

    # Mirrors the interface of `logging.StreamHandler.setStream`.
    def setStream(self, stream: IO[bytes]) -> Optional[IO[bytes]]:
        """
        Sets the instance's stream to the given value, if it's different from
        the current value. The old stream is flushed before the new stream is
        set.

        NOTE: The old stream will also be closed by this method.

        :param stream: A writable byte output stream to which the handler will write the serialized
            IR byte sequences.
        :return: The old stream if the stream was changed, or `None` if it wasn't.
        """

        # NOTE: This function is implemented by mirroring CPython's implementation.

        if stream is self._ostream:
            return None

        old_stream: IO[bytes] = self._ostream
        with self.lock if self.lock else nullcontext():
            # TODO: The following call will close the old stream whereas `logging.StreamHandler`'s
            # implementation will only flush the stream without closing it. To support
            # `logging.StreamHandler`'s behaviour, we need `clp_ffi_py.ir.Serializer` to allow
            # closing the serializer without closing the underlying output stream.
            self._init_new_serializer(stream)
            self._ostream = stream
        return old_stream

    # override
    def close(self) -> None:
        """
        Closes the serializer (if it's still open) and then the handler itself.

        `logging.Handler.close` is always invoked, even when the serializer was already closed or
        closing it raises, so the handler is never left registered with the logging module.
        """
        try:
            self._close_serializer()
        finally:
            super().close()

    def _is_closed(self) -> bool:
        """
        :return: Whether the handler currently has no open serializer.
        """
        return self._serializer is None

    def _close_serializer(self) -> None:
        """
        Closes the current serializer if it's open.

        NOTE: The underlying output stream will also be closed.
        """
        if self._is_closed():
            return
        assert self._serializer is not None
        self._serializer.close()
        self._serializer = None

    def _init_new_serializer(self, stream: IO[bytes]) -> None:
        """
        Initializes a new serializer that will write to the given stream.

        Any serializer that is currently open is closed first.

        :param stream: The stream that the underlying serializer will write to.
        """
        self._close_serializer()
        self._serializer = Serializer(
            ZstdCompressor().stream_writer(stream) if self._enable_compression else stream
        )

    def _write(self, record: logging.LogRecord) -> None:
        """
        Writes the log event into the underlying serializer.

        :param record: The log event to serialize.
        :raise RuntimeError: If the handler has been already closed.
        :raise TypeError: If `record.msg` is not a Python dictionary.
        """
        if self._is_closed():
            raise RuntimeError("Stream already closed.")

        if not isinstance(record.msg, dict):
            raise TypeError("`record.msg` must be a Python dictionary.")

        self._serialize_kv_pair_log_event(
            self._auto_gen_kv_pairs_buf.generate(Timestamp.now(), record), record.msg
        )

    def _serialize_kv_pair_log_event(
        self, auto_gen_kv_pairs: Dict[str, Any], user_gen_kv_pairs: Dict[str, Any]
    ) -> None:
        """
        Serializes one log event made of the two given dictionaries.

        :param auto_gen_kv_pairs: A dict of auto-generated kv-pairs.
        :param user_gen_kv_pairs: A dict of user-generated kv-pairs.
        :raise RuntimeError: If the handler has been already closed.
        """
        if self._is_closed():
            raise RuntimeError("Stream already closed.")
        assert self._serializer is not None
        # Each dictionary is msgpack-encoded separately before being handed to the serializer.
        self._serializer.serialize_log_event_from_msgpack_map(
            serialize_dict_to_msgpack(auto_gen_kv_pairs),
            serialize_dict_to_msgpack(user_gen_kv_pairs),
        )
0 commit comments