# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "ButlerLogRecord",
    "ButlerLogRecordHandler",
    "ButlerLogRecords",
    "ButlerMDC",
    "JsonLogFormatter",
)

import datetime
import io
import logging
import traceback
from collections.abc import Callable, Generator, Iterable, Iterator, MutableSequence
from contextlib import contextmanager
from logging import Formatter, LogRecord, StreamHandler
from types import TracebackType
from typing import IO, Any, ClassVar, Literal, Self, TypeVar, cast, overload

import pydantic_core
from pydantic import BaseModel, ConfigDict, Field, RootModel

_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Default format for log records."""


class MDCDict(dict):
    """Dictionary for MDC data.

    This is an internal class used for better formatting of MDC in Python
    logging output. It behaves like `defaultdict(str)` but overrides the
    ``__str__`` and ``__repr__`` methods to produce output better suited
    for logging records.
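
    Examples
    --------
    A brief illustration of the formatting behavior (example added for
    clarity; values are arbitrary):

    >>> d = MDCDict({"b": "2", "a": "1"})
    >>> str(d)
    '{a=1, b=2}'
    >>> d["missing"]
    ''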
61 """
63 def __getitem__(self, name: str) -> str:
64 """Return value for a given key or empty string for missing key."""
65 return self.get(name, "")
67 def __str__(self) -> str:
68 """Return string representation, strings are interpolated without
69 quotes.
70 """
71 items = (f"{k}={self[k]}" for k in sorted(self))
72 return "{" + ", ".join(items) + "}"
74 def __repr__(self) -> str:
75 return str(self)


class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Old log record factory."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key.
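
        Examples
        --------
        A minimal sketch (the key name is chosen for illustration):

        >>> previous = ButlerMDC.MDC("LABEL", "value")
        >>> ButlerMDC.MDCRemove("LABEL")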
108 """
109 old_value = cls._MDC[key]
110 cls._MDC[key] = value
111 return old_value
113 @classmethod
114 def MDCRemove(cls, key: str) -> None:
115 """Clear the MDC value associated with this key.
117 Can be called even if the key is not known to MDC.
119 Parameters
120 ----------
121 key : `str`
122 Key for which the MDC value should be removed.
123 """
124 cls._MDC.pop(key, None)
126 @classmethod
127 def clear_mdc(cls) -> None:
128 """Clear all MDC entries."""
129 cls._MDC.clear()
131 @classmethod
132 @contextmanager
133 def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
134 """Set the MDC key for this context.
136 This context manager also adds a mapping named ``mdc`` to any
137 exceptions that escape it.
139 Parameters
140 ----------
141 mdc : `dict` of `str`, `str`
142 MDC keys to update temporarily.
144 Notes
145 -----
146 Other MDC keys are not modified. The previous values are restored
147 on exit (removing them if the were unset previously).
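
        Examples
        --------
        A minimal sketch of attaching temporary context while logging (the
        key and logger name are illustrative; pair this with
        `add_mdc_log_record_factory` so records carry the ``MDC``
        attribute):

        >>> ButlerMDC.add_mdc_log_record_factory()
        >>> with ButlerMDC.set_mdc({"LABEL": "run1"}):
        ...     logging.getLogger("example").warning("inside context")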
148 """
149 previous = {}
150 for k, v in mdc.items():
151 previous[k] = cls.MDC(k, v)
153 try:
154 yield
155 except BaseException as e:
156 # In logging, inner context overrules outer context. Need the same
157 # for exceptions.
158 inner_context: MDCDict = e.mdc if hasattr(e, "mdc") else MDCDict()
159 e.mdc = cls._MDC.copy() | inner_context # type: ignore[attr-defined]
160 raise
161 finally:
162 for k, v in previous.items():
163 if not v:
164 cls.MDCRemove(k)
165 else:
166 cls.MDC(k, v)
168 @classmethod
169 def add_mdc_log_record_factory(cls) -> None:
170 """Add a log record factory that adds a MDC record to `LogRecord`."""
        old_factory = logging.getLogRecordFactory()

        # Need explicit args in case exc_info is passed as a positional
        # argument (and because the factory API has a parameter literally
        # called `args`).
        def record_factory(
            name: str,
            level: int,
            fn: str,
            lno: int,
            msg: str,
            args: tuple,
            exc_info: tuple | None | Literal[False],
            func: str | None = None,
            sinfo: TracebackType | None = None,
            **kwargs: Any,
        ) -> LogRecord:
            record = old_factory(name, level, fn, lno, msg, args, exc_info, func, sinfo, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            mdc = MDCDict(cls._MDC)
            if exc_info is not None and exc_info is not False:
                _, ex, _ = exc_info
                # TODO: this doesn't handle chained exceptions, fix on DM-47546
                if hasattr(ex, "mdc"):
                    # Context at the point where the exception was raised
                    # takes precedence.
                    mdc.update(ex.mdc)
            record.MDC = mdc
            return record

        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory:
            logging.setLogRecordFactory(cls._old_factory)


class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    exc_info: str | None = None
    MDC: dict[str, str]

    model_config = ConfigDict(frozen=True)

    @classmethod
    def from_record(cls, record: LogRecord) -> ButlerLogRecord:
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.
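
        Examples
        --------
        A minimal sketch (the record contents are illustrative):

        >>> py_record = logging.LogRecord(
        ...     "example", logging.INFO, "file.py", 1, "hello", (), None
        ... )
        >>> ButlerLogRecord.from_record(py_record).message
        'hello'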
        """
        # The properties that are a one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.UTC)

        # Sometimes exception information is included so it must be
        # extracted.
        if record.exc_info:
            etype = record.exc_info[0]
            evalue = record.exc_info[1]
            tb = record.exc_info[2]
            record_dict["exc_info"] = "\n".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            `str.format` style for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
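
        Examples
        --------
        An illustrative custom format, given an existing ``record``:

        >>> record.format("{levelname}: {message}")  # doctest: +SKIP
        'INFO: hello'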
300 """
301 if log_format is None:
302 log_format = self._log_format
304 as_dict = self.model_dump()
306 # Special case MDC content. Convert it to an MDCDict
307 # so that missing items do not break formatting.
308 as_dict["MDC"] = MDCDict(as_dict["MDC"])
310 as_dict["asctime"] = as_dict["asctime"].isoformat()
311 formatted = log_format.format(**as_dict)
312 return formatted
314 def __str__(self) -> str:
315 return self.format()
318# The class below can convert LogRecord to ButlerLogRecord if needed.
319Record = LogRecord | ButlerLogRecord
322_T = TypeVar("_T", bound="ButlerLogRecords")


class _ButlerLogRecordsModelV1(RootModel):
    """Pydantic model used for version 1 JSON log files: a simple list of
    `ButlerLogRecord` instances.
    """

    root: list[ButlerLogRecord]

    def wrap(self, container_type: type[_T]) -> _T:
        """Convert this model into a `ButlerLogRecords` instance.

        Parameters
        ----------
        container_type : `type`
            `ButlerLogRecords` subclass to use.

        Returns
        -------
        container : `ButlerLogRecords`
            Container for log records.
        """
        return container_type.from_records(self.root)


class _ButlerLogRecordsModel(BaseModel):
    """Pydantic model used for version 2(+) JSON log files: a struct with a
    ``records`` attribute.
    """

    CURRENT_VERSION: ClassVar[int] = 2
    """File format version written by this module."""

    extended: Literal[True]
    """Constant field that appears at the beginning of every version 2+ JSON
    log file.

    This field must be initialized directly when the model is constructed,
    because using a default causes it to be omitted by Pydantic when
    serializing with ``exclude_unset`` or ``exclude_defaults`` (both of which
    are used in serializing logs).
    """

    version: int
    """File format version for this file.

    This field must be initialized directly when the model is constructed,
    because using a default causes it to be omitted by Pydantic when
    serializing with ``exclude_unset`` or ``exclude_defaults`` (both of which
    are used in serializing logs).
    """

    records: list[ButlerLogRecord] = Field(default_factory=list)
    """The actual log records."""

    model_config = ConfigDict(extra="allow")

    def wrap(self, container_type: type[_T]) -> _T:
        """Convert this model into a `ButlerLogRecords` instance.

        Parameters
        ----------
        container_type : `type`
            `ButlerLogRecords` subclass to use.

        Returns
        -------
        container : `ButlerLogRecords`
            Container for log records.
        """
        return container_type.from_records(self.records, extra=self.__pydantic_extra__)


class ButlerLogRecords(MutableSequence[ButlerLogRecord]):
    """A container class for `ButlerLogRecord` objects.

    Parameters
    ----------
    records : `list` [`ButlerLogRecord`]
        List of records to use directly as the backing store for the
        container.
    extra : `dict`, optional
        Additional JSON data included with the log records. Subclasses may
        interpret structured information, but the base class just sets this as
        the ``extra`` attribute.

    Notes
    -----
    `ButlerLogRecords` supports two different file formats:

    - Full-container serialization to JSON, in which records are stored in a
      ``records`` array and extra fields may be present (for backwards
      compatibility, *reading* records directly from a JSON list is also
      supported).

    - Streaming serialization, in which each line is a one-line JSON
      representation of a single `ButlerLogRecord`. Extra fields may be
      added to this format by appending a single line with the value given
      by `STREAMING_EXTRA_DELIMITER`, which indicates that the remainder of
      the file is a single JSON block.

    Subclasses of `ButlerLogRecords` are expected to support these formats by
    using the "extra" JSON fields to hold any additional state. If subclasses
    intercept ``extra`` at construction in a way that prevents that
    information from being held in the base class ``extra`` attribute, they
    must override `_from_record_subset` and `to_json_data` to pass that state
    to slices and save it, respectively.
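
    For example, a file in the streaming format looks like this (record
    fields are elided here for brevity; real lines carry all
    `ButlerLogRecord` fields)::

        {"name": "a", "levelno": 20, "message": "first", ...}
        {"name": "a", "levelno": 30, "message": "second", ...}
        ###EXTRA###
        {"any": "extra JSON data"}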
429 """
431 def __init__(self, records: list[ButlerLogRecord], extra: dict[str, object] | None = None):
432 self._records = records
433 self._log_format: str | None = None
434 self.extra = extra if extra is not None else {}
436 STREAMING_EXTRA_DELIMITER: ClassVar[str] = "###EXTRA###"
437 """Special string written (on its own line) after streamed log records to
438 indicate that the rest of the file is a JSON blob of "extra" data.
439 """

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord], extra: dict[str, object] | None = None) -> Self:
        """Create a collection from an iterable.

        Parameters
        ----------
        records : `~collections.abc.Iterable` [`ButlerLogRecord`]
            The records to seed this class with.
        extra : `dict`, optional
            Additional JSON data included with the log records. Subclasses may
            interpret structured information, but the base class just sets
            this as the ``extra`` attribute.

        Returns
        -------
        container : `ButlerLogRecords`
            New log record container.
        """
        return cls(list(records), extra=extra)

    @classmethod
    def from_file(cls, filename: str) -> Self:
        """Read records from a file.

        Parameters
        ----------
        filename : `str`
            Name of the file containing the JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    def _from_record_subset(self, records: list[ButlerLogRecord]) -> Self:
        """Return a new instance with a subset of the original's records.

        Parameters
        ----------
        records : `list` [`ButlerLogRecord`]
            New list of records.

        Returns
        -------
        copy : `ButlerLogRecords`
            New instance of this type.

        Notes
        -----
        This is a hook provided to allow subclasses to transfer extra state
        when the container is sliced.
        """
        return self.from_records(records, extra=self.extra)

    @staticmethod
    def _generic_startswith(startdata: str | bytes, prefix: str) -> bool:
        # A type-safe wrapper for `str.startswith` and `bytes.startswith` that
        # avoids converting `startdata`, as it might be big.
        if isinstance(startdata, str):
            return startdata.startswith(prefix)
        else:
            return startdata.startswith(prefix.encode())

    @classmethod
    def _detect_model(
        cls,
        startdata: str | bytes,
    ) -> type[_ButlerLogRecordsModelV1] | type[_ButlerLogRecordsModel] | None:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        model_type : `type` or `None`
            If the data looks like either the `_ButlerLogRecordsModelV1` or
            `_ButlerLogRecordsModel` models, that model type. Otherwise
            (streaming records, or an empty string), `None`.

        Raises
        ------
        ValueError
            Raised if the data doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return None

        if cls._generic_startswith(startdata, "["):
            # This is a JSON array of records.
            return _ButlerLogRecordsModelV1
        elif cls._generic_startswith(startdata, cls.STREAMING_EXTRA_DELIMITER):
            # This is an empty log file in the record-per-line format.
            return None
        elif not cls._generic_startswith(startdata, "{"):
            # Limit the length of string reported in the error message in
            # case this is an enormous file.
            max_length = 32
            if len(startdata) > max_length:
                msg = f"{startdata[:max_length]!r}..."
            else:
                msg = repr(startdata)
            raise ValueError(
                f"Unrecognized JSON log format. Expected '{{' or '[' but got {startdata[0]!r} from {msg}."
            )
        elif cls._generic_startswith(startdata, '{"extended":true'):
            # This is a JSON model with a field for records.
            return _ButlerLogRecordsModel
        # Assume a record per line.
        return None

    @classmethod
    def from_stream(cls, stream: IO) -> Self:
        """Read records from an I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        container : `ButlerLogRecords`
            New log record container.
        """
        first_line = stream.readline()

        empty_stream = False
        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])
        elif cls._generic_startswith(first_line, cls.STREAMING_EXTRA_DELIMITER):
            empty_stream = True

        model_type = cls._detect_model(first_line)

        if model_type:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            all_data = first_line + stream.read()
            return model_type.model_validate_json(all_data).wrap(cls)

        # A stream of records with one record per line.
        if not empty_stream:
            records = [ButlerLogRecord.model_validate_json(first_line)]
            for line in stream:
                line = line.rstrip()
                if cls._generic_startswith(line, cls.STREAMING_EXTRA_DELIMITER):
                    break
                elif line:  # skip blank lines
                    records.append(ButlerLogRecord.model_validate_json(line))
        else:
            # No records but there might be extra metadata.
            records = []
        extra_data = stream.read()
        if extra_data:
            extra = pydantic_core.from_json(extra_data)
        else:
            extra = {}
        return cls.from_records(records, extra=extra)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> ButlerLogRecords:
        """Parse the raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            `to_json_data` or a streaming format of one JSON `ButlerLogRecord`
            per line. A zero-length string is also supported.

        Returns
        -------
        container : `ButlerLogRecords`
            New log record container.
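
        Examples
        --------
        A zero-length string yields an empty container:

        >>> len(ButlerLogRecords.from_raw(""))
        0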
625 """
626 if isinstance(serialized, str):
627 return cls.from_stream(io.StringIO(serialized))
628 else:
629 return cls.from_stream(io.BytesIO(serialized))
631 def to_json_data(self) -> str:
632 """Serialize to a JSON string.
634 Returns
635 -------
636 data : `str`
637 String containing JSON data.
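
        Examples
        --------
        A sketch of a round trip through `from_raw`, given an existing
        ``records`` container:

        >>> clone = ButlerLogRecords.from_raw(records.to_json_data())  # doctest: +SKIP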
638 """
639 if self.extra:
640 # There's no way to tell Pydantic to add extra fields back in
641 # when serializing directly to JSON; we have to convert to a dict
642 # first and then to JSON.
643 py_data = _ButlerLogRecordsModel(
644 extended=True, version=_ButlerLogRecordsModel.CURRENT_VERSION, records=self._records
645 ).model_dump(exclude_unset=True, exclude_defaults=True)
646 py_data.update(self.extra)
647 return pydantic_core.to_json(py_data).decode()
648 else:
649 return _ButlerLogRecordsModel(
650 extended=True, version=_ButlerLogRecordsModel.CURRENT_VERSION, records=self._records
651 ).model_dump_json(exclude_unset=True, exclude_defaults=True)

    @property
    def log_format(self) -> str:
        """The log format string for these records."""
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    @log_format.setter
    def log_format(self, format: str | None) -> None:
        self.set_log_format(format)

    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        This may also be set via the `log_format` property; this method is
        provided for backwards compatibility.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
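
        Examples
        --------
        An illustrative format change, given an existing ``records``
        container:

        >>> records.set_log_format("{levelname}: {message}")  # doctest: +SKIP
        >>> print(records)  # doctest: +SKIP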
681 """
682 previous = self._log_format
683 self._log_format = format
684 return previous
686 def __len__(self) -> int:
687 return len(self._records)
689 def __iter__(self) -> Iterator[ButlerLogRecord]:
690 return iter(self._records)
692 @overload
693 def __setitem__(self, index: int, value: Record) -> None: ... 693 ↛ exitline 693 didn't return from function '__setitem__' because
695 @overload
696 def __setitem__(self, index: slice, value: Iterable[Record]) -> None: ... 696 ↛ exitline 696 didn't return from function '__setitem__' because
698 def __setitem__(self, index: slice | int, value: Record | Iterable[Record]) -> None:
699 if isinstance(index, slice):
700 self._records[index] = [self._validate_record(v) for v in cast(Iterable[Record], value)]
701 else:
702 self._records[index] = self._validate_record(cast(Record, value))
704 @overload
705 def __getitem__(self, index: int) -> ButlerLogRecord: ... 705 ↛ exitline 705 didn't return from function '__getitem__' because
707 @overload
708 def __getitem__(self, index: slice) -> Self: ... 708 ↛ exitline 708 didn't return from function '__getitem__' because
710 def __getitem__(self, index: slice | int) -> Self | ButlerLogRecord:
711 # Handles slices and returns a new collection in that
712 # case.
713 item = self._records[index]
714 if isinstance(item, list):
715 return self._from_record_subset(item)
716 else:
717 return item
719 def __delitem__(self, index: slice | int) -> None:
720 del self._records[index]
722 def __eq__(self, other: object) -> bool:
723 if isinstance(other, ButlerLogRecords):
724 return self._records == other._records and self.extra == other.extra
725 return NotImplemented
727 def __str__(self) -> str:
728 # Ensure that every record uses the same format string.
729 return "\n".join(record.format(self.log_format) for record in self)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            raise ValueError(f"Can only append LogRecord or ButlerLogRecord, not item of type {type(record)}")
        return record

    def insert(self, index: int, value: Record) -> None:
        # Docstring provided by ABC.
        self._records.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        # Docstring provided by ABC.
        # Only overridden to accept `LogRecord`, not just `ButlerLogRecord`.
        self._records.append(self._validate_record(value))

    @classmethod
    def write_streaming_extra(cls, file: IO[str], extra_data: str) -> None:
        """Append the special delimiter and extra JSON data to a file written
        in streaming mode.

        Parameters
        ----------
        file : `typing.IO`
            File object to write to, pointing at the end of the streamed log
            records.
        extra_data : `str`
            Extra JSON data as a string.
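
        Examples
        --------
        A sketch with an in-memory stream (the extra payload is arbitrary
        JSON chosen for illustration):

        >>> buffer = io.StringIO()
        >>> ButlerLogRecords.write_streaming_extra(buffer, '{"key": "value"}')
        >>> buffer.getvalue().splitlines()
        ['###EXTRA###', '{"key": "value"}']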
761 """
762 print(cls.STREAMING_EXTRA_DELIMITER, file=file)
763 file.write(extra_data)


class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records.

    Parameters
    ----------
    records : `ButlerLogRecords`, optional
        Container to store logs in.
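
    Examples
    --------
    A minimal sketch of capturing records from a logger (the logger name is
    illustrative):

    >>> handler = ButlerLogRecordHandler()
    >>> log = logging.getLogger("capture_example")
    >>> log.addHandler(handler)
    >>> log.warning("stored")
    >>> len(handler.records)
    1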
773 """
775 def __init__(self, records: ButlerLogRecords | None = None) -> None:
776 super().__init__()
777 self.records = ButlerLogRecords([]) if records is None else records
779 def emit(self, record: LogRecord) -> None:
780 self.records.append(record)


class JsonLogFormatter(Formatter):
    """Format a `logging.LogRecord` in JSON format.

    def format(self, record: LogRecord) -> str:
        butler_record = ButlerLogRecord.from_record(record)
        return butler_record.model_dump_json(exclude_unset=True, exclude_defaults=True)