Coverage for python/lsst/daf/butler/logging.py: 41%
237 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-01 11:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-01 11:00 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Public names exported by ``from <module> import *``.
__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", "ButlerLogRecord", "JsonLogFormatter")
30import datetime
31import logging
32import traceback
33from collections.abc import Callable, Generator, Iterable, Iterator
34from contextlib import contextmanager
35from logging import Formatter, LogRecord, StreamHandler
36from typing import IO, Any, ClassVar, Union, overload
38from lsst.utils.introspection import get_full_type_name
39from lsst.utils.iteration import isplit
40from pydantic import ConfigDict, PrivateAttr
42from ._compat import PYDANTIC_V2, _BaseModelCompat
# ``str.format``-style template; the field names match attributes of
# `ButlerLogRecord`.
_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Default format for log records."""
class MDCDict(dict):
    """Dictionary for MDC data.

    This is an internal class used for better formatting of MDC in Python
    logging output. Item lookup behaves like ``defaultdict(str)`` in that a
    missing key yields an empty string, although here the lookup does not
    insert the missing key. ``__str__`` and ``__repr__`` are overridden to
    produce output better suited for logging records.
    """

    def __getitem__(self, name: str) -> str:
        """Return value for a given key or empty string for missing key."""
        return self.get(name, "")

    def __str__(self) -> str:
        """Return string representation, strings are interpolated without
        quotes.

        Keys are emitted in sorted order as ``key=value`` pairs.
        """
        items = (f"{k}={self[k]}" for k in sorted(self))
        return "{" + ", ".join(items) + "}"

    def __repr__(self) -> str:
        # Use the same quote-free form for repr so that the dict looks the
        # same however it is interpolated into a log message.
        return str(self)
class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Old log record factory."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.
        """
        cls._MDC.pop(key, None)

    @classmethod
    def clear_mdc(cls) -> None:
        """Clear all MDC entries."""
        cls._MDC.clear()

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously). A key whose
        previous value was an empty string is treated as unset.
        """
        previous = {}
        for k, v in mdc.items():
            previous[k] = cls.MDC(k, v)

        try:
            yield
        finally:
            for k, v in previous.items():
                if not v:
                    # An empty previous value means the key was missing
                    # (or blank), so drop it rather than store "".
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        The factory that was active before the first call is remembered so
        that `restore_log_record_factory` can reinstate it. Repeated calls
        do not overwrite that remembered factory.
        """
        old_factory = logging.getLogRecordFactory()

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        # Only remember the factory displaced by the first installation.
        # Otherwise a second call would save our own wrapper and the
        # original factory could never be restored.
        if cls._old_factory is None:
            cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so that a later add/restore cycle
            # captures whatever factory is current at that time.
            cls._old_factory = None
class ButlerLogRecord(_BaseModelCompat):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    exc_info: str | None = None
    MDC: dict[str, str]

    # Records are immutable once created; the configuration spelling
    # depends on the major version of pydantic in use.
    if PYDANTIC_V2:
        model_config = ConfigDict(frozen=True)
    else:

        class Config:
            """Pydantic model configuration."""

            allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New record populated from the supplied log record.
        """
        # The properties that are one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later. Records created without
        # the MDC record factory have no MDC attribute at all.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.UTC)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype = record.exc_info[0]
            evalue = record.exc_info[1]
            tb = record.exc_info[2]
            # NOTE(review): traceback.format_exception() returns strings
            # that already end in a newline, so joining with "\n" yields
            # blank lines between entries. Kept as-is so that the stored
            # text does not change -- confirm whether "".join is wanted.
            record_dict["exc_info"] = "\n".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style use for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.model_dump()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        # Use the class default format.
        return self.format()
# The class below can convert LogRecord to ButlerLogRecord if needed.
Record = LogRecord | ButlerLogRecord
# Base class holding the list of records. Both variants expose the list
# as ``root`` so that the subclass does not need to know which major
# version of pydantic is in use when accessing the records.
if PYDANTIC_V2:
    from pydantic import RootModel  # type: ignore

    class _ButlerLogRecords(RootModel):
        root: list[ButlerLogRecord]

else:

    class _ButlerLogRecords(_BaseModelCompat):  # type:ignore[no-redef]
        __root__: list[ButlerLogRecord]

        @property
        def root(self) -> list[ButlerLogRecord]:
            # Alias matching the attribute name used by the RootModel
            # variant above.
            return self.__root__
# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(_ButlerLogRecords):
    """Class representing a collection of `ButlerLogRecord`.

    The collection offers list-like methods. Wherever a record is added,
    a `logging.LogRecord` is also accepted and is converted to a
    `ButlerLogRecord` first.
    """

    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection containing the supplied records.
        """
        if PYDANTIC_V2:
            return cls(list(records))  # type: ignore
        else:
            return cls(__root__=list(records))  # type: ignore

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.model_validate_json()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            # Indexing bytes yields an int so convert it to a character.
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The repr is applied exactly once
            # so that truncated content is not quoted twice.
            max_reported = 32
            if len(startdata) > max_reported:
                snippet = f"{startdata[:max_reported]!r}..."
            else:
                snippet = repr(startdata)
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {snippet}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream. Empty if the stream has no
            content.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.model_validate_json(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.model_validate_json(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.model_validate_json(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.model_dump_json()`` or a streaming format of one JSON
            `ButlerLogRecord` per line. This can also support a zero-length
            string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The parsed records.

        Raises
        ------
        TypeError
            Raised if the serialized form is neither `str` nor `bytes`.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The detection only inspects the start of the content; passing
        # the whole object avoids making a copy.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.model_validate_json(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.model_validate_json(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string
        (`str`). The module default is used if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.root)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.root)

    def __setitem__(self, index: int, value: Record) -> None:
        self.root[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: slice | int) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.root[index]
        if isinstance(item, list):
            # Reuse the alternate constructor so that the pydantic
            # version handling lives in one place.
            return type(self).from_records(item)
        return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.root.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        del self.root[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.root)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return the given record as a `ButlerLogRecord`.

        Parameters
        ----------
        record : `ButlerLogRecord` or `logging.LogRecord`
            Record to check. A `logging.LogRecord` is converted.

        Returns
        -------
        validated : `ButlerLogRecord`
            The record in the form stored by this collection.

        Raises
        ------
        ValueError
            Raised if the record is of an unsupported type.
        """
        if isinstance(record, ButlerLogRecord):
            return record
        if isinstance(record, LogRecord):
            return ButlerLogRecord.from_record(record)
        raise ValueError(
            f"Can only append items of type LogRecord or ButlerLogRecord, not {type(record)}"
        )

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index."""
        self.root.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end of the collection."""
        value = self._validate_record(value)
        self.root.append(value)

    def clear(self) -> None:
        """Remove all records."""
        self.root.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add every record from the iterable to the end of the collection."""
        self.root.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at the given index (default last)."""
        return self.root.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.root.reverse()
class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records.

    Each emitted record is appended to the ``records`` attribute, a
    `ButlerLogRecords` collection, instead of being written out.
    """

    def __init__(self) -> None:
        super().__init__()
        # Use the alternate constructor so that the pydantic version
        # differences are handled in one place.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        """Store the record in the in-memory collection.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record to store.
        """
        self.records.append(record)
class JsonLogFormatter(Formatter):
    """Format a `LogRecord` in JSON format."""

    def format(self, record: LogRecord) -> str:
        """Return the record serialized as a JSON string.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record to format.

        Returns
        -------
        text : `str`
            JSON serialization of the equivalent `ButlerLogRecord`. Fields
            that were not set or that hold their default value are omitted.
        """
        butler_record = ButlerLogRecord.from_record(record)
        return butler_record.model_dump_json(exclude_unset=True, exclude_defaults=True)