Coverage for python/lsst/daf/butler/core/logging.py: 40%
234 statements
coverage.py v7.3.1, created at 2023-10-02 08:00 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", "ButlerLogRecord", "JsonLogFormatter")

import datetime
import logging
import traceback
from collections.abc import Callable, Generator, Iterable, Iterator
from contextlib import contextmanager
from logging import Formatter, LogRecord, StreamHandler
from typing import IO, Any, ClassVar, Union, overload

from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat
from lsst.utils.introspection import get_full_type_name
from lsst.utils.iteration import isplit
from pydantic import ConfigDict, PrivateAttr

_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Default format for log records."""


class MDCDict(dict):
    """Dictionary for MDC data.

    This is an internal class used for better formatting of MDC in Python
    logging output. It behaves like `defaultdict(str)` but overrides the
    ``__str__`` and ``__repr__`` methods to produce output better suited
    for logging records.
    """

    def __getitem__(self, name: str) -> str:
        """Return value for a given key or empty string for missing key."""
        return self.get(name, "")

    def __str__(self) -> str:
        """Return string representation, strings are interpolated without
        quotes.
        """
        items = (f"{k}={self[k]}" for k in sorted(self))
        return "{" + ", ".join(items) + "}"

    def __repr__(self) -> str:
        return str(self)
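
# Illustrative sketch of the behaviour described in the MDCDict docstring:
# missing keys resolve to an empty string and the string form is rendered
# without quotes (the key names here are made up).
#
#     >>> d = MDCDict({"LABEL": "visit_42"})
#     >>> d["RUN"]
#     ''
#     >>> str(d)
#     '{LABEL=visit_42}'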


class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Old log record factory."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC keys for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        previous = {}
        for k, v in mdc.items():
            previous[k] = cls.MDC(k, v)

        try:
            yield
        finally:
            for k, v in previous.items():
                if not v:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds an MDC record to `LogRecord`."""
        old_factory = logging.getLogRecordFactory()

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to its original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory:
            logging.setLogRecordFactory(cls._old_factory)
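
# Illustrative sketch of the MDC machinery above: once the factory is
# installed, records created inside a ``set_mdc`` context carry a copy of the
# MDC (the logger name and key used here are made up).
#
#     >>> ButlerMDC.add_mdc_log_record_factory()
#     >>> with ButlerMDC.set_mdc({"LABEL": "visit_42"}):
#     ...     record = logging.getLogger("example").makeRecord(
#     ...         "example", logging.INFO, "file.py", 1, "hello", (), None
#     ...     )
#     >>> record.MDC["LABEL"]
#     'visit_42'
#     >>> ButlerMDC.restore_log_record_factory()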


class ButlerLogRecord(_BaseModelCompat):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated, which makes it impossible to use as a
    serialization format. Instead, keep a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    exc_info: str | None = None
    MDC: dict[str, str]

    if PYDANTIC_V2:
        model_config = ConfigDict(frozen=True)
    else:

        class Config:
            """Pydantic model configuration."""

            allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.
        """
        # The properties that are a one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc)

        # Sometimes exception information is included so it must be
        # extracted.
        if record.exc_info:
            etype = record.exc_info[0]
            evalue = record.exc_info[1]
            tb = record.exc_info[2]
            record_dict["exc_info"] = "\n".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style usage for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.model_dump()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        return self.format()
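
# Illustrative sketch of converting a standard LogRecord into the model and
# formatting it with a custom format string (the record contents are made up).
#
#     >>> std = logging.LogRecord(
#     ...     "example", logging.WARNING, "file.py", 10, "disk %s full", ("nearly",), None
#     ... )
#     >>> rec = ButlerLogRecord.from_record(std)
#     >>> rec.message
#     'disk nearly full'
#     >>> rec.format("{levelname} {name} - {message}")
#     'WARNING example - disk nearly full'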


# The class below can convert LogRecord to ButlerLogRecord if needed.
Record = LogRecord | ButlerLogRecord


if PYDANTIC_V2:
    from pydantic import RootModel  # type: ignore

    class _ButlerLogRecords(RootModel):
        root: list[ButlerLogRecord]

else:

    class _ButlerLogRecords(_BaseModelCompat):  # type:ignore[no-redef]
        __root__: list[ButlerLogRecord]

        @property
        def root(self) -> list[ButlerLogRecord]:
            return self.__root__


# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(_ButlerLogRecords):
    """Class representing a collection of `ButlerLogRecord`."""

    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.
        """
        if PYDANTIC_V2:
            return cls(list(records))  # type: ignore
        else:
            return cls(__root__=list(records))  # type: ignore

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.model_validate_json()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file.
            max = 32
            if len(startdata) > max:
                startdata = f"{startdata[:max]!r}..."
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {startdata!r}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            all = first_line + stream.read()
            return cls.model_validate_json(all)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.model_validate_json(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.model_validate_json(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.model_dump_json()`` or a streaming format of one JSON
            `ButlerLogRecord` per line. This can also support a zero-length
            string.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # Only send the first character for analysis.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.model_validate_json(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.model_validate_json(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.root)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.root)

    def __setitem__(self, index: int, value: Record) -> None:
        self.root[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: slice | int) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.root[index]
        if isinstance(item, list):
            if PYDANTIC_V2:
                return type(self)(item)  # type: ignore
            else:
                return type(self)(__root__=item)  # type: ignore
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.root.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        del self.root[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.root)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            raise ValueError(f"Can only append item of type {type(record)}")
        return record

    def insert(self, index: int, value: Record) -> None:
        self.root.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        value = self._validate_record(value)
        self.root.append(value)

    def clear(self) -> None:
        self.root.clear()

    def extend(self, records: Iterable[Record]) -> None:
        self.root.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        return self.root.pop(index)

    def reverse(self) -> None:
        self.root.reverse()
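
# Illustrative sketch of round-tripping records through the one-record-per-line
# streaming form accepted by ``from_raw`` (the record contents are made up).
#
#     >>> std = logging.LogRecord("example", logging.INFO, "file.py", 1, "hello", (), None)
#     >>> records = ButlerLogRecords.from_records([ButlerLogRecord.from_record(std)])
#     >>> serialized = "\n".join(r.model_dump_json() for r in records)
#     >>> restored = ButlerLogRecords.from_raw(serialized)
#     >>> len(restored)
#     1
#     >>> restored[0].message
#     'hello'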


class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records."""

    def __init__(self) -> None:
        super().__init__()
        if PYDANTIC_V2:
            self.records = ButlerLogRecords([])  # type: ignore
        else:
            self.records = ButlerLogRecords(__root__=[])  # type: ignore

    def emit(self, record: LogRecord) -> None:
        self.records.append(record)


class JsonLogFormatter(Formatter):
    """Format a `LogRecord` in JSON format."""

    def format(self, record: LogRecord) -> str:
        butler_record = ButlerLogRecord.from_record(record)
        return butler_record.model_dump_json(exclude_unset=True, exclude_defaults=True)
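
# Illustrative sketch of wiring these classes into standard logging: the
# handler accumulates ButlerLogRecords in memory, while the formatter can be
# attached to any stream handler to emit one JSON record per line (the logger
# name here is made up).
#
#     >>> log = logging.getLogger("example")
#     >>> handler = ButlerLogRecordHandler()
#     >>> log.addHandler(handler)
#     >>> log.warning("something happened")
#     >>> len(handler.records)
#     1
#     >>> stream_handler = StreamHandler()
#     >>> stream_handler.setFormatter(JsonLogFormatter())
#     >>> log.addHandler(stream_handler)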