Coverage for python/lsst/daf/butler/core/logging.py: 40%
232 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-21 09:55 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-21 09:55 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Public names exported by this module.
__all__ = (
    "ButlerMDC",
    "ButlerLogRecords",
    "ButlerLogRecordHandler",
    "ButlerLogRecord",
    "JsonLogFormatter",
)
24import datetime
25import logging
26import traceback
27from collections.abc import Callable, Generator, Iterable, Iterator
28from contextlib import contextmanager
29from logging import Formatter, LogRecord, StreamHandler
30from typing import IO, Any, ClassVar, Union, overload
32from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat
33from lsst.utils.introspection import get_full_type_name
34from lsst.utils.iteration import isplit
35from pydantic import PrivateAttr
_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Default ``str.format``-style template used when formatting log records."""
class MDCDict(dict):
    """Dictionary holding MDC data.

    Internal helper used to get nicer formatting of MDC content in Python
    logging output. Lookups of missing keys yield an empty string (without
    inserting the key), and ``__str__`` / ``__repr__`` are overridden to
    produce a compact form suited to log records.
    """

    def __getitem__(self, name: str) -> str:
        """Return the value stored for ``name``, or ``""`` if absent."""
        return self.get(name, "")

    def __str__(self) -> str:
        """Return ``{key=value, ...}`` with keys sorted and values
        interpolated without quotes.
        """
        body = ", ".join(f"{key}={self[key]}" for key in sorted(self))
        return f"{{{body}}}"

    def __repr__(self) -> str:
        # Deliberately identical to the string form.
        return f"{self!s}"
class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Log record factory that was active before the MDC factory was
    installed; `None` when there is nothing to restore."""

    _mdc_factory: Callable[..., logging.LogRecord] | None = None
    """The MDC-adding factory installed by `add_mdc_log_record_factory`,
    kept so that a repeated installation can be detected."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key to remove.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        previous = {}
        for k, v in mdc.items():
            previous[k] = cls.MDC(k, v)

        try:
            yield
        finally:
            for k, v in previous.items():
                # An empty previous value means the key was effectively
                # unset, so remove it rather than storing "".
                if not v:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        Calling this again while the MDC factory is still the active
        factory is a no-op. Re-wrapping would replace the remembered
        original factory with the MDC factory itself, making it impossible
        for `restore_log_record_factory` to reinstate the real original.
        """
        current_factory = logging.getLogRecordFactory()

        if cls._mdc_factory is not None and current_factory is cls._mdc_factory:
            # Our factory is already installed; nothing to do.
            return

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = current_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        cls._old_factory = current_factory
        cls._mdc_factory = record_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory` since the last restore.
        """
        if cls._old_factory:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so that a later restore cannot
            # clobber a factory that someone else installed in between.
            cls._old_factory = None
            cls._mdc_factory = None
class ButlerLogRecord(_BaseModelCompat):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    exc_info: str | None = None
    MDC: dict[str, str]

    class Config:
        """Pydantic model configuration."""

        allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New record populated from ``record``.
        """
        # The properties that are one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype, evalue, tb = record.exc_info
            # Each string returned by format_exception() already ends in a
            # newline, so concatenate directly; joining with "\n" would
            # insert a blank line after every traceback line.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This is a `str.format` (``{``-style)
            template whose fields are the attributes of this record. If
            `None` the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.dict()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        return self.format()
# Anything accepted where a record is required: a standard LogRecord is
# converted to a ButlerLogRecord by the collection class below when needed.
Record = LogRecord | ButlerLogRecord


if not PYDANTIC_V2:

    class _ButlerLogRecords(_BaseModelCompat):
        """Pydantic v1 base: a custom-root model wrapping a list of records."""

        __root__: list[ButlerLogRecord]

        @property
        def root(self) -> list[ButlerLogRecord]:
            # Expose the v1 ``__root__`` under the name pydantic v2 uses so
            # that subclasses can use ``self.root`` with either version.
            return self.__root__

else:
    from pydantic import RootModel  # type: ignore

    class _ButlerLogRecords(RootModel):  # type:ignore[no-redef]
        """Pydantic v2 base: a root model wrapping a list of records."""

        root: list[ButlerLogRecord]
# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(_ButlerLogRecords):
    """Class representing a collection of `ButlerLogRecord`.

    The collection offers a list-like interface (indexing, slicing,
    ``append``, ``extend``, ...). Standard `logging.LogRecord` instances
    given to the mutating methods are converted to `ButlerLogRecord`.
    """

    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection holding the supplied records.
        """
        if PYDANTIC_V2:
            return cls(list(records))  # type: ignore
        else:
            return cls(__root__=list(records))  # type: ignore

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.parse_raw()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]
            error_type = "str"

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char == "{":
            # Assume a record per line.
            return False

        # Limit the length of string reported in error message in case
        # this is an enormous file. The repr is taken exactly once so the
        # snippet is not wrapped in a second layer of quotes.
        max_reported = 32
        snippet = repr(startdata[:max_reported])
        if len(startdata) > max_reported:
            snippet += "..."
        raise ValueError(
            "Unrecognized JSON log format. Expected '{' or '[' but got"
            f" {first_char!r} from {error_type} content starting with {snippet}"
        )

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream. Empty if the stream has no
            content.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.parse_raw(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.parse_raw(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.parse_raw(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.json()`` or a streaming format of one JSON `ButlerLogRecord`
            per line. This can also support a zero-length string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The parsed records.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The detection only inspects the first character; the full content
        # is passed so that a useful snippet can be reported on failure.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.parse_raw(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.parse_raw(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string.

        Falls back to the module default if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.root)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.root)

    def __setitem__(self, index: int, value: Record) -> None:
        self.root[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: slice | int) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.root[index]
        if isinstance(item, list):
            if PYDANTIC_V2:
                return type(self)(item)  # type: ignore
            else:
                return type(self)(__root__=item)  # type: ignore
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.root.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        del self.root[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.root)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return ``record`` as a `ButlerLogRecord`, converting if needed.

        Parameters
        ----------
        record : `logging.LogRecord` or `ButlerLogRecord`
            The record to check.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            The same object if it was already a `ButlerLogRecord`, else a
            new record converted from the `~logging.LogRecord`.

        Raises
        ------
        ValueError
            Raised if the record is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            # Report the accepted types and the offending type.
            raise ValueError(
                f"Records must be of type LogRecord or ButlerLogRecord, not {type(record)}"
            )
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index."""
        self.root.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Append a record to the end of the collection."""
        value = self._validate_record(value)
        self.root.append(value)

    def clear(self) -> None:
        """Remove all records."""
        self.root.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Append every record from the iterable."""
        self.root.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at ``index`` (default last)."""
        return self.root.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.root.reverse()
class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records.

    Every emitted record is stored in the ``records`` attribute, a
    `ButlerLogRecords` collection, instead of being written anywhere.
    """

    def __init__(self) -> None:
        super().__init__()
        # Start with an empty collection; from_records() selects the
        # constructor form appropriate for the installed pydantic version.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        # The collection converts the LogRecord to a ButlerLogRecord.
        self.records.append(record)
class JsonLogFormatter(Formatter):
    """Format a `LogRecord` in JSON format."""

    def format(self, record: LogRecord) -> str:
        """Return the JSON serialization of ``record``.

        The record is first converted to a `ButlerLogRecord`; fields that
        were not set or that hold their default value are omitted.
        """
        return ButlerLogRecord.from_record(record).json(exclude_unset=True, exclude_defaults=True)