Coverage for python/lsst/daf/butler/core/logging.py: 40%
234 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-25 15:14 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-25 15:14 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", "ButlerLogRecord", "JsonLogFormatter")
24import datetime
25import logging
26import traceback
27from collections.abc import Callable, Generator, Iterable, Iterator
28from contextlib import contextmanager
29from logging import Formatter, LogRecord, StreamHandler
30from typing import IO, Any, ClassVar, Union, overload
32from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat
33from lsst.utils.introspection import get_full_type_name
34from lsst.utils.iteration import isplit
35from pydantic import ConfigDict, PrivateAttr
_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Format string applied to log records when no other format is given."""
class MDCDict(dict):
    """Dictionary holding MDC data.

    Internal helper that makes MDC content render nicely in Python logging
    output. Looking up a missing key yields an empty string (much like
    `defaultdict(str)`, although the key is not inserted), and ``__str__``
    and ``__repr__`` produce a compact form suited to log records.
    """

    def __getitem__(self, name: str) -> str:
        """Return the value stored for ``name``, or ``""`` if absent."""
        return self.get(name, "")

    def __str__(self) -> str:
        """Return ``{key=value, ...}`` with keys sorted and values
        interpolated without quotes.
        """
        body = ", ".join([f"{key}={self[key]}" for key in sorted(self)])
        return f"{{{body}}}"

    def __repr__(self) -> str:
        # The repr is deliberately the same as the string form.
        return str(self)
class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Log record factory that was active before the MDC factory was
    installed, or `None` if the MDC factory is not currently installed
    by this class.
    """

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key to remove.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        # `MDC` returns "" both for a missing key and for a key explicitly
        # set to "", so record presence separately to restore state exactly.
        previous: dict[str, tuple[bool, str]] = {}
        for key, value in mdc.items():
            was_set = key in cls._MDC
            previous[key] = (was_set, cls.MDC(key, value))

        try:
            yield
        finally:
            for key, (was_set, old_value) in previous.items():
                if was_set:
                    cls.MDC(key, old_value)
                else:
                    cls.MDCRemove(key)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        If the currently-installed factory is already the one installed by
        this method, nothing is done. Wrapping it a second time would
        overwrite the remembered original factory with our own wrapper and
        `restore_log_record_factory` could then never remove it.
        """
        old_factory = logging.getLogRecordFactory()
        if getattr(old_factory, "_is_butler_mdc_factory", False):
            return

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)  # type: ignore[attr-defined]
            return record

        # Tag the wrapper so that a repeated call can recognize it.
        record_factory._is_butler_mdc_factory = True  # type: ignore[attr-defined]

        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory` since the last restore.
        """
        if cls._old_factory:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so that a later call cannot clobber
            # whatever factory has been installed in the meantime.
            cls._old_factory = None
class ButlerLogRecord(_BaseModelCompat):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    exc_info: str | None = None
    MDC: dict[str, str]

    # Records are immutable once created; the configuration spelling
    # depends on the pydantic major version in use.
    if PYDANTIC_V2:
        model_config = ConfigDict(frozen=True)
    else:

        class Config:
            """Pydantic model configuration."""

            allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New record populated from ``record``.
        """
        # The properties that are one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype, evalue, tb = record.exc_info
            # Every string returned by format_exception already ends in a
            # newline, so join without a separator to avoid blank lines
            # between the lines of the traceback.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string is expanded with
            `str.format` (``{``-style) using the fields of this record as
            keyword arguments. If `None` the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.model_dump()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Present the timestamp in ISO 8601 form.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        return log_format.format(**as_dict)

    def __str__(self) -> str:
        # String form uses the class default format.
        return self.format()
# Either flavor of record is accepted as input; the collection class defined
# below converts a LogRecord to a ButlerLogRecord if needed.
Record = LogRecord | ButlerLogRecord


# The base class for the record collection wraps a plain list; how a
# "root" model is declared depends on the pydantic major version.
if not PYDANTIC_V2:

    class _ButlerLogRecords(_BaseModelCompat):
        __root__: list[ButlerLogRecord]

        @property
        def root(self) -> list[ButlerLogRecord]:
            # Expose the list under the same name that pydantic v2 uses.
            return self.__root__

else:
    from pydantic import RootModel  # type: ignore

    class _ButlerLogRecords(RootModel):  # type:ignore[no-redef]
        root: list[ButlerLogRecord]
# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(_ButlerLogRecords):
    """Class representing a collection of `ButlerLogRecord`.

    The collection offers list-like access (indexing, slicing, iteration,
    ``append``, ``extend``, ...). Items added to it may be either
    `ButlerLogRecord` or `logging.LogRecord`; the latter are converted on
    the way in.
    """

    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection holding the given records.
        """
        if PYDANTIC_V2:
            return cls(list(records))  # type: ignore
        else:
            return cls(__root__=list(records))  # type: ignore

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.model_validate_json()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The repr is taken exactly once so
            # the snippet is not quoted twice in the message.
            max_len = 32
            snippet = repr(startdata[:max_len])
            if len(startdata) > max_len:
                snippet += "..."
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {snippet}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream. Empty if the stream has
            no content.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.model_validate_json(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.model_validate_json(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.model_validate_json(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.model_dump_json()`` or a streaming format of one JSON
            `ButlerLogRecord` per line. This can also support a zero-length
            string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The parsed records.

        Raises
        ------
        TypeError
            Raised if the serialized form is neither `str` nor `bytes`.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The full payload is passed on but only its first character is
        # examined to decide on the format.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.model_validate_json(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.model_validate_json(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string
        (`str`). The module default is returned if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.root)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.root)

    def __setitem__(self, index: int, value: Record) -> None:
        self.root[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: slice | int) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.root[index]
        if isinstance(item, list):
            if PYDANTIC_V2:
                return type(self)(item)  # type: ignore
            else:
                return type(self)(__root__=item)  # type: ignore
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.root.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        del self.root[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.root)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return the given record as a `ButlerLogRecord`.

        Parameters
        ----------
        record : `ButlerLogRecord` or `logging.LogRecord`
            Record to check. A `logging.LogRecord` is converted.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            The record itself, or its converted form.

        Raises
        ------
        ValueError
            Raised if the record is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            return record
        if isinstance(record, LogRecord):
            return ButlerLogRecord.from_record(record)
        # Name the accepted types and the offending one; this check is
        # shared by every method that adds records, not just append().
        raise ValueError(
            "Records must be of type logging.LogRecord or ButlerLogRecord,"
            f" not {type(record)}"
        )

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index, converting if needed."""
        self.root.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Append a record to the collection, converting if needed."""
        value = self._validate_record(value)
        self.root.append(value)

    def clear(self) -> None:
        """Remove all records from the collection."""
        self.root.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Append every record from the iterable, converting if needed."""
        self.root.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at the given index (default last)."""
        return self.root.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.root.reverse()
class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records.

    Every record emitted to this handler is stored in the ``records``
    attribute, a `ButlerLogRecords` collection that starts out empty.
    """

    def __init__(self) -> None:
        super().__init__()
        # The alternate constructor hides the pydantic version differences
        # involved in creating an empty collection.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        # Store the record rather than writing it to a stream; the
        # collection converts it to a ButlerLogRecord.
        self.records.append(record)
class JsonLogFormatter(Formatter):
    """Format a `LogRecord` in JSON format."""

    def format(self, record: LogRecord) -> str:
        """Return the record serialized as a JSON string.

        The record is first converted to a `ButlerLogRecord`; fields that
        were not set or that hold their default value are left out.
        """
        as_model = ButlerLogRecord.from_record(record)
        json_text = as_model.model_dump_json(exclude_unset=True, exclude_defaults=True)
        return json_text