Coverage for python/lsst/daf/butler/logging.py: 40%
234 statements
coverage.py v7.3.2, created at 2023-10-27 09:44 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", "ButlerLogRecord", "JsonLogFormatter")

import datetime
import logging
import traceback
from collections.abc import Callable, Generator, Iterable, Iterator
from contextlib import contextmanager
from logging import Formatter, LogRecord, StreamHandler
from typing import IO, Any, ClassVar, Union, overload

from lsst.utils.introspection import get_full_type_name
from lsst.utils.iteration import isplit
from pydantic import ConfigDict, PrivateAttr

from ._compat import PYDANTIC_V2, _BaseModelCompat

_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Default format for log records."""


class MDCDict(dict):
    """Dictionary for MDC data.

    This is an internal class used for better formatting of MDC in Python
    logging output. It behaves like `defaultdict(str)` but overrides the
    ``__str__`` and ``__repr__`` methods to produce output better suited to
    logging records.
    """

    def __getitem__(self, name: str) -> str:
        """Return the value for a given key or an empty string for a missing
        key.
        """
        return self.get(name, "")

    def __str__(self) -> str:
        """Return the string representation; strings are interpolated without
        quotes.
        """
        items = (f"{k}={self[k]}" for k in sorted(self))
        return "{" + ", ".join(items) + "}"

    def __repr__(self) -> str:
        return str(self)
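
# A minimal illustration (not part of the original module) of why MDCDict is
# useful when formatting: str() gives an unquoted, sorted "{k=v, ...}" form and
# a missing key comes back as an empty string instead of raising KeyError.
#
#     mdc = MDCDict({"LABEL": "run-1"})
#     str(mdc)        # '{LABEL=run-1}'
#     mdc["missing"]  # '' rather than KeyError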


class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Old log record factory."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to the MDC.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC keys for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        previous = {}
        for k, v in mdc.items():
            previous[k] = cls.MDC(k, v)

        try:
            yield
        finally:
            for k, v in previous.items():
                if not v:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)
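
    # A hedged usage sketch (illustrative only): temporarily attach context to
    # log records emitted inside the block; the previous values are restored on
    # exit. For the records to actually carry the MDC attribute,
    # add_mdc_log_record_factory() must have been called first (see below).
    #
    #     with ButlerMDC.set_mdc({"LABEL": "ingest-1234"}):
    #         logging.getLogger("lsst.daf.butler").info("Working...")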

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds an MDC record to `LogRecord`."""
        old_factory = logging.getLogRecordFactory()

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to its original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory:
            logging.setLogRecordFactory(cls._old_factory)
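
# A minimal sketch (nothing below is in the original module) of how the record
# factory and MDC combine. Because record.MDC is an MDCDict, a %-style format
# string can reference it directly and it renders as "{k=v, ...}".
#
#     ButlerMDC.add_mdc_log_record_factory()
#     handler = logging.StreamHandler()
#     handler.setFormatter(logging.Formatter("%(levelname)s %(MDC)s %(message)s"))
#     logging.getLogger().addHandler(handler)
#     try:
#         with ButlerMDC.set_mdc({"RUN": "u/test/run"}):
#             logging.warning("context attached")
#     finally:
#         ButlerMDC.restore_log_record_factory()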


class ButlerLogRecord(_BaseModelCompat):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead we have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    exc_info: str | None = None
    MDC: dict[str, str]

    if PYDANTIC_V2:
        model_config = ConfigDict(frozen=True)
    else:

        class Config:
            """Pydantic model configuration."""

            allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.
        """
        # The properties that are a one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc)

        # Sometimes exception information is included so it must be
        # extracted.
        if record.exc_info:
            etype = record.exc_info[0]
            evalue = record.exc_info[1]
            tb = record.exc_info[2]
            record_dict["exc_info"] = "\n".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            `str.format` ({}-style) syntax for formatting log messages.
            If `None` the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.model_dump()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        return self.format()
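
# An illustrative round trip (a sketch, not part of the original module):
# convert a standard logging.LogRecord into a ButlerLogRecord and format it
# with a {}-style template. The field names used below all exist on the model.
#
#     std_record = logging.LogRecord(
#         name="lsst.example", level=logging.INFO, pathname="example.py",
#         lineno=42, msg="Processing %s", args=("visit 903334",), exc_info=None,
#     )
#     butler_record = ButlerLogRecord.from_record(std_record)
#     butler_record.format("{levelname} {name}: {message}")
#     # 'INFO lsst.example: Processing visit 903334'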


# Type alias covering either form of record. The collection class below can
# convert a LogRecord to a ButlerLogRecord when needed.
Record = LogRecord | ButlerLogRecord


if PYDANTIC_V2:
    from pydantic import RootModel  # type: ignore

    class _ButlerLogRecords(RootModel):
        root: list[ButlerLogRecord]

else:

    class _ButlerLogRecords(_BaseModelCompat):  # type:ignore[no-redef]
        __root__: list[ButlerLogRecord]

        @property
        def root(self) -> list[ButlerLogRecord]:
            return self.__root__


# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(_ButlerLogRecords):
    """Class representing a collection of `ButlerLogRecord`."""

    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create a collection from an iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records with which to seed this class.
        """
        if PYDANTIC_V2:
            return cls(list(records))  # type: ignore
        else:
            return cls(__root__=list(records))  # type: ignore

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from a file.

        Parameters
        ----------
        filename : `str`
            Name of the file containing the JSON records.

        Notes
        -----
        Works with one-record-per-line JSON files and with a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)
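
    # A brief usage sketch (the file name is hypothetical): both accepted
    # layouts parse to the same collection, whether the file holds one JSON
    # record per line (as written by JsonLogFormatter) or a single serialized
    # model.
    #
    #     records = ButlerLogRecords.from_file("ingest_log.json")
    #     print(len(records), records[0].levelname)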

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.model_validate_json()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file.
            max = 32
            if len(startdata) > max:
                startdata = f"{startdata[:max]!r}..."
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {startdata!r}"
            )

        # Assume a record per line.
        return False
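
    # Illustrative inputs for this internal helper (not from the original
    # module): a leading '[' means the full-model serialization, a leading '{'
    # means one record per line, and anything else is rejected.
    #
    #     ButlerLogRecords._detect_model('[{"name": "lsst"}]')   # True
    #     ButlerLogRecords._detect_model(b'{"name": "lsst"}\n')  # False
    #     ButlerLogRecords._detect_model("oops")                 # raises ValueError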

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from an I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Notes
        -----
        Works with one-record-per-line JSON files and with a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            all = first_line + stream.read()
            return cls.model_validate_json(all)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.model_validate_json(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.model_validate_json(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> "ButlerLogRecords":
        """Parse the raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.model_dump_json()`` or a streaming format of one JSON
            `ButlerLogRecord` per line. This can also support a zero-length
            string.
        """
        if not serialized:
            # No records to return.
            return cls.from_records([])

        # Only the first character is needed to determine the format.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.model_validate_json(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.model_validate_json(line) for line in substrings if line]

        return cls.from_records(records)
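
    # A round-trip sketch (assuming ``records`` is an existing ButlerLogRecords
    # instance): the streaming form and the whole-model form both parse back to
    # a collection of the same length.
    #
    #     streamed = "\n".join(rec.model_dump_json() for rec in records)
    #     assert len(ButlerLogRecords.from_raw(streamed)) == len(records)
    #     assert len(ButlerLogRecords.from_raw(records.model_dump_json())) == len(records)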

    @property
    def log_format(self) -> str:
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous
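
    # Usage sketch (illustrative only): every record in the collection is
    # rendered with the same format string when the collection is converted
    # to text.
    #
    #     records.set_log_format("{asctime} {levelname} {message}")
    #     print(str(records))
    #     records.set_log_format(None)  # revert to _LONG_LOG_FORMAT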

    def __len__(self) -> int:
        return len(self.root)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.root)

    def __setitem__(self, index: int, value: Record) -> None:
        self.root[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: slice | int) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.root[index]
        if isinstance(item, list):
            if PYDANTIC_V2:
                return type(self)(item)  # type: ignore
            else:
                return type(self)(__root__=item)  # type: ignore
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.root.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        del self.root[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.root)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            raise ValueError(f"Can only append LogRecord or ButlerLogRecord, not item of type {type(record)}")
        return record

    def insert(self, index: int, value: Record) -> None:
        self.root.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        value = self._validate_record(value)
        self.root.append(value)

    def clear(self) -> None:
        self.root.clear()

    def extend(self, records: Iterable[Record]) -> None:
        self.root.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        return self.root.pop(index)

    def reverse(self) -> None:
        self.root.reverse()


class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records."""

    def __init__(self) -> None:
        super().__init__()
        if PYDANTIC_V2:
            self.records = ButlerLogRecords([])  # type: ignore
        else:
            self.records = ButlerLogRecords(__root__=[])  # type: ignore

    def emit(self, record: LogRecord) -> None:
        self.records.append(record)
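
# A usage sketch (the logger name is arbitrary): attach the handler to capture
# records in memory as ButlerLogRecord instances instead of writing them out.
#
#     handler = ButlerLogRecordHandler()
#     log = logging.getLogger("lsst.daf.butler")
#     log.addHandler(handler)
#     log.warning("Dataset not found")
#     print(len(handler.records), handler.records[0].message)
#     log.removeHandler(handler)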


class JsonLogFormatter(Formatter):
    """Format a `LogRecord` in JSON format."""

    def format(self, record: LogRecord) -> str:
        butler_record = ButlerLogRecord.from_record(record)
        return butler_record.model_dump_json(exclude_unset=True, exclude_defaults=True)
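
# A final sketch tying the pieces together (the file path is hypothetical):
# write one JSON record per line with JsonLogFormatter, then read the stream
# back into a ButlerLogRecords collection.
#
#     file_handler = logging.FileHandler("butler.log.json")
#     file_handler.setFormatter(JsonLogFormatter())
#     logging.getLogger().addHandler(file_handler)
#     logging.error("Something went wrong")
#     file_handler.close()
#
#     records = ButlerLogRecords.from_file("butler.log.json")
#     print(records[0].levelname, records[0].message)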