Coverage for python/lsst/daf/butler/logging.py: 41%
218 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-01 11:20 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-01 11:20 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Public names exported by this module.
__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", "ButlerLogRecord", "JsonLogFormatter")
30import datetime
31import logging
32import traceback
33from collections.abc import Callable, Generator, Iterable, Iterator
34from contextlib import contextmanager
35from logging import Formatter, LogRecord, StreamHandler
36from typing import IO, Any, ClassVar, Union, overload
38from lsst.utils.introspection import get_full_type_name
39from lsst.utils.iteration import isplit
40from pydantic import BaseModel, ConfigDict, PrivateAttr, RootModel
# ``str.format``-style template used when no explicit format is supplied.
# Every field named here is an attribute of `ButlerLogRecord`.
_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Default format for log records."""
class MDCDict(dict):
    """Mapping used to hold MDC key/value pairs.

    Internal helper that makes MDC content render nicely in Python logging
    output. Reading a missing key yields an empty string (the key is not
    inserted), and both ``__str__`` and ``__repr__`` produce a compact form
    suited to log records.
    """

    def __getitem__(self, name: str) -> str:
        """Look up ``name``, falling back to an empty string if absent."""
        return self.get(name, "")

    def __str__(self) -> str:
        """Render as ``{key=value, ...}`` with sorted keys; string values
        are interpolated without quotes.
        """
        body = ", ".join(f"{key}={self[key]}" for key in sorted(self))
        return "{" + body + "}"

    def __repr__(self) -> str:
        # The log-friendly rendering doubles as the repr.
        return str(self)
class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Old log record factory."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key for which the MDC value should be removed.
        """
        cls._MDC.pop(key, None)

    @classmethod
    def clear_mdc(cls) -> None:
        """Clear all MDC entries."""
        cls._MDC.clear()

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        previous = {}
        for k, v in mdc.items():
            previous[k] = cls.MDC(k, v)

        try:
            yield
        finally:
            for k, v in previous.items():
                # MDC() reports an unset key as an empty string, so an empty
                # previous value means the key has to be removed again.
                if not v:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        Calling this again while the factory installed by a previous call
        is still the active one is a no-op, so that
        `restore_log_record_factory` always returns to the factory that was
        in place before the MDC factory was first installed.
        """
        old_factory = logging.getLogRecordFactory()

        # Wrapping our own wrapper would overwrite the saved original factory
        # with the wrapper itself, after which restoring could never remove
        # the MDC injection.
        if getattr(old_factory, "_butler_mdc_owner", None) is cls:
            return

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        # Tag the wrapper so that a repeated call can recognize it.
        record_factory._butler_mdc_owner = cls  # type: ignore[attr-defined]

        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory` since the last restore.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so that a later call cannot replace
            # whatever factory is active at that time with a stale one.
            cls._old_factory = None
class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    # Apart from ``asctime``, ``message``, ``exc_info`` and ``MDC`` the
    # fields are copied from the `~logging.LogRecord` attribute of the same
    # name.
    name: str
    # Creation time of the record as a timezone-aware UTC datetime.
    asctime: datetime.datetime
    # Result of ``LogRecord.getMessage()``.
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    # Formatted traceback text, if the record carried exception information.
    exc_info: str | None = None
    # Copy of the MDC content attached to the record (empty if none).
    MDC: dict[str, str]

    model_config = ConfigDict(frozen=True)

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            The converted record.
        """
        # The properties that are one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.UTC)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            # Each string returned by format_exception() already ends with
            # a newline, so the pieces are concatenated directly; joining
            # with "\n" would double every line break.
            record_dict["exc_info"] = "".join(traceback.format_exception(*record.exc_info))

        return cls(**record_dict)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This is a `str.format`-style
            (``{``-style) template whose fields are the attributes of this
            record. If `None` the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.model_dump()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        as_dict["asctime"] = as_dict["asctime"].isoformat()
        return log_format.format(**as_dict)

    def __str__(self) -> str:
        # Render using the class default format.
        return self.format()
# Either kind of record may be handed to the collection class below; it can
# convert a LogRecord to a ButlerLogRecord if needed.
Record = LogRecord | ButlerLogRecord
# Root model declaring only the payload type; the list-like behaviour is
# provided by the `ButlerLogRecords` subclass.
class _ButlerLogRecords(RootModel):
    # The records held by the collection.
    root: list[ButlerLogRecord]
287# Do not inherit from MutableSequence since mypy insists on the values
288# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(_ButlerLogRecords):
    """Class representing a collection of `ButlerLogRecord`."""

    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection holding the given records.
        """
        return cls.model_construct(root=list(records))

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.model_validate_json()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]
            error_type = "str"

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char == "{":
            # Assume a record per line.
            return False

        # Limit the length of string reported in error message in case
        # this is an enormous file. The snippet is converted with repr()
        # exactly once so that it is not quoted twice in the message.
        max_length = 32
        if len(startdata) > max_length:
            snippet = f"{startdata[:max_length]!r}..."
        else:
            snippet = repr(startdata)
        raise ValueError(
            "Unrecognized JSON log format. Expected '{' or '[' but got"
            f" {first_char!r} from {error_type} content starting with {snippet}"
        )

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream. Empty if the stream has no
            content.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.model_validate_json(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.model_validate_json(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.model_validate_json(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.model_dump_json()`` or a streaming format of one JSON
            `ButlerLogRecord` per line. This can also support a zero-length
            string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The parsed records.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # Only the first character is examined, but the whole payload is
        # passed so that an error message can quote the start of it.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.model_validate_json(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.model_validate_json(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used to render the records (`str`).

        The module default is returned if no format has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.root)

    # The signature does not match the one in BaseModel but that is okay
    # for a root model, where iterating over the wrapped list of records
    # is the natural behavior.
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.root)

    def __setitem__(self, index: int, value: Record) -> None:
        self.root[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord: ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords": ...

    def __getitem__(self, index: slice | int) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.root[index]
        if isinstance(item, list):
            return type(self)(item)
        return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.root.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        del self.root[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.root)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return the given record as a `ButlerLogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord` or `ButlerLogRecord`
            Record to check. A `~logging.LogRecord` is converted.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            The record itself, or its converted form.

        Raises
        ------
        ValueError
            Raised if the record is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            return record
        if isinstance(record, LogRecord):
            return ButlerLogRecord.from_record(record)
        # Name the accepted types and report the offending type separately;
        # previously the rejected type was presented as the accepted one.
        raise ValueError(
            f"Can only append item of type LogRecord or ButlerLogRecord, not {type(record)}"
        )

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index, converting if needed."""
        self.root.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end, converting if needed."""
        value = self._validate_record(value)
        self.root.append(value)

    def clear(self) -> None:
        """Remove all records."""
        self.root.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add all the given records to the end, converting if needed."""
        self.root.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at the given index (default last)."""
        return self.root.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.root.reverse()
class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records."""

    def __init__(self) -> None:
        super().__init__()
        # Begin with an empty collection that emit() adds to.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        """Store the record in ``self.records``."""
        self.records.append(record)
class JsonLogFormatter(Formatter):
    """Format a `LogRecord` in JSON format."""

    def format(self, record: LogRecord) -> str:
        """Serialize the record as JSON by way of `ButlerLogRecord`.

        Fields that were not set or that hold their default value are
        left out of the output.
        """
        return ButlerLogRecord.from_record(record).model_dump_json(
            exclude_unset=True,
            exclude_defaults=True,
        )