Coverage for python/lsst/daf/butler/logging.py: 42%
219 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-02 10:24 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-02 10:24 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
# Public names exported by ``from <module> import *``. `MDCDict`, `Record`
# and the underscore-prefixed names defined in this module are not exported.
__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", "ButlerLogRecord", "JsonLogFormatter")
32import datetime
33import logging
34import traceback
35from collections.abc import Callable, Generator, Iterable, Iterator
36from contextlib import contextmanager
37from logging import Formatter, LogRecord, StreamHandler
38from typing import IO, Any, ClassVar, overload
40from lsst.utils.introspection import get_full_type_name
41from lsst.utils.iteration import isplit
42from pydantic import BaseModel, ConfigDict, PrivateAttr, RootModel
# ``str.format``-style template: the placeholders are field names of
# `ButlerLogRecord` and are filled in by `ButlerLogRecord.format`.
_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Default format for log records."""
class MDCDict(dict):
    """Dictionary holding MDC data.

    Internal helper that makes MDC content render nicely in Python logging
    output. Item lookup never fails (a missing key yields an empty string,
    much like ``defaultdict(str)`` but without inserting the key) and
    ``__str__`` / ``__repr__`` produce a compact form suited to log
    records.
    """

    def __getitem__(self, name: str) -> str:
        """Return the value stored for ``name``, or ``""`` if absent."""
        try:
            return super().__getitem__(name)
        except KeyError:
            # Missing keys are reported as empty and are not inserted.
            return ""

    def __str__(self) -> str:
        """Return ``{key=value, ...}`` with keys sorted and values
        interpolated without quotes.
        """
        pairs = []
        for key in sorted(self):
            pairs.append(f"{key}={self[key]}")
        body = ", ".join(pairs)
        return "{" + body + "}"

    def __repr__(self) -> str:
        """Return the same text as ``__str__``."""
        return str(self)
class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Old log record factory."""

    # The factory most recently installed by `add_mdc_log_record_factory`,
    # or `None` if none is currently installed by this class. Used to avoid
    # wrapping our own factory in itself.
    _installed_factory: Callable[..., logging.LogRecord] | None = None

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key for which the MDC value should be removed.
        """
        cls._MDC.pop(key, None)

    @classmethod
    def clear_mdc(cls) -> None:
        """Clear all MDC entries."""
        cls._MDC.clear()

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        # A private sentinel distinguishes "key was absent" from "key was
        # present with an empty value"; the lookup below uses ``dict.get``
        # which, unlike item access on the MDC dict, reports absence.
        unset = object()
        previous: dict[str, Any] = {}
        for k, v in mdc.items():
            previous[k] = cls._MDC.get(k, unset)
            cls.MDC(k, v)

        try:
            yield
        finally:
            for k, v in previous.items():
                if v is unset:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        If the currently-active factory is the one installed by a previous
        call to this method, nothing is done. This prevents the MDC factory
        from wrapping itself and keeps the remembered original factory
        intact so that `restore_log_record_factory` can reinstate it.
        """
        old_factory = logging.getLogRecordFactory()
        if old_factory is cls._installed_factory:
            # Already active; wrapping again would lose the original.
            return

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        cls._old_factory = old_factory
        cls._installed_factory = record_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restores the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so that a repeated call cannot
            # clobber a factory that somebody installs afterwards.
            cls._old_factory = None
            cls._installed_factory = None
class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    # Format applied by `format` when the caller does not supply one.
    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    # Fields populated by `from_record`: most are copied from the
    # `logging.LogRecord` attribute of the same name.
    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    exc_info: str | None = None
    MDC: dict[str, str]

    model_config = ConfigDict(frozen=True)

    @classmethod
    def from_record(cls, record: LogRecord) -> ButlerLogRecord:
        """Build an instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record supplying the information to store.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New model populated from ``record``.
        """
        # Attributes that are transferred unchanged under the same name.
        copied_verbatim = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        fields: dict[str, Any] = {}
        for attribute in copied_verbatim:
            fields[attribute] = getattr(record, attribute)

        fields["message"] = record.getMessage()

        # Take a plain-dict copy of any MDC attached to the record so that
        # later updates to the global MDC cannot leak into this model.
        fields["MDC"] = dict(getattr(record, "MDC", {}))

        # Timestamps are stored in UTC: local time zones cannot be relied
        # upon in distributed systems and a common zone makes logs from
        # different hosts directly comparable.
        fields["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.UTC)

        # Exception details, when attached, are flattened to text. The key
        # is only set when there is something to report.
        if record.exc_info:
            exc_type = record.exc_info[0]
            exc_value = record.exc_info[1]
            exc_traceback = record.exc_info[2]
            formatted_lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
            fields["exc_info"] = "\n".join(formatted_lines)

        return cls(**fields)

    def format(self, log_format: str | None = None) -> str:
        """Render this record as text.

        Parameters
        ----------
        log_format : `str`, optional
            Format string using ``str.format``-style ``{field}``
            placeholders. If `None` the class default is used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        template = self._log_format if log_format is None else log_format

        values = self.model_dump()

        # Wrap the MDC so that a reference to an MDC key that is absent
        # formats as an empty string rather than raising.
        values["MDC"] = MDCDict(values["MDC"])

        values["asctime"] = values["asctime"].isoformat()
        return template.format(**values)

    def __str__(self) -> str:
        """Return the record rendered with the default format."""
        return self.format()
# Either kind of record accepted by the methods of `ButlerLogRecords` that
# add or replace entries; a `LogRecord` is converted to a `ButlerLogRecord`
# before it is stored.
Record = LogRecord | ButlerLogRecord
# Root model whose serialized form is a bare list of `ButlerLogRecord`.
# `ButlerLogRecords` derives from it and adds the sequence-style methods.
class _ButlerLogRecords(RootModel):
    root: list[ButlerLogRecord]
289# Do not inherit from MutableSequence since mypy insists on the values
290# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(_ButlerLogRecords):
    """Class representing a collection of `ButlerLogRecord`."""

    # The collection offers list-like methods. Those that add or replace
    # entries accept `logging.LogRecord` as well as `ButlerLogRecord`; see
    # `_validate_record`.

    # Format used by ``__str__``; `None` means use the module default.
    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> ButlerLogRecords:
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection holding a list copy of ``records``.
        """
        return cls.model_construct(root=list(records))

    @classmethod
    def from_file(cls, filename: str) -> ButlerLogRecords:
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.model_validate_json()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]
            error_type = "str"

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The snippet is passed through repr
            # exactly once whether or not it was truncated.
            max_length = 32
            if len(startdata) > max_length:
                snippet = f"{startdata[:max_length]!r}..."
            else:
                snippet = repr(startdata)
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {snippet}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> ButlerLogRecords:
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream. Empty if the stream has no
            content.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.model_validate_json(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.model_validate_json(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.model_validate_json(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> ButlerLogRecords:
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.model_dump_json()`` or a streaming format of one JSON
            `ButlerLogRecord` per line. This can also support a zero-length
            string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The parsed records.

        Raises
        ------
        TypeError
            Raised if streaming-format content is neither `str` nor `bytes`.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The detection only looks at the first character.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.model_validate_json(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.model_validate_json(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting this collection to a string
        (`str`). Falls back to the module default if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.root)

    # The signature does not match the one in BaseModel (which iterates
    # over field name/value pairs) but that is intended for a root model:
    # iterating over the collection yields the records themselves.
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.root)

    def __setitem__(self, index: int, value: Record) -> None:
        self.root[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord: ...

    @overload
    def __getitem__(self, index: slice) -> ButlerLogRecords: ...

    def __getitem__(self, index: slice | int) -> ButlerLogRecords | ButlerLogRecord:
        # Handles slices and returns a new collection in that
        # case.
        item = self.root[index]
        if isinstance(item, list):
            return type(self)(item)
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.root.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        del self.root[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.root)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return ``record`` as a `ButlerLogRecord`.

        Parameters
        ----------
        record : `ButlerLogRecord` or `logging.LogRecord`
            Record to check. A `logging.LogRecord` is converted.

        Returns
        -------
        validated : `ButlerLogRecord`
            The record itself, or its converted form.

        Raises
        ------
        ValueError
            Raised if ``record`` is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            raise ValueError(
                f"Can only append item of type ButlerLogRecord or LogRecord, not {type(record)}"
            )
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before position ``index``, converting it from
        `logging.LogRecord` if necessary.
        """
        self.root.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end of the collection, converting it from
        `logging.LogRecord` if necessary.
        """
        value = self._validate_record(value)
        self.root.append(value)

    def clear(self) -> None:
        """Remove all records from the collection."""
        self.root.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Append every record from ``records``, converting each from
        `logging.LogRecord` if necessary.
        """
        self.root.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at ``index`` (default last)."""
        return self.root.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.root.reverse()
class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records."""

    def __init__(self) -> None:
        super().__init__()
        # Start from an empty collection; every emitted record is added
        # to it by `emit`.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        """Store ``record`` in ``self.records``.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record being handled. The collection converts it to a
            `ButlerLogRecord` when appending.
        """
        collected = self.records
        collected.append(record)
class JsonLogFormatter(Formatter):
    """Format a `LogRecord` in JSON format."""

    def format(self, record: LogRecord) -> str:
        """Serialize ``record`` as a JSON string.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record to format.

        Returns
        -------
        text : `str`
            JSON form of the equivalent `ButlerLogRecord`, leaving out
            fields that were not set or that hold their default value.
        """
        return ButlerLogRecord.from_record(record).model_dump_json(
            exclude_unset=True,
            exclude_defaults=True,
        )