Coverage for python/lsst/daf/butler/core/logging.py: 34%
217 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-15 09:13 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-15 09:13 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Public names exported by this module.
__all__ = (
    "ButlerMDC",
    "ButlerLogRecords",
    "ButlerLogRecordHandler",
    "ButlerLogRecord",
    "JsonLogFormatter",
)
24import datetime
25import logging
26import traceback
27from collections.abc import Callable, Generator, Iterable, Iterator
28from contextlib import contextmanager
29from logging import Formatter, LogRecord, StreamHandler
30from typing import IO, Any, ClassVar, Union, overload
32from lsst.utils.introspection import get_full_type_name
33from lsst.utils.iteration import isplit
34from pydantic import BaseModel, PrivateAttr
# The fields referenced here are ``str.format``-style replacement fields.
_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Default format for log records."""
class MDCDict(dict):
    """Dictionary holding MDC data.

    Internal helper that gives MDC content a rendering suited to Python
    logging output. Looking up a missing key yields an empty string
    (the key is not inserted), and both ``__str__`` and ``__repr__``
    produce the same compact, quote-free form.
    """

    def __getitem__(self, name: str) -> str:
        """Return the value stored for ``name``, or ``""`` if it is absent."""
        try:
            return super().__getitem__(name)
        except KeyError:
            return ""

    def __str__(self) -> str:
        """Return ``{key=value, ...}`` with keys sorted and values
        interpolated without quotes.
        """
        body = ", ".join(f"{key}={self[key]}" for key in sorted(self))
        return f"{{{body}}}"

    def __repr__(self) -> str:
        # Deliberately the same as the string form so that log output is
        # identical whichever conversion the format string requests.
        return str(self)
class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Old log record factory."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key to remove.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        previous = {}
        for k, v in mdc.items():
            previous[k] = cls.MDC(k, v)

        try:
            yield
        finally:
            for k, v in previous.items():
                # An empty previous value is what MDC() reports for a key
                # that was not set, so such keys are removed again.
                if not v:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        The factory that was active before the first call is remembered so
        that `restore_log_record_factory` can reinstate it. Calling this
        method again before restoring does not replace the remembered
        factory.
        """
        old_factory = logging.getLogRecordFactory()

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        # Only remember the factory seen on the first installation. On a
        # repeated call the current factory is our own wrapper and storing
        # it would make it impossible to get back to the original.
        if cls._old_factory is None:
            cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the stored factory so that a later restore cannot
            # overwrite a factory installed in the meantime and a later
            # add records the factory that is current at that point.
            cls._old_factory = None
class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None
    process: int
    processName: str
    exc_info: str | None
    MDC: dict[str, str]

    class Config:
        """Pydantic model configuration."""

        allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New record populated from ``record``.
        """
        # The properties that are one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype, evalue, tb = record.exc_info
            # Every string returned by format_exception already ends in a
            # newline, so they must be concatenated directly; joining with
            # "\n" would insert a blank line after each traceback line.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style use for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.dict()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Render the timestamp as ISO 8601 text.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        return log_format.format(**as_dict)

    def __str__(self) -> str:
        # Use the class default format.
        return self.format()
# Either flavor of record is accepted by the collection class below, which
# converts a LogRecord to a ButlerLogRecord when needed.
Record = LogRecord | ButlerLogRecord
# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(BaseModel):
    """Class representing a collection of `ButlerLogRecord`.

    The collection offers list-like access to its records. Methods that
    add records accept either `ButlerLogRecord` or `logging.LogRecord`;
    the latter are converted on the way in.
    """

    __root__: list[ButlerLogRecord]
    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection containing the supplied records.
        """
        return cls(__root__=list(records))

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.parse_raw()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The ellipsis is added after taking
            # the repr so the content is only quoted once.
            max_length = 32
            snippet = repr(startdata[:max_length])
            if len(startdata) > max_length:
                snippet += "..."
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {snippet}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream. The collection is empty if
            the stream has no content.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.parse_raw(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.parse_raw(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.parse_raw(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.json()`` or a streaming format of one JSON `ButlerLogRecord`
            per line. This can also support a zero-length string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The parsed records.

        Raises
        ------
        TypeError
            Raised if ``serialized`` is neither `str` nor `bytes`.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # Only the first character is examined but the full content is
        # passed so that an error message can quote the start of it.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.parse_raw(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.parse_raw(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string
        (`str`). The module default is returned if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.__root__)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.__root__)

    def __setitem__(self, index: int, value: Record) -> None:
        self.__root__[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: slice | int) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.__root__[index]
        if isinstance(item, list):
            return type(self)(__root__=item)
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.__root__.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        del self.__root__[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.__root__)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return ``record`` as a `ButlerLogRecord`, converting it from a
        `logging.LogRecord` if necessary.

        Raises
        ------
        ValueError
            Raised if the record is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            raise ValueError(
                f"Records must be of type ButlerLogRecord or logging.LogRecord, not {type(record)}"
            )
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index."""
        self.__root__.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end of the collection."""
        value = self._validate_record(value)
        self.__root__.append(value)

    def clear(self) -> None:
        """Remove all records from the collection."""
        self.__root__.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add every record from the iterable to the end of the collection."""
        self.__root__.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at the given index (default last)."""
        return self.__root__.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.__root__.reverse()
class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records.

    Emitted records are appended to the ``records`` attribute, a
    `ButlerLogRecords` collection that starts out empty.
    """

    def __init__(self) -> None:
        super().__init__()
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        # The collection converts the LogRecord to a ButlerLogRecord.
        self.records.append(record)
class JsonLogFormatter(Formatter):
    """Format a `LogRecord` in JSON format."""

    def format(self, record: LogRecord) -> str:
        """Return the JSON serialization of ``record``.

        The record is converted to a `ButlerLogRecord` first; unset fields
        and fields holding their default value are left out of the JSON.
        """
        return ButlerLogRecord.from_record(record).json(exclude_unset=True, exclude_defaults=True)