Coverage for python/lsst/daf/butler/core/logging.py: 41%
220 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-14 19:21 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-14 19:21 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Names exported by ``from <module> import *``.
__all__ = (
    "ButlerMDC",
    "ButlerLogRecords",
    "ButlerLogRecordHandler",
    "ButlerLogRecord",
    "JsonLogFormatter",
)
24import datetime
25import logging
26import traceback
27from collections.abc import Callable, Generator, Iterable, Iterator
28from contextlib import contextmanager
29from logging import Formatter, LogRecord, StreamHandler
30from typing import IO, Any, ClassVar, Union, overload
32from lsst.utils.introspection import get_full_type_name
33from lsst.utils.iteration import isplit
35try:
36 from pydantic.v1 import BaseModel, PrivateAttr
37except ModuleNotFoundError:
38 from pydantic import BaseModel, PrivateAttr # type: ignore
_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Format string applied to log records when no other format is given."""
class MDCDict(dict):
    """Mapping that holds MDC values and prints them compactly.

    Internal helper used to render MDC content in Python logging output.
    Indexing with a key that is absent yields an empty string instead of
    raising `KeyError`; the key is not inserted by the lookup. Both
    ``__str__`` and ``__repr__`` produce a form tailored to log records.
    """

    def __getitem__(self, name: str) -> str:
        """Look up ``name``, falling back to ``""`` when it is absent."""
        try:
            return super().__getitem__(name)
        except KeyError:
            return ""

    def __str__(self) -> str:
        """Render as ``{key=value, ...}`` in sorted key order, with the
        values interpolated directly rather than quoted.
        """
        body = ", ".join(f"{key}={self[key]}" for key in sorted(self))
        return f"{{{body}}}"

    def __repr__(self) -> str:
        """Return the same text as ``str()``."""
        return self.__str__()
class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Log record factory that was active before the MDC factory was
    installed, or `None` if there is nothing to restore."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key to remove.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        # Record the prior state with dict.get() and a sentinel rather than
        # the return value of MDC(): MDC() reports "" both for a missing key
        # and for a key explicitly set to "", and those two cases must be
        # restored differently.
        missing = object()
        previous: dict[str, Any] = {key: cls._MDC.get(key, missing) for key in mdc}
        for key, value in mdc.items():
            cls.MDC(key, value)

        try:
            yield
        finally:
            for key, old in previous.items():
                if old is missing:
                    cls.MDCRemove(key)
                else:
                    cls.MDC(key, old)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        Does nothing if the currently active factory is already one
        installed by this method. Wrapping it again would overwrite the
        remembered original factory with the wrapper, making it impossible
        for `restore_log_record_factory` to undo the installation.
        """
        old_factory = logging.getLogRecordFactory()
        if getattr(old_factory, "_butler_mdc_factory", False):
            return

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)  # type: ignore[attr-defined]
            return record

        # Tag the wrapper so that a repeated call can recognize it.
        record_factory._butler_mdc_factory = True  # type: ignore[attr-defined]

        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory` since the last restore.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so a second restore can not clobber
            # a factory that was installed by someone else in the meantime.
            cls._old_factory = None
class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    # Format used by format() when the caller does not supply one.
    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    exc_info: str | None = None
    MDC: dict[str, str]

    class Config:
        """Pydantic model configuration."""

        allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            Model populated from the supplied record.
        """
        # The properties that are one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later. Records created without
        # the MDC record factory have no MDC attribute at all.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype, evalue, tb = record.exc_info
            # Each string returned by format_exception() already ends in a
            # newline, so concatenate directly; joining with "\n" would
            # insert a blank line between every line of the traceback.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style use for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.dict()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Present the timestamp in ISO 8601 form.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        return log_format.format(**as_dict)

    def __str__(self) -> str:
        """Return the record formatted with the class default format."""
        return self.format()
# Either kind of record is accepted by the collection class that follows;
# a plain LogRecord is converted to a ButlerLogRecord when required.
Record = LogRecord | ButlerLogRecord
274# Do not inherit from MutableSequence since mypy insists on the values
275# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(BaseModel):
    """Class representing a collection of `ButlerLogRecord`.

    The collection offers a list-like interface (indexing, slicing,
    iteration, ``append``, ``extend``, ``insert``, ``pop`` and so on).
    Methods that add records accept either `ButlerLogRecord` or
    `logging.LogRecord`; the latter is converted on the way in.
    """

    __root__: list[ButlerLogRecord]
    # Format string for str(); None means use the module default.
    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.
        """
        return cls(__root__=list(records))

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.parse_raw()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The repr is taken exactly once so
            # that truncated content is not quoted twice.
            max_length = 32
            snippet = repr(startdata[:max_length])
            if len(startdata) > max_length:
                snippet += "..."
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {snippet}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.parse_raw(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.parse_raw(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.parse_raw(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.json()`` or a streaming format of one JSON `ButlerLogRecord`
            per line. This can also support a zero-length string.

        Raises
        ------
        TypeError
            Raised if the streaming-format content is neither `str` nor
            `bytes`.
        ValueError
            Raised if the content does not start like either supported
            format.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # Only the first character decides the format, but the full content
        # is handed over so that an error message can quote how it starts.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.parse_raw(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.parse_raw(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used by ``str()`` on this collection (`str`).

        Falls back to the module default when no format has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.__root__)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.__root__)

    def __setitem__(self, index: int, value: Record) -> None:
        self.__root__[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: slice | int) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.__root__[index]
        if isinstance(item, list):
            return type(self)(__root__=item)
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.__root__.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        del self.__root__[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.__root__)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return the supplied record as a `ButlerLogRecord`.

        Parameters
        ----------
        record : `ButlerLogRecord` or `logging.LogRecord`
            Record to check. A `logging.LogRecord` is converted.

        Returns
        -------
        validated : `ButlerLogRecord`
            The record in the form stored by this collection.

        Raises
        ------
        ValueError
            Raised if the record is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            raise ValueError(
                f"Can only store records of type LogRecord or ButlerLogRecord, not {type(record)}"
            )
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index, converting if needed."""
        self.__root__.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end of the collection, converting if needed."""
        value = self._validate_record(value)
        self.__root__.append(value)

    def clear(self) -> None:
        """Remove all records from the collection."""
        self.__root__.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add all the given records to the end of the collection.

        Every record is validated before any is added, so an invalid
        record leaves the collection unchanged.
        """
        validated = [self._validate_record(record) for record in records]
        self.__root__.extend(validated)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at the given index (default last)."""
        return self.__root__.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.__root__.reverse()
class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records.

    Each record passed to `emit` is appended to the ``records``
    attribute, a `ButlerLogRecords` collection; this override of `emit`
    does not write to the stream.
    """

    def __init__(self) -> None:
        super().__init__()
        # Records accumulated so far, in the order they were emitted.
        self.records = ButlerLogRecords(__root__=[])

    def emit(self, record: LogRecord) -> None:
        """Store the record in the ``records`` collection.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record to store.

        Notes
        -----
        Failures while converting or storing the record are passed to
        `logging.Handler.handleError`, as the standard library handlers
        do, so that a problem with one record does not raise inside the
        code that issued the log message.
        """
        try:
            self.records.append(record)
        except RecursionError:
            # Mirror logging.StreamHandler.emit, which lets this one through.
            raise
        except Exception:
            self.handleError(record)
class JsonLogFormatter(Formatter):
    """Formatter that renders each `LogRecord` as a JSON string."""

    def format(self, record: LogRecord) -> str:
        """Convert the record to a `ButlerLogRecord` and serialize it to
        JSON, leaving out fields that are unset or equal to their defaults.
        """
        return ButlerLogRecord.from_record(record).json(exclude_unset=True, exclude_defaults=True)