Coverage for python/lsst/daf/butler/core/logging.py: 34%
216 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-30 02:32 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-30 02:32 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Public names exported by this module.
__all__ = (
    "ButlerMDC",
    "ButlerLogRecords",
    "ButlerLogRecordHandler",
    "ButlerLogRecord",
    "JsonLogFormatter",
)
24import datetime
25import logging
26import traceback
27from contextlib import contextmanager
28from logging import Formatter, LogRecord, StreamHandler
29from typing import (
30 IO,
31 Any,
32 Callable,
33 ClassVar,
34 Dict,
35 Generator,
36 Iterable,
37 Iterator,
38 List,
39 Optional,
40 Union,
41 overload,
42)
44from lsst.utils.introspection import get_full_type_name
45from lsst.utils.iteration import isplit
46from pydantic import BaseModel, PrivateAttr
# Layout used when a record is rendered as text and no other format is given.
_LONG_LOG_FORMAT = (
    "{levelname} {asctime} {name} "
    "{filename}:{lineno} - {message}"
)
"""Default format for log records."""
class MDCDict(dict):
    """Mapping that holds MDC values and renders them compactly.

    Internal helper used to get nicer MDC formatting in Python logging
    output. Looking up an unknown key yields an empty string (much like
    ``defaultdict(str)``) and ``__str__``/``__repr__`` produce a form better
    suited to log records than the default `dict` output.
    """

    def __getitem__(self, name: str) -> str:
        """Return the stored value, or an empty string if the key is absent."""
        return self.get(name, "")

    def __str__(self) -> str:
        """Return ``{key=value, ...}`` with sorted keys and unquoted
        values.
        """
        body = ", ".join(f"{key}={self[key]}" for key in sorted(self))
        return f"{{{body}}}"

    def __repr__(self) -> str:
        # Deliberately identical to the string form.
        return str(self)
class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Optional[Callable[..., logging.LogRecord]] = None
    """Old log record factory."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: Dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        # A sentinel marks keys that did not exist before entry. Testing the
        # truthiness of the old value is not sufficient because a key that
        # was explicitly set to "" must be restored rather than removed.
        unset = object()
        previous: Dict[str, Any] = {}
        for k, v in mdc.items():
            previous[k] = cls._MDC.get(k, unset)
            cls.MDC(k, v)

        try:
            yield
        finally:
            for k, old in previous.items():
                if old is unset:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, old)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`."""
        old_factory = logging.getLogRecordFactory()

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        # Remember the factory that was active so it can be reinstated by
        # restore_log_record_factory().
        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restores the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory:
            logging.setLogRecordFactory(cls._old_factory)
class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    # Format string used by format() when the caller does not supply one.
    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: Optional[str]
    process: int
    processName: str
    exc_info: Optional[str]
    MDC: Dict[str, str]

    class Config:
        """Pydantic model configuration."""

        allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New model instance populated from ``record``.
        """
        # The properties that are one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict: Dict[str, Any] = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype, evalue, tb = record.exc_info
            # format_exception() returns strings that already end in a
            # newline, so concatenate them directly. Joining with "\n"
            # would insert a blank line after every traceback line.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: Optional[str] = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style use for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.dict()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Render the timestamp as an ISO 8601 string.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        # Use the class default format.
        return self.format()
# Either kind of record is accepted by the collection class below, which
# converts a LogRecord to a ButlerLogRecord when needed.
Record = Union[LogRecord, ButlerLogRecord]
# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(BaseModel):
    """Class representing a collection of `ButlerLogRecord`."""

    __root__: List[ButlerLogRecord]
    _log_format: Optional[str] = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.
        """
        return cls(__root__=list(records))

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename, "r") as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: Union[str, bytes]) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.parse_raw()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The snippet is converted with repr()
            # exactly once so that the quoting in the message is not nested.
            max_len = 32
            snippet = repr(startdata[:max_len])
            if len(startdata) > max_len:
                snippet += "..."
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {snippet}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.parse_raw(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.parse_raw(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.parse_raw(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: Union[str, bytes]) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.json()`` or a streaming format of one JSON `ButlerLogRecord`
            per line. This can also support a zero-length string.

        Raises
        ------
        TypeError
            Raised if the streaming format is detected but ``serialized``
            is neither `str` nor `bytes`.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # Only the first character is inspected but the full data are
        # passed so that an error message can quote the start of them.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.parse_raw(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[Union[str, bytes]]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.parse_raw(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string
        (`str`). Falls back to the module default if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: Optional[str]) -> Optional[str]:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.__root__)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.__root__)

    def __setitem__(self, index: int, value: Record) -> None:
        # A LogRecord is converted to a ButlerLogRecord before being stored.
        self.__root__[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: Union[slice, int]) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case. The new collection is built from the records only.
        item = self.__root__[index]
        if isinstance(item, list):
            return type(self)(__root__=item)
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.__root__.__reversed__()

    def __delitem__(self, index: Union[slice, int]) -> None:
        del self.__root__[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.__root__)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return ``record`` as a `ButlerLogRecord`, converting it if needed.

        Parameters
        ----------
        record : `ButlerLogRecord` or `logging.LogRecord`
            Record to check.

        Returns
        -------
        record : `ButlerLogRecord`
            The given record, converted from a `~logging.LogRecord` if
            necessary.

        Raises
        ------
        ValueError
            Raised if the record is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            # Name the accepted types and report the rejected one.
            raise ValueError(
                f"Can only append item of type ButlerLogRecord or LogRecord, not {type(record)}"
            )
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before ``index``, converting it if needed."""
        self.__root__.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end, converting it if needed."""
        value = self._validate_record(value)
        self.__root__.append(value)

    def clear(self) -> None:
        """Remove all records."""
        self.__root__.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add each of the given records, converting them if needed."""
        self.__root__.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at ``index`` (default last)."""
        return self.__root__.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.__root__.reverse()
class ButlerLogRecordHandler(StreamHandler):
    """Logging handler that collects every record it is given."""

    def __init__(self) -> None:
        super().__init__()
        # Start with an empty collection; emit() adds to it.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        """Append ``record`` to ``self.records``."""
        self.records.append(record)
class JsonLogFormatter(Formatter):
    """Formatter that renders a `LogRecord` as a JSON string."""

    def format(self, record: LogRecord) -> str:
        """Convert ``record`` to a `ButlerLogRecord` and return its JSON
        serialization, leaving out unset and default-valued fields.
        """
        return ButlerLogRecord.from_record(record).json(exclude_unset=True, exclude_defaults=True)