Coverage for python/lsst/daf/butler/core/logging.py : 33%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

# Public API of this module.
__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler",
           "ButlerLogRecord", "JsonLogFormatter")

import logging
import datetime
import traceback
from contextlib import contextmanager
from typing import List, Union, Optional, ClassVar, Iterable, Iterator, Dict, IO, Any, Generator

from logging import LogRecord, StreamHandler, Formatter
from pydantic import BaseModel, PrivateAttr

from lsst.utils.iteration import isplit
from lsst.utils.introspection import get_full_type_name

# The placeholders use ``str.format`` syntax and are filled from the fields
# of a log record (see ``ButlerLogRecord.format``).
_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Default format for log records."""
class MDCDict(dict):
    """Dictionary for MDC data.

    This is an internal class used for better formatting of MDC in Python
    logging output. Lookups behave like `defaultdict(str)` in that a missing
    key yields an empty string, but the missing key is *not* inserted into
    the dictionary. ``__str__`` and ``__repr__`` are overridden to produce
    output better suited for logging records.
    """

    def __getitem__(self, name: str) -> str:
        """Return value for a given key or empty string for missing key."""
        return self.get(name, "")

    def __str__(self) -> str:
        """Return string representation, strings are interpolated without
        quotes.

        Keys are emitted in sorted order so the output is deterministic.
        """
        items = (f"{k}={self[k]}" for k in sorted(self))
        return "{" + ", ".join(items) + "}"

    def __repr__(self) -> str:
        """Return the same text as ``__str__``."""
        return str(self)
class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory = None
    """Old log record factory."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key to remove.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: Dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously). A key whose
        previous value was an empty string is treated as unset.
        """
        previous = {}
        for k, v in mdc.items():
            previous[k] = cls.MDC(k, v)

        try:
            yield
        finally:
            for k, v in previous.items():
                if not v:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        Calling this method again while the MDC factory is still the
        active log record factory is a no-op. Without that guard the
        wrapper would wrap itself and the reference to the original
        factory would be lost, making `restore_log_record_factory` unable
        to reinstate it.
        """
        old_factory = logging.getLogRecordFactory()

        if getattr(old_factory, "_butler_mdc_factory", False):
            # Our factory is already the active one; nothing to do.
            return

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)  # type: ignore
            return record

        # Tag the wrapper so that a repeated call can recognize it.
        record_factory._butler_mdc_factory = True  # type: ignore

        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restores the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`, or if the factory has already been
        restored since the last such call.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so a second restore cannot clobber
            # a factory installed by someone else in the meantime.
            cls._old_factory = None
class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: Optional[str]
    process: int
    processName: str
    exc_info: Optional[str]
    MDC: Dict[str, str]

    class Config:
        """Pydantic model configuration."""

        allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> ButlerLogRecord:
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New record populated from ``record``.
        """
        # The properties that are one-to-one mapping.
        simple = ("name", "levelno", "levelname", "filename", "pathname",
                  "lineno", "funcName", "process", "processName")

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created,
                                                                 tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype, evalue, tb = record.exc_info
            # Each string returned by format_exception() already ends in a
            # newline, so join without a separator to avoid blank lines
            # between every line of the traceback.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: Optional[str] = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string uses `str.format`
            (``{``-style) placeholders named after the fields of this
            record. If `None` the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.dict()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Present the timestamp in ISO 8601 form.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        """Return this record formatted with the class default format."""
        return self.format()
# The class below can convert LogRecord to ButlerLogRecord if needed.
Record = Union[LogRecord, ButlerLogRecord]
# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(BaseModel):
    """Class representing a collection of `ButlerLogRecord`.

    The collection offers a list-like interface. Methods that add items
    accept either `ButlerLogRecord` or `logging.LogRecord`; the latter is
    converted on the way in.
    """

    __root__: List[ButlerLogRecord]
    _log_format: Optional[str] = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> ButlerLogRecords:
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection holding the supplied records.
        """
        return cls(__root__=list(records))

    @classmethod
    def from_file(cls, filename: str) -> ButlerLogRecords:
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename, "r") as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: Union[str, bytes]) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.parse_raw()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]
            error_type = "str"

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The snippet is passed through repr()
            # exactly once so that it is not doubly quoted in the message.
            max_length = 32
            snippet = repr(startdata[:max_length])
            if len(startdata) > max_length:
                snippet += "..."
            raise ValueError("Unrecognized JSON log format. Expected '{' or '[' but got"
                             f" {first_char!r} from {error_type} content starting with {snippet}")

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> ButlerLogRecords:
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream. Empty if the stream has no
            content.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.parse_raw(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.parse_raw(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.parse_raw(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: Union[str, bytes]) -> ButlerLogRecords:
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.json()`` or a streaming format of one JSON `ButlerLogRecord`
            per line. This can also support a zero-length string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The parsed records.

        Raises
        ------
        TypeError
            Raised if the streaming-format content is neither `str` nor
            `bytes`.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The detector only inspects the leading character, so the full
        # payload can be handed over without copying or slicing it.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.parse_raw(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[Union[str, bytes]]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.parse_raw(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting these records to a string
        (`str`).

        Falls back to the module default if no format has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: Optional[str]) -> Optional[str]:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.__root__)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.__root__)

    def __setitem__(self, index: int, value: Record) -> None:
        self.__root__[index] = self._validate_record(value)

    def __getitem__(self, index: Union[slice, int]) -> Union[ButlerLogRecords, ButlerLogRecord]:
        # Handles slices and returns a new collection in that
        # case.
        item = self.__root__[index]
        if isinstance(item, list):
            return type(self)(__root__=item)
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.__root__.__reversed__()

    def __delitem__(self, index: Union[slice, int]) -> None:
        del self.__root__[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.__root__)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return the given record as a `ButlerLogRecord`.

        Parameters
        ----------
        record : `ButlerLogRecord` or `logging.LogRecord`
            Record to check. A `logging.LogRecord` is converted.

        Returns
        -------
        validated : `ButlerLogRecord`
            The record in the form stored by this collection.

        Raises
        ------
        ValueError
            Raised if the record is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            # Name the accepted types and the rejected one.
            raise ValueError("Can only append item of type ButlerLogRecord or LogRecord,"
                             f" not {type(record)}")
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index."""
        self.__root__.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Append a record to the end of the collection."""
        value = self._validate_record(value)
        self.__root__.append(value)

    def clear(self) -> None:
        """Remove all records from the collection."""
        self.__root__.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Append every record from the given iterable."""
        self.__root__.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at the given index (default last)."""
        return self.__root__.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.__root__.reverse()
class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records.

    Records are not written to the stream; each emitted record is appended
    to the ``records`` attribute, a `ButlerLogRecords` collection.
    """

    def __init__(self) -> None:
        super().__init__()
        self.records = ButlerLogRecords(__root__=[])

    def emit(self, record: LogRecord) -> None:
        """Store the record in the internal collection.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record to store. It is converted to a `ButlerLogRecord`
            when appended.
        """
        self.records.append(record)
class JsonLogFormatter(Formatter):
    """Format a `LogRecord` in JSON format."""

    def format(self, record: LogRecord) -> str:
        """Return the record serialized as a JSON string.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record to format.

        Returns
        -------
        text : `str`
            JSON serialization of the equivalent `ButlerLogRecord`, with
            unset fields and fields holding their default value omitted.
        """
        butler_record = ButlerLogRecord.from_record(record)
        return butler_record.json(exclude_unset=True, exclude_defaults=True)