Coverage for python/lsst/daf/butler/core/logging.py: 34%
206 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-01 19:55 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-01 19:55 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("VERBOSE", "ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler",
25 "ButlerLogRecord", "JsonLogFormatter")
27import logging
28import datetime
29import traceback
30from contextlib import contextmanager
31from typing import List, Union, Optional, ClassVar, Iterable, Iterator, Dict, IO, Any, Generator
33from logging import LogRecord, StreamHandler, Formatter
34from pydantic import BaseModel, PrivateAttr
36from .utils import isplit
# Custom level that sits halfway between the standard DEBUG and INFO levels.
VERBOSE = (logging.DEBUG + logging.INFO) // 2
"""Verbose log level"""

# Register a readable name for the custom level with the logging module.
logging.addLevelName(VERBOSE, "VERBOSE")

_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Default format for log records."""
class MDCDict(dict):
    """Mapping that holds MDC values attached to log records.

    This is an internal helper that makes MDC content render nicely in
    Python logging output. Item lookup never fails: a key that is absent
    yields an empty string, much like ``defaultdict(str)`` (although the
    missing key is not inserted). ``__str__`` and ``__repr__`` both produce
    a compact ``{key=value, ...}`` form with keys in sorted order.
    """

    def __getitem__(self, name: str) -> str:
        """Look up ``name``, falling back to an empty string if absent."""
        try:
            return super().__getitem__(name)
        except KeyError:
            return ""

    def __str__(self) -> str:
        """Render the content as ``{k=v, ...}`` with unquoted strings and
        keys in sorted order.
        """
        parts = []
        for key in sorted(self):
            parts.append(f"{key}={self[key]}")
        body = ", ".join(parts)
        return f"{{{body}}}"

    def __repr__(self) -> str:
        """Return the same text as ``str()`` of this mapping."""
        return str(self)
class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory = None
    """Log record factory that was active before the MDC factory was
    installed, or `None` if the MDC factory is not installed."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key to remove.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: Dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        # Remember explicitly whether each key existed beforehand. Relying
        # on the truthiness of the old value would wrongly drop a key that
        # had been deliberately set to an empty string.
        previous = {}
        for k, v in mdc.items():
            was_set = k in cls._MDC
            previous[k] = (was_set, cls.MDC(k, v))

        try:
            yield
        finally:
            for k, (was_set, old_value) in previous.items():
                if was_set:
                    cls.MDC(k, old_value)
                else:
                    cls.MDCRemove(k)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        Calling this when the currently-installed factory is already the
        MDC factory is a no-op. This prevents the factory from wrapping
        itself and keeps the remembered original factory intact.
        """
        old_factory = logging.getLogRecordFactory()

        if getattr(old_factory, "_butler_mdc_factory", False):
            # Already installed; wrapping again would lose the original
            # factory needed by restore_log_record_factory().
            return

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)  # type: ignore
            return record

        # Tag the wrapper so that a repeated call can recognize it.
        record_factory._butler_mdc_factory = True  # type: ignore

        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so that a later call does not
            # clobber whatever factory has been installed since.
            cls._old_factory = None
class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: Optional[str]
    process: int
    processName: str
    exc_info: Optional[str]
    MDC: Dict[str, str]

    class Config:
        """Pydantic model configuration."""

        allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> ButlerLogRecord:
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New model instance populated from ``record``.
        """
        # The properties that are one-to-one mapping.
        simple = ("name", "levelno", "levelname", "filename", "pathname",
                  "lineno", "funcName", "process", "processName")

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created,
                                                                 tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype, evalue, tb = record.exc_info[:3]
            # Every string returned by format_exception() already ends in
            # a newline, so concatenate directly; joining with "\n" would
            # double-space the traceback.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: Optional[str] = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string is applied with
            `str.format` using the fields of this record as keyword
            arguments. If `None` the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.dict()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Present the timestamp in ISO 8601 form.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        return log_format.format(**as_dict)

    def __str__(self) -> str:
        """Return this record formatted with the class default format."""
        return self.format()
# The class below can convert LogRecord to ButlerLogRecord if needed.
Record = Union[LogRecord, ButlerLogRecord]


# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(BaseModel):
    """Class representing a collection of `ButlerLogRecord`.

    The collection offers list-like access (indexing, slicing, ``append``,
    ``extend``, ...). Items given as `logging.LogRecord` are converted to
    `ButlerLogRecord` when they are added through those methods.
    """

    __root__: List[ButlerLogRecord]
    _log_format: Optional[str] = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> ButlerLogRecords:
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection holding the supplied records.
        """
        return cls(__root__=list(records))

    @classmethod
    def from_file(cls, filename: str) -> ButlerLogRecords:
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename, "r") as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: Union[str, bytes]) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.parse_raw()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The snippet is repr()-ed exactly
            # once, with an ellipsis marking any truncation.
            max_len = 32
            snippet = startdata[:max_len]
            ellipsis = "..." if len(startdata) > max_len else ""
            raise ValueError("Unrecognized JSON log format. Expected '{' or '[' but got"
                             f" {first_char!r} from {error_type} content starting with"
                             f" {snippet!r}{ellipsis}")

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> ButlerLogRecords:
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream. Empty if the stream has no
            content.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.parse_raw(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.parse_raw(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.parse_raw(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: Union[str, bytes]) -> ButlerLogRecords:
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.json()`` or a streaming format of one JSON `ButlerLogRecord`
            per line. This can also support a zero-length string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The parsed records.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # Detection only examines the first character; the full payload is
        # passed so that the start of it can be quoted in an error message.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.parse_raw(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to split().
        newline = "\n" if isinstance(serialized, str) else b"\n"
        records = [ButlerLogRecord.parse_raw(line) for line in isplit(serialized, newline)  # type: ignore
                   if line]
        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string
        (`str`). Falls back to the module default if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: Optional[str]) -> Optional[str]:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.__root__)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.__root__)

    def __setitem__(self, index: int, value: Record) -> None:
        self.__root__[index] = self._validate_record(value)

    def __getitem__(self, index: Union[slice, int]) -> Union[ButlerLogRecords, ButlerLogRecord]:
        # Handles slices and returns a new collection in that
        # case.
        item = self.__root__[index]
        if isinstance(item, list):
            return type(self)(__root__=item)
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.__root__.__reversed__()

    def __delitem__(self, index: Union[slice, int]) -> None:
        del self.__root__[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.__root__)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return ``record`` as a `ButlerLogRecord`, converting it if it is
        a `logging.LogRecord`.

        Raises
        ------
        ValueError
            Raised if ``record`` is neither a `ButlerLogRecord` nor a
            `logging.LogRecord`.
        """
        if isinstance(record, ButlerLogRecord):
            return record
        if isinstance(record, LogRecord):
            return ButlerLogRecord.from_record(record)
        raise ValueError("Can only append item of type ButlerLogRecord or logging.LogRecord,"
                         f" not {type(record)}")

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index."""
        self.__root__.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end of the collection."""
        value = self._validate_record(value)
        self.__root__.append(value)

    def clear(self) -> None:
        """Remove all records."""
        self.__root__.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add every record from the iterable to the end of the collection."""
        self.__root__.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at ``index`` (default last)."""
        return self.__root__.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.__root__.reverse()
class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records.

    Every record passed to `emit` is stored in the ``records`` attribute,
    a `ButlerLogRecords` collection that starts out empty.
    """

    def __init__(self) -> None:
        super().__init__()
        # Start with an empty collection; emit() fills it.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        """Store the record in ``records``."""
        self.records.append(record)
class JsonLogFormatter(Formatter):
    """Format a `LogRecord` in JSON format."""

    def format(self, record: LogRecord) -> str:
        """Convert the record to a `ButlerLogRecord` and return its JSON
        serialization, leaving out fields that are unset or at their
        default value.
        """
        return ButlerLogRecord.from_record(record).json(exclude_unset=True, exclude_defaults=True)