Coverage for python/lsst/daf/butler/core/logging.py: 36%
210 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-22 02:05 -0700
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-22 02:05 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", "ButlerLogRecord", "JsonLogFormatter")
24import datetime
25import logging
26import traceback
27from contextlib import contextmanager
28from logging import Formatter, LogRecord, StreamHandler
29from typing import IO, Any, Callable, ClassVar, Dict, Generator, Iterable, Iterator, List, Optional, Union
31from lsst.utils.introspection import get_full_type_name
32from lsst.utils.iteration import isplit
33from pydantic import BaseModel, PrivateAttr
35_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
36"""Default format for log records."""
class MDCDict(dict):
    """Mapping used to hold MDC values attached to log records.

    Internal helper that gives MDC content a compact rendering in Python
    logging output. Looking up a missing key yields an empty string (the
    key is not inserted), and ``__str__`` / ``__repr__`` produce text
    better suited to log records than the default `dict` form.
    """

    def __getitem__(self, name: str) -> str:
        """Look up ``name``, yielding ``""`` when the key is absent."""
        return self.get(name, "")

    def __str__(self) -> str:
        """Render as ``{key=value, ...}`` with keys in sorted order and
        values interpolated without quotes.
        """
        body = ", ".join(f"{key}={self[key]}" for key in sorted(self))
        return f"{{{body}}}"

    def __repr__(self) -> str:
        """Return the same text as ``__str__``."""
        return self.__str__()
class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Optional[Callable[..., logging.LogRecord]] = None
    """Old log record factory."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key to remove.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: Dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously). A previous
        value that was an empty string cannot be distinguished from an
        unset key and is therefore also removed on exit.
        """
        previous = {}
        for k, v in mdc.items():
            previous[k] = cls.MDC(k, v)

        try:
            yield
        finally:
            for k, v in previous.items():
                if not v:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        Calling this again while the factory installed by this class is
        still the active one is a no-op. Wrapping it a second time would
        replace the remembered original factory with our own wrapper and
        make it impossible for `restore_log_record_factory` to put the
        original back.
        """
        old_factory = logging.getLogRecordFactory()

        if getattr(old_factory, "_butler_mdc_owner", None) is cls:
            # Our factory is already the active one.
            return

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        # Tag the wrapper so a repeated call can recognise it.
        setattr(record_factory, "_butler_mdc_owner", cls)

        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`, or if the factory has already been
        restored since the last such call.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so that a later call cannot clobber
            # a factory that was installed after this restore.
            cls._old_factory = None
class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: Optional[str]
    process: int
    processName: str
    exc_info: Optional[str]
    MDC: Dict[str, str]

    class Config:
        """Pydantic model configuration."""

        allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New record holding a copy of the relevant fields.
        """
        # The properties that are one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype = record.exc_info[0]
            evalue = record.exc_info[1]
            tb = record.exc_info[2]
            # Each string returned by format_exception() already ends in a
            # newline, so join with an empty separator; joining with "\n"
            # would insert a blank line between every entry.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: Optional[str] = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style use for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.dict()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Render the timestamp in ISO 8601 form rather than the default
        # str() of a datetime.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        """Return the record formatted with the class default format."""
        return self.format()
265# The class below can convert LogRecord to ButlerLogRecord if needed.
266Record = Union[LogRecord, ButlerLogRecord]
# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(BaseModel):
    """Class representing a collection of `ButlerLogRecord`.

    The collection offers a list-like interface (indexing, slicing,
    ``append``, ``extend``, ...). Items added through that interface may be
    either `ButlerLogRecord` or `logging.LogRecord`; the latter are
    converted on the way in.
    """

    __root__: List[ButlerLogRecord]
    _log_format: Optional[str] = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.
        """
        return cls(__root__=list(records))

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename, "r") as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: Union[str, bytes]) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.parse_raw()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The snippet is passed through
            # repr() exactly once so that quotes are not escaped twice.
            max_length = 32
            if len(startdata) > max_length:
                snippet = f"{startdata[:max_length]!r}..."
            else:
                snippet = repr(startdata)
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {snippet}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.parse_raw(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.parse_raw(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.parse_raw(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: Union[str, bytes]) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.json()`` or a streaming format of one JSON `ButlerLogRecord`
            per line. This can also support a zero-length string.

        Raises
        ------
        TypeError
            Raised if the streaming form is neither `str` nor `bytes`.
        ValueError
            Raised if the content does not start like a supported format.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The detector only inspects the first character, except when it
        # needs a short snippet for an error message.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.parse_raw(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[Union[str, bytes]]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.parse_raw(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used by ``__str__``; the module default is used
        when no explicit format has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: Optional[str]) -> Optional[str]:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.__root__)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.__root__)

    def __setitem__(self, index: int, value: Record) -> None:
        self.__root__[index] = self._validate_record(value)

    def __getitem__(self, index: Union[slice, int]) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.__root__[index]
        if isinstance(item, list):
            sliced = type(self)(__root__=item)
            # Carry the format over so that a slice is rendered the same
            # way as the collection it was taken from.
            sliced.set_log_format(self._log_format)
            return sliced
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.__root__.__reversed__()

    def __delitem__(self, index: Union[slice, int]) -> None:
        del self.__root__[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.__root__)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return ``record`` as a `ButlerLogRecord`, converting it if it is
        a `logging.LogRecord`.

        Raises
        ------
        ValueError
            Raised if the item is neither a `ButlerLogRecord` nor a
            `logging.LogRecord`.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            # Report both what is accepted and what was actually given.
            raise ValueError(
                "Can only add items of type LogRecord or ButlerLogRecord,"
                f" not {get_full_type_name(record)}"
            )
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before position ``index``."""
        self.__root__.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end of the collection."""
        value = self._validate_record(value)
        self.__root__.append(value)

    def clear(self) -> None:
        """Remove all records."""
        self.__root__.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add every record from the iterable to the end of the collection."""
        self.__root__.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at ``index`` (the last by default)."""
        return self.__root__.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.__root__.reverse()
class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records."""

    def __init__(self) -> None:
        """Set up the handler with an empty record collection."""
        super().__init__()
        # Start with no records; ``emit`` adds to this collection.
        self.records = ButlerLogRecords(__root__=list())

    def emit(self, record: LogRecord) -> None:
        """Store the record in ``self.records``.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record being handled.
        """
        collected = self.records
        collected.append(record)
class JsonLogFormatter(Formatter):
    """Format a `LogRecord` in JSON format."""

    def format(self, record: LogRecord) -> str:
        """Convert the record to a `ButlerLogRecord` and serialize it.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record to format.

        Returns
        -------
        text : `str`
            JSON text for the record, with unset and default-valued fields
            left out.
        """
        return ButlerLogRecord.from_record(record).json(exclude_defaults=True, exclude_unset=True)