Coverage for python/lsst/daf/butler/core/logging.py: 33%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler",
23 "ButlerLogRecord", "JsonLogFormatter")
25import logging
26import datetime
27import traceback
28from contextlib import contextmanager
29from typing import List, Union, Optional, ClassVar, Iterable, Iterator, Dict, IO, Any, Generator
31from logging import LogRecord, StreamHandler, Formatter
32from pydantic import BaseModel, PrivateAttr
34from lsst.utils.iteration import isplit
35from lsst.utils.introspection import get_full_type_name
# Layout applied when a record is rendered as text and the caller has not
# supplied an explicit format string.  The fields are filled in with
# ``str.format`` using the attributes of a log record.
_LONG_LOG_FORMAT = (
    "{levelname} {asctime} {name} "
    "{filename}:{lineno} - {message}"
)
"""Default format for log records."""
class MDCDict(dict):
    """Mapping that holds MDC values and renders them compactly.

    This is an internal helper used to get better formatting of MDC content
    in Python logging output.  Looking up a key that is absent yields an
    empty string instead of raising `KeyError` (the key is *not* added to
    the mapping), and both ``__str__`` and ``__repr__`` produce a form
    better suited to log records than the default `dict` rendering.
    """

    def __getitem__(self, name: str) -> str:
        """Return the value stored for ``name`` or ``""`` if it is absent.
        """
        try:
            return super().__getitem__(name)
        except KeyError:
            return ""

    def __str__(self) -> str:
        """Return ``{key=value, ...}`` with keys sorted and values
        interpolated without quotes.
        """
        rendered = [f"{key}={self[key]}" for key in sorted(self)]
        return "{" + ", ".join(rendered) + "}"

    def __repr__(self) -> str:
        """Return the same text as ``str()``."""
        return self.__str__()
class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory = None
    """Old log record factory."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: Dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        # A sentinel distinguishes "key was not set" from "key was set to
        # an empty string"; looking the key up directly cannot, because a
        # missing key also reads back as an empty string.
        unset = object()
        previous = {}
        for k, v in mdc.items():
            previous[k] = cls._MDC[k] if k in cls._MDC else unset
            cls.MDC(k, v)

        try:
            yield
        finally:
            for k, v in previous.items():
                if v is unset:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        Calling this again while the MDC factory is still the active log
        record factory does nothing. Wrapping a second time would replace
        the remembered original factory with the MDC factory itself and
        make `restore_log_record_factory` unable to undo the change.
        """
        old_factory = logging.getLogRecordFactory()

        if getattr(old_factory, "_butler_mdc_factory", False):
            # Our factory is already installed.
            return

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)  # type: ignore
            return record

        # Tag the function so a later call can recognize it.
        record_factory._butler_mdc_factory = True  # type: ignore

        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restores the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so that a repeated call cannot
            # overwrite a factory that was installed afterwards.
            cls._old_factory = None
class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: Optional[str]
    process: int
    processName: str
    exc_info: Optional[str]
    MDC: Dict[str, str]

    class Config:
        """Pydantic model configuration."""

        allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New record holding a copy of the relevant content of
            ``record``.
        """
        # The properties that are one-to-one mapping.
        simple = ("name", "levelno", "levelname", "filename", "pathname",
                  "lineno", "funcName", "process", "processName")

        record_dict: Dict[str, Any] = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created,
                                                                 tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype = record.exc_info[0]
            evalue = record.exc_info[1]
            tb = record.exc_info[2]
            # Every string returned by format_exception() already ends in a
            # newline, so the pieces are concatenated directly; joining
            # them with "\n" would insert a blank line after each entry.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: Optional[str] = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style use for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.dict()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Render the timestamp as ISO 8601 text rather than relying on the
        # default string form of `datetime.datetime`.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        """Return the record formatted with the class default format."""
        return self.format()
# The class below can convert LogRecord to ButlerLogRecord if needed.
Record = Union[LogRecord, ButlerLogRecord]


# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(BaseModel):
    """Class representing a collection of `ButlerLogRecord`.

    The collection offers a list-like interface. Methods that add items
    accept either `ButlerLogRecord` or `logging.LogRecord`; the latter is
    converted to `ButlerLogRecord` before being stored.
    """

    __root__: List[ButlerLogRecord]
    _log_format: Optional[str] = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.
        """
        return cls(__root__=list(records))

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename, "r") as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: Union[str, bytes]) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.parse_raw()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The repr is taken exactly once so
            # that truncated content is not quoted twice.
            max_length = 32
            snippet = repr(startdata[:max_length])
            if len(startdata) > max_length:
                snippet += "..."
            raise ValueError("Unrecognized JSON log format. Expected '{' or '[' but got"
                             f" {first_char!r} from {error_type} content starting with {snippet}")

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.parse_raw(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.parse_raw(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.parse_raw(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: Union[str, bytes]) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.json()`` or a streaming format of one JSON `ButlerLogRecord`
            per line. This can also support a zero-length string.

        Raises
        ------
        TypeError
            Raised if streaming-format content is neither `str` nor
            `bytes`.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # Format detection only examines the first character; the full
        # content is handed over so that an error message can quote the
        # start of it.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.parse_raw(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[Union[str, bytes]]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.parse_raw(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string
        (`str`). The module default is used if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: Optional[str]) -> Optional[str]:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.__root__)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.__root__)

    def __setitem__(self, index: int, value: Record) -> None:
        self.__root__[index] = self._validate_record(value)

    def __getitem__(self, index: Union[slice, int]) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.__root__[index]
        if isinstance(item, list):
            return type(self)(__root__=item)
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.__root__.__reversed__()

    def __delitem__(self, index: Union[slice, int]) -> None:
        del self.__root__[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.__root__)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return ``record`` as a `ButlerLogRecord`, converting it if it
        is a `logging.LogRecord`.

        Raises
        ------
        ValueError
            Raised if the item is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            # Report the types that are accepted as well as the type that
            # was actually received.
            raise ValueError("Can only append item of type LogRecord or ButlerLogRecord,"
                             f" not {type(record)}")
        return record

    def insert(self, index: int, value: Record) -> None:
        self.__root__.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        value = self._validate_record(value)
        self.__root__.append(value)

    def clear(self) -> None:
        self.__root__.clear()

    def extend(self, records: Iterable[Record]) -> None:
        self.__root__.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        return self.__root__.pop(index)

    def reverse(self) -> None:
        self.__root__.reverse()
class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records.

    Each record given to `emit` is appended to the ``records`` attribute
    instead of being written to a stream.
    """

    def __init__(self) -> None:
        super().__init__()
        # Start from an empty collection; it grows with each emitted record.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        """Store the supplied record in ``records``."""
        self.records.append(record)
class JsonLogFormatter(Formatter):
    """Formatter that serializes a `LogRecord` as a JSON string."""

    def format(self, record: LogRecord) -> str:
        """Convert the record to a `ButlerLogRecord` and return its JSON
        form, leaving out fields that were not set or that hold their
        default value.
        """
        return ButlerLogRecord.from_record(record).json(exclude_unset=True, exclude_defaults=True)