Coverage for python/lsst/daf/butler/logging.py: 41%

237 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-06 10:53 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", "ButlerLogRecord", "JsonLogFormatter") 

29 

30import datetime 

31import logging 

32import traceback 

33from collections.abc import Callable, Generator, Iterable, Iterator 

34from contextlib import contextmanager 

35from logging import Formatter, LogRecord, StreamHandler 

36from typing import IO, Any, ClassVar, Union, overload 

37 

38from lsst.utils.introspection import get_full_type_name 

39from lsst.utils.iteration import isplit 

40from pydantic import ConfigDict, PrivateAttr 

41 

42from ._compat import PYDANTIC_V2, _BaseModelCompat 

43 

44_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}" 

45"""Default format for log records.""" 

46 

47 

class MDCDict(dict):
    """Mapping used to carry MDC values on log records.

    Internal helper that gives MDC content a compact rendering in logging
    output. Looking up a key that is not present yields an empty string
    (the key is not inserted), and both ``__str__`` and ``__repr__`` emit
    ``{key=value, ...}`` with the values interpolated without quotes.
    """

    def __getitem__(self, name: str) -> str:
        """Return the stored value, or an empty string for an unknown key."""
        try:
            return super().__getitem__(name)
        except KeyError:
            return ""

    def __str__(self) -> str:
        """Return ``{k=v, ...}`` with keys in sorted order and no quoting
        of the values.
        """
        pairs = [f"{key}={self[key]}" for key in sorted(self)]
        return "{" + ", ".join(pairs) + "}"

    def __repr__(self) -> str:
        # Deliberately identical to ``__str__`` so that ``{MDC!r}`` and
        # ``{MDC}`` look the same in formatted log output.
        return str(self)

69 

70 

class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Log record factory that was active before the MDC-aware factory was
    installed, or `None` if there is nothing to restore.
    """

    # The MDC-aware factory installed by `add_mdc_log_record_factory`, kept
    # so that a repeated call can recognise its own wrapper.
    _mdc_factory: Callable[..., logging.LogRecord] | None = None

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key to remove.
        """
        cls._MDC.pop(key, None)

    @classmethod
    def clear_mdc(cls) -> None:
        """Clear all MDC entries."""
        cls._MDC.clear()

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously). A key that
        previously held an empty string is restored to an empty string
        rather than being removed.
        """
        # Sentinel distinguishing "key was absent" from "key held an empty
        # string"; item lookup on the MDC dict reports "" in both cases.
        absent = object()
        previous: dict[str, Any] = {}

        try:
            for key, value in mdc.items():
                previous[key] = cls._MDC.get(key, absent)
                cls._MDC[key] = value
            yield
        finally:
            # Only keys that were actually modified are present in
            # ``previous`` so a failure part way through setup is also
            # rolled back correctly.
            for key, old in previous.items():
                if old is absent:
                    cls.MDCRemove(key)
                else:
                    cls._MDC[key] = old

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        Calling this again while the MDC-aware factory is still the active
        factory does nothing, so the factory remembered for
        `restore_log_record_factory` is never replaced by our own wrapper.
        """
        old_factory = logging.getLogRecordFactory()

        if cls._mdc_factory is not None and old_factory is cls._mdc_factory:
            # Already installed. Wrapping again would store our own wrapper
            # as the "old" factory and make the original unrecoverable.
            return

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        cls._old_factory = old_factory
        cls._mdc_factory = record_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restores the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so that a later restore cannot
            # reinstall a stale factory over whatever is active by then.
            cls._old_factory = None
            cls._mdc_factory = None

170 

171 

class ButlerLogRecord(_BaseModelCompat):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    # Format used by ``format()`` when the caller does not supply one.
    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    # Fields copied directly from the LogRecord attribute of the same name,
    # except where noted.
    name: str
    # Creation time of the record as a timezone-aware UTC datetime.
    asctime: datetime.datetime
    # Fully interpolated message (the result of LogRecord.getMessage()).
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    # Formatted traceback text, present only if the record carried
    # exception information.
    exc_info: str | None = None
    # Copy of the MDC content attached to the record (empty if none).
    MDC: dict[str, str]

    if PYDANTIC_V2:
        model_config = ConfigDict(frozen=True)
    else:

        class Config:
            """Pydantic model configuration."""

            allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New record populated from the supplied log record.
        """
        # The properties that are one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.UTC)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype = record.exc_info[0]
            evalue = record.exc_info[1]
            tb = record.exc_info[2]
            # Every string returned by format_exception() already ends in a
            # newline, so concatenate without a separator; joining with
            # "\n" would insert a blank line after each entry.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style use for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.model_dump()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Render the timestamp in ISO 8601 form rather than the default
        # string form of a datetime.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        return self.format()

281 

282 

# Either flavour of record may be handed to the collection class; a plain
# LogRecord is converted to ButlerLogRecord when needed.
Record = LogRecord | ButlerLogRecord


# Define the pydantic base for the record collection. Whichever pydantic
# major version is in use, the list of records is reachable as ``.root``.
if not PYDANTIC_V2:

    class _ButlerLogRecords(_BaseModelCompat):
        __root__: list[ButlerLogRecord]

        @property
        def root(self) -> list[ButlerLogRecord]:
            return self.__root__

else:
    from pydantic import RootModel  # type: ignore

    class _ButlerLogRecords(RootModel):  # type: ignore[no-redef]
        root: list[ButlerLogRecord]

301 

302 

# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(_ButlerLogRecords):
    """Class representing a collection of `ButlerLogRecord`."""

    # Format string used by ``__str__``; `None` selects the default format.
    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection holding the supplied records in a new list.
        """
        if PYDANTIC_V2:
            return cls(list(records))  # type: ignore
        else:
            return cls(__root__=list(records))  # type: ignore

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.model_validate_json()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            # Indexing bytes yields an int, so convert to a character.
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The repr is taken exactly once so
            # that truncated content is not quoted twice.
            max_len = 32
            snippet = repr(startdata[:max_len])
            if len(startdata) > max_len:
                snippet += "..."
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {snippet}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream. Empty if the stream has no
            content.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.model_validate_json(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.model_validate_json(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.model_validate_json(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.model_dump_json()`` or a streaming format of one JSON
            `ButlerLogRecord` per line. This can also support a zero-length
            string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The parsed records.

        Raises
        ------
        TypeError
            Raised if the streaming format is detected but the serialized
            form is neither `str` nor `bytes`.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The full payload is handed over: only the first character decides
        # the format but the leading content is quoted in any error message.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.model_validate_json(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.model_validate_json(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string
        (`str`). The default format is reported if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.root)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.root)

    def __setitem__(self, index: int, value: Record) -> None:
        self.root[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: slice | int) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.root[index]
        if isinstance(item, list):
            # Reuse the alternate constructor so that the pydantic version
            # handling lives in one place.
            return type(self).from_records(item)
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.root.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        del self.root[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.root)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return the given record as a `ButlerLogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord` or `ButlerLogRecord`
            Record to check. A `~logging.LogRecord` is converted.

        Returns
        -------
        record : `ButlerLogRecord`
            The record, converted if necessary.

        Raises
        ------
        ValueError
            Raised if the record is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            # Name the accepted types; the rejected type is reported
            # separately so the message is not read as the requirement.
            raise ValueError(
                f"Can only append item of type LogRecord or ButlerLogRecord, not {type(record)}"
            )
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index, converting if needed."""
        self.root.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end of the collection, converting if needed."""
        value = self._validate_record(value)
        self.root.append(value)

    def clear(self) -> None:
        """Remove all records from the collection."""
        self.root.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Append every record from the iterable, converting if needed."""
        self.root.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at the given index (default last)."""
        return self.root.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.root.reverse()

564 

565 

class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records."""

    def __init__(self) -> None:
        super().__init__()
        # Start from an empty collection; the alternate constructor hides
        # the difference between the supported pydantic versions.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        # Records are stored in the collection rather than being written
        # to a stream.
        self.records.append(record)

578 

579 

class JsonLogFormatter(Formatter):
    """Format a `LogRecord` in JSON format."""

    def format(self, record: LogRecord) -> str:
        # Convert to the Butler model and serialize it, omitting fields
        # that were not set or that still hold their default value.
        return ButlerLogRecord.from_record(record).model_dump_json(
            exclude_unset=True, exclude_defaults=True
        )