Coverage for python/lsst/daf/butler/logging.py: 40%

234 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-27 09:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

# Names exported as the public API of this module.
__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", "ButlerLogRecord", "JsonLogFormatter")

29 

30import datetime 

31import logging 

32import traceback 

33from collections.abc import Callable, Generator, Iterable, Iterator 

34from contextlib import contextmanager 

35from logging import Formatter, LogRecord, StreamHandler 

36from typing import IO, Any, ClassVar, Union, overload 

37 

38from lsst.utils.introspection import get_full_type_name 

39from lsst.utils.iteration import isplit 

40from pydantic import ConfigDict, PrivateAttr 

41 

42from ._compat import PYDANTIC_V2, _BaseModelCompat 

43 

# Fallback format used by ButlerLogRecord (as its class-level ``_log_format``)
# and by ButlerLogRecords.log_format when no explicit format has been set.
# The replacement fields are expanded with ``str.format``.
_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Default format for log records."""

46 

47 

class MDCDict(dict):
    """Mapping that holds MDC key/value pairs.

    Internal helper that makes MDC content render nicely in Python logging
    output. Looking up a missing key yields an empty string (much like
    `defaultdict(str)`, although the key is not inserted), and both
    ``__str__`` and ``__repr__`` produce a compact form suited to log
    records.
    """

    def __getitem__(self, name: str) -> str:
        """Return the value stored for ``name``, or ``""`` if it is absent."""
        if name in self:
            return dict.__getitem__(self, name)
        return ""

    def __str__(self) -> str:
        """Return ``{key=value, ...}`` with keys sorted and values
        interpolated without quotes.
        """
        body = ", ".join(f"{key}={self[key]}" for key in sorted(self))
        return f"{{{body}}}"

    def __repr__(self) -> str:
        # The repr is deliberately identical to the string form.
        return self.__str__()

69 

70 

class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Old log record factory."""

    # The MDC-injecting factory most recently installed by this class. It is
    # remembered so that a repeated installation request can be recognised.
    _mdc_factory: Callable[..., logging.LogRecord] | None = None

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key to remove.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        previous = {}
        for k, v in mdc.items():
            previous[k] = cls.MDC(k, v)

        try:
            yield
        finally:
            for k, v in previous.items():
                # An empty previous value means the key was missing (or
                # empty), so drop it rather than storing an empty string.
                if not v:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        Calling this method again while the factory it installed is still
        the active one is a no-op. Without that guard the saved "old"
        factory would be replaced by this class's own wrapper and
        `restore_log_record_factory` could never reinstate the original.
        """
        current_factory = logging.getLogRecordFactory()
        if cls._mdc_factory is not None and current_factory is cls._mdc_factory:
            # Our factory is already active; nothing to do.
            return

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = current_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        cls._old_factory = current_factory
        cls._mdc_factory = record_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restores the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory` since the last restore.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved state so that a second restore cannot clobber
            # a factory that somebody else installs afterwards.
            cls._old_factory = None
            cls._mdc_factory = None

165 

166 

class ButlerLogRecord(_BaseModelCompat):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    exc_info: str | None = None
    MDC: dict[str, str]

    # Instances are immutable; the configuration spelling depends on the
    # pydantic major version in use.
    if PYDANTIC_V2:
        model_config = ConfigDict(frozen=True)
    else:

        class Config:
            """Pydantic model configuration."""

            allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New record populated from ``record``.
        """
        # The properties that are one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            # Every string returned by format_exception() already ends with
            # a newline, so concatenate them directly; joining with "\n"
            # would insert a blank line between traceback entries.
            record_dict["exc_info"] = "".join(traceback.format_exception(*record.exc_info))

        return cls(**record_dict)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. It is expanded with `str.format`
            using the fields of this record as replacement names. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.model_dump()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Render the timestamp in ISO 8601 form.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        # String form is the record rendered with the default format.
        return self.format()

276 

277 

# The class below can convert LogRecord to ButlerLogRecord if needed.
# ``Record`` is the union of the two record types used in the signatures of
# the mutating methods of the record collection.
Record = LogRecord | ButlerLogRecord


# Base model holding the list of records. The two pydantic major versions
# declare a "root" model differently, so the definition is selected by the
# PYDANTIC_V2 flag. Either way the list is reachable as ``root``.
if PYDANTIC_V2:
    from pydantic import RootModel  # type: ignore

    class _ButlerLogRecords(RootModel):
        root: list[ButlerLogRecord]

else:

    class _ButlerLogRecords(_BaseModelCompat):  # type:ignore[no-redef]
        __root__: list[ButlerLogRecord]

        # Expose the list under the same name as the pydantic v2 variant so
        # that code using ``root`` works with either definition.
        @property
        def root(self) -> list[ButlerLogRecord]:
            return self.__root__

296 

297 

298# Do not inherit from MutableSequence since mypy insists on the values 

299# being Any even though we wish to constrain them to Record. 

class ButlerLogRecords(_ButlerLogRecords):
    """Class representing a collection of `ButlerLogRecord`."""

    # Format string used by ``__str__``; `None` selects the module default.
    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection holding a list built from ``records``.
        """
        if PYDANTIC_V2:
            return cls(list(records))  # type: ignore
        else:
            return cls(__root__=list(records))  # type: ignore

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.model_validate_json()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]
            error_type = "str"

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The snippet is repr()'d exactly once
            # so that it is not wrapped in a second layer of quotes.
            max_chars = 32
            if len(startdata) > max_chars:
                snippet = f"{startdata[:max_chars]!r}..."
            else:
                snippet = repr(startdata)
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {snippet}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.model_validate_json(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.model_validate_json(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.model_validate_json(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.model_dump_json()`` or a streaming format of one JSON
            `ButlerLogRecord` per line. This can also support a zero-length
            string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The parsed records.

        Raises
        ------
        TypeError
            Raised if ``serialized`` is neither `str` nor `bytes`.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # Only the first character decides the format, but the full payload
        # is handed over so that an error message can quote how it starts.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.model_validate_json(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.model_validate_json(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string
        (`str`). Falls back to the module default if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.root)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.root)

    def __setitem__(self, index: int, value: Record) -> None:
        self.root[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: slice | int) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.root[index]
        if isinstance(item, list):
            if PYDANTIC_V2:
                return type(self)(item)  # type: ignore
            else:
                return type(self)(__root__=item)  # type: ignore
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.root.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        del self.root[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.root)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return ``record`` as a `ButlerLogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord` or `ButlerLogRecord`
            Record to check. A `~logging.LogRecord` is converted.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            The record itself, or its converted form.

        Raises
        ------
        ValueError
            Raised if ``record`` is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            # Name the accepted types; the offending type is reported last.
            raise ValueError(
                f"Can only append items of type LogRecord or ButlerLogRecord, not {type(record)}"
            )
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before ``index``, converting it if needed."""
        self.root.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end, converting it if needed."""
        value = self._validate_record(value)
        self.root.append(value)

    def clear(self) -> None:
        """Remove all records."""
        self.root.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Append every record from ``records``, converting as needed."""
        self.root.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at ``index`` (default last)."""
        return self.root.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.root.reverse()

559 

560 

class ButlerLogRecordHandler(StreamHandler):
    """Logging handler that keeps every emitted record in memory.

    Records are accumulated in the ``records`` attribute, a
    `ButlerLogRecords` collection.
    """

    def __init__(self) -> None:
        super().__init__()
        # from_records() selects the constructor form that matches the
        # pydantic major version in use.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        # The collection converts the LogRecord to a ButlerLogRecord.
        self.records.append(record)

573 

574 

class JsonLogFormatter(Formatter):
    """Formatter that renders a `LogRecord` as a JSON string."""

    def format(self, record: LogRecord) -> str:
        # Convert to the Butler model and serialize it, leaving out fields
        # that were never set or that still hold their default value.
        return ButlerLogRecord.from_record(record).model_dump_json(
            exclude_unset=True,
            exclude_defaults=True,
        )