Coverage for python/lsst/daf/butler/logging.py: 42%

219 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-18 09:55 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", "ButlerLogRecord", "JsonLogFormatter") 

31 

32import datetime 

33import logging 

34import traceback 

35from collections.abc import Callable, Generator, Iterable, Iterator 

36from contextlib import contextmanager 

37from logging import Formatter, LogRecord, StreamHandler 

38from typing import IO, Any, ClassVar, overload 

39 

40from lsst.utils.introspection import get_full_type_name 

41from lsst.utils.iteration import isplit 

42from pydantic import BaseModel, ConfigDict, PrivateAttr, RootModel 

43 

# Brace-style (``str.format``) template. The placeholders are field names of
# ButlerLogRecord; ButlerLogRecord.format() expands the template with the
# record's fields passed as keyword arguments.
_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Default format for log records."""

46 

47 

class MDCDict(dict):
    """Mapping that stores MDC values and renders them compactly.

    An internal helper that makes MDC content look good in Python logging
    output. Indexing a missing key yields an empty string instead of
    raising, and both ``__str__`` and ``__repr__`` produce a compact
    ``{key=value, ...}`` form without quotes.
    """

    def __getitem__(self, name: str) -> str:
        """Look up ``name``, yielding an empty string when it is absent."""
        value = self.get(name, "")
        return value

    def __str__(self) -> str:
        """Render the entries as ``{k=v, ...}`` ordered by key, with the
        values interpolated without quotes.
        """
        rendered = ", ".join(f"{key}={self[key]}" for key in sorted(self))
        return "{" + rendered + "}"

    def __repr__(self) -> str:
        # Deliberately identical to the string form.
        return str(self)

69 

70 

class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Old log record factory."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key for which the MDC value should be removed.
        """
        cls._MDC.pop(key, None)

    @classmethod
    def clear_mdc(cls) -> None:
        """Clear all MDC entries."""
        cls._MDC.clear()

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        # The value returned by MDC() can not distinguish "unset" from
        # "set to an empty string", so record absence explicitly with a
        # sentinel. ``dict.get`` bypasses the MDCDict missing-key default.
        unset = object()
        previous: dict[str, Any] = {}
        for k, v in mdc.items():
            previous[k] = cls._MDC.get(k, unset)
            cls.MDC(k, v)

        try:
            yield
        finally:
            for k, v in previous.items():
                if v is unset:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        The factory that was active when this method was called is
        remembered so that `restore_log_record_factory` can reinstate it.
        Each call overwrites the remembered factory, so calling this
        method repeatedly stacks factories and only the most recently
        replaced one can be restored.
        """
        old_factory = logging.getLogRecordFactory()

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restores the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)

175 

176 

class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    # Template used by format() when the caller does not supply one.
    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    exc_info: str | None = None
    MDC: dict[str, str]

    model_config = ConfigDict(frozen=True)

    @classmethod
    def from_record(cls, record: LogRecord) -> ButlerLogRecord:
        """Build a model instance out of a standard `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The log record supplying the values.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New model populated from ``record``.
        """
        fields: dict[str, Any] = {}

        # Attributes that are copied across unchanged.
        for attr in (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        ):
            fields[attr] = getattr(record, attr)

        fields["message"] = record.getMessage()

        # Take a private copy of the MDC so that later changes to the
        # global MDC can not leak into this record. A record created
        # without the MDC factory simply gets an empty mapping.
        fields["MDC"] = dict(getattr(record, "MDC", {}))

        # Timestamps are always UTC: local time zones are unreliable in a
        # distributed system and a common zone makes logs comparable.
        fields["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.UTC)

        # Flatten any attached exception information into text.
        exc = record.exc_info
        if exc:
            lines = traceback.format_exception(exc[0], exc[1], exc[2])
            fields["exc_info"] = "\n".join(lines)

        return cls(**fields)

    def format(self, log_format: str | None = None) -> str:
        """Render this record as text.

        Parameters
        ----------
        log_format : `str`, optional
            Brace-style (``str.format``) template naming fields of this
            record. The class default is used when `None`.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        template = self._log_format if log_format is None else log_format

        values = self.model_dump()
        # Wrap the MDC so that a template referring to a missing MDC key
        # expands to an empty string rather than failing.
        values["MDC"] = MDCDict(values["MDC"])
        values["asctime"] = values["asctime"].isoformat()

        return template.format(**values)

    def __str__(self) -> str:
        return self.format()

279 

280 

# Either kind of record can be handed to ButlerLogRecords: a LogRecord is
# converted to a ButlerLogRecord before it is stored in the collection.
Record = LogRecord | ButlerLogRecord

283 

284 

# Private pydantic root model whose root value is a list of ButlerLogRecord.
# The public ButlerLogRecords class derives from it.
class _ButlerLogRecords(RootModel):
    # The records themselves; list-style access goes through this attribute.
    root: list[ButlerLogRecord]

287 

288 

289# Do not inherit from MutableSequence since mypy insists on the values 

290# being Any even though we wish to constrain them to Record. 

class ButlerLogRecords(_ButlerLogRecords):
    """Class representing a collection of `ButlerLogRecord`."""

    # Format used when converting the collection to a string; `None` means
    # the module default is used.
    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> ButlerLogRecords:
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection holding the supplied records. The records are
            stored as given, without re-validation.
        """
        return cls.model_construct(root=list(records))

    @classmethod
    def from_file(cls, filename: str) -> ButlerLogRecords:
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.model_validate_json()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]
            error_type = "str"

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The repr is taken exactly once so
            # the message shows the content and not a repr of a repr.
            max_length = 32
            snippet = repr(startdata[:max_length])
            if len(startdata) > max_length:
                snippet += "..."
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {snippet}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> ButlerLogRecords:
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream. Empty if the stream has no
            content.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        if cls._detect_model(first_line):
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.model_validate_json(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.model_validate_json(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.model_validate_json(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> ButlerLogRecords:
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.model_dump_json()`` or a streaming format of one JSON
            `ButlerLogRecord` per line. This can also support a zero-length
            string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The parsed records.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The detector only inspects the first character but is handed the
        # full content so that it can quote the start of it in any error.
        if cls._detect_model(serialized):
            return cls.model_validate_json(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.model_validate_json(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string
        (`str`). Falls back to the module default if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.root)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.root)

    def __setitem__(self, index: int, value: Record) -> None:
        self.root[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord: ...

    @overload
    def __getitem__(self, index: slice) -> ButlerLogRecords: ...

    def __getitem__(self, index: slice | int) -> ButlerLogRecords | ButlerLogRecord:
        # Handles slices and returns a new collection in that
        # case.
        item = self.root[index]
        if isinstance(item, list):
            return type(self)(item)
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.root.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        del self.root[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.root)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return ``record`` as a `ButlerLogRecord`, converting it if needed.

        Parameters
        ----------
        record : `ButlerLogRecord` or `logging.LogRecord`
            The record to check.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            The record itself if it already was a `ButlerLogRecord`, else
            the result of converting the `~logging.LogRecord`.

        Raises
        ------
        ValueError
            Raised if the record is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            return record
        if isinstance(record, LogRecord):
            return ButlerLogRecord.from_record(record)
        raise ValueError(
            f"Can only append item of type ButlerLogRecord or LogRecord, not {type(record)}"
        )

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index, converting it if needed."""
        self.root.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end, converting it if needed."""
        value = self._validate_record(value)
        self.root.append(value)

    def clear(self) -> None:
        """Remove all records."""
        self.root.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add every record from the iterable, converting each if needed."""
        self.root.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at ``index`` (default last)."""
        return self.root.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.root.reverse()

542 

543 

class ButlerLogRecordHandler(StreamHandler):
    """Logging handler that collects every record it is given.

    The collected records are available from the ``records`` attribute
    as a `ButlerLogRecords` collection.
    """

    def __init__(self) -> None:
        super().__init__()
        # Begin with no records; emit() adds to this collection.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        """Store the record in the ``records`` collection.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record to keep.
        """
        self.records.append(record)

553 

554 

class JsonLogFormatter(Formatter):
    """Formatter that renders each `LogRecord` as JSON."""

    def format(self, record: LogRecord) -> str:
        """Convert the record to a `ButlerLogRecord` and serialize it.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record to format.

        Returns
        -------
        text : `str`
            JSON form of the record, leaving out fields that were not set
            or that still hold their default value.
        """
        return ButlerLogRecord.from_record(record).model_dump_json(
            exclude_unset=True, exclude_defaults=True
        )