Coverage for python/lsst/daf/butler/logging.py: 41%

220 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-25 10:50 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", "ButlerLogRecord", "JsonLogFormatter") 

29 

30import datetime 

31import logging 

32import traceback 

33from collections.abc import Callable, Generator, Iterable, Iterator 

34from contextlib import contextmanager 

35from logging import Formatter, LogRecord, StreamHandler 

36from typing import IO, Any, ClassVar, Union, overload 

37 

38from lsst.utils.introspection import get_full_type_name 

39from lsst.utils.iteration import isplit 

40from pydantic import BaseModel, ConfigDict, PrivateAttr, RootModel 

41 

_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Format string used for log records unless another format is supplied."""

44 

45 

class MDCDict(dict):
    """Dictionary holding MDC data.

    This is an internal class used to get better formatting of MDC content
    in Python logging output. Looking up a missing key yields an empty
    string instead of raising `KeyError` (much like ``defaultdict(str)``,
    except that the missing key is not inserted), and ``__str__`` /
    ``__repr__`` produce a compact form suited to log records.
    """

    def __getitem__(self, name: str) -> str:
        """Return the value stored for ``name``, or ``""`` if it is absent."""
        try:
            return super().__getitem__(name)
        except KeyError:
            return ""

    def __str__(self) -> str:
        """Return ``{key=value, ...}`` with sorted keys and unquoted
        values.
        """
        pairs = [f"{key}={self[key]}" for key in sorted(self)]
        body = ", ".join(pairs)
        return f"{{{body}}}"

    def __repr__(self) -> str:
        """Return the same text as ``str()`` would."""
        return str(self)

67 

68 

class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Old log record factory."""

    _installed_factory: Callable[..., logging.LogRecord] | None = None
    """The MDC-adding factory most recently installed by this class."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key for which the MDC value should be removed.
        """
        cls._MDC.pop(key, None)

    @classmethod
    def clear_mdc(cls) -> None:
        """Clear all MDC entries."""
        cls._MDC.clear()

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        # A private sentinel distinguishes "key was absent" from "key was
        # explicitly set to an empty string"; MDC() reports "" for both.
        unset = object()
        previous: dict[str, Any] = {}
        for k, v in mdc.items():
            previous[k] = cls._MDC.get(k, unset)
            cls.MDC(k, v)

        try:
            yield
        finally:
            for k, v in previous.items():
                if v is unset:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        Calling this again while the factory installed by a previous call
        is still the active one does nothing. Wrapping it a second time
        would replace the remembered original factory with our own wrapper,
        making it impossible for `restore_log_record_factory` to undo.
        """
        old_factory = logging.getLogRecordFactory()
        if cls._installed_factory is not None and old_factory is cls._installed_factory:
            return

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        cls._old_factory = old_factory
        cls._installed_factory = record_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory` since the last restore.
        """
        if cls._old_factory:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so that a later restore cannot
            # clobber a factory that was installed in the meantime.
            cls._old_factory = None
            cls._installed_factory = None

173 

174 

class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    exc_info: str | None = None
    MDC: dict[str, str]

    model_config = ConfigDict(frozen=True)

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New model populated from the given record.
        """
        # Attributes copied across unchanged under the same name.
        copied = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        fields: dict[str, Any] = {}
        for attr in copied:
            fields[attr] = getattr(record, attr)

        fields["message"] = record.getMessage()

        # Copy the MDC content so that later changes to the global MDC
        # are not reflected in this record. Records created without the
        # MDC record factory have no MDC attribute at all.
        fields["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        fields["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.UTC)

        # Exception information, when present, is stored as formatted text.
        info = record.exc_info
        if info:
            fields["exc_info"] = "\n".join(traceback.format_exception(info[0], info[1], info[2]))

        return cls(**fields)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style use for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        template = self._log_format if log_format is None else log_format

        values = self.model_dump()

        # Wrap the MDC content in an MDCDict so that a format string
        # referring to a missing MDC key does not break formatting.
        values["MDC"] = MDCDict(values["MDC"])
        values["asctime"] = values["asctime"].isoformat()

        return template.format(**values)

    def __str__(self) -> str:
        """Return this record formatted with the class default format."""
        return self.format()

277 

278 

# Either kind of record is accepted by the collection class below, which
# converts a LogRecord to a ButlerLogRecord when needed.
Record = LogRecord | ButlerLogRecord


class _ButlerLogRecords(RootModel):
    """Root model whose content is a list of `ButlerLogRecord`."""

    root: list[ButlerLogRecord]

285 

286 

287# Do not inherit from MutableSequence since mypy insists on the values 

288# being Any even though we wish to constrain them to Record. 

class ButlerLogRecords(_ButlerLogRecords):
    """Class representing a collection of `ButlerLogRecord`."""

    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection holding the given records.
        """
        return cls.model_construct(root=list(records))

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.model_validate_json()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]
            error_type = "str"

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The snippet is passed through repr()
            # exactly once, with the ellipsis appended outside the quotes.
            max_length = 32
            snippet = repr(startdata[:max_length])
            if len(startdata) > max_length:
                snippet += "..."
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {snippet}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        if cls._detect_model(first_line):
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.model_validate_json(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.model_validate_json(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.model_validate_json(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.model_dump_json()`` or a streaming format of one JSON
            `ButlerLogRecord` per line. This can also support a zero-length
            string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The parsed records.

        Raises
        ------
        TypeError
            Raised if the streaming form is neither `str` nor `bytes`.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The detection only looks at the first character, so the data
        # can be handed over as-is without slicing or copying.
        if cls._detect_model(serialized):
            return cls.model_validate_json(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.model_validate_json(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used by ``str()`` on this collection (`str`).

        Falls back to the module default when no format has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        """Return the number of records held."""
        return len(self.root)

    # The signature does not match the one in BaseModel, which is why the
    # type check is suppressed here; iteration yields the records held in
    # the root list.
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.root)

    def __setitem__(self, index: int, value: Record) -> None:
        """Replace the record at ``index``, converting it if needed."""
        self.root[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: slice | int) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.root[index]
        if isinstance(item, list):
            return type(self)(item)
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        """Iterate over the records in reverse order."""
        return self.root.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        """Delete the record(s) at ``index``."""
        del self.root[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.root)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return ``record`` as a `ButlerLogRecord`.

        Parameters
        ----------
        record : `ButlerLogRecord` or `logging.LogRecord`
            Record to check. A `logging.LogRecord` is converted.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            The record itself, or its converted form.

        Raises
        ------
        ValueError
            Raised if the record is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            raise ValueError(
                f"Can only append item of type ButlerLogRecord or LogRecord, not {type(record)}"
            )
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before ``index``, converting it if needed."""
        self.root.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end, converting it if needed."""
        value = self._validate_record(value)
        self.root.append(value)

    def clear(self) -> None:
        """Remove all records."""
        self.root.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add several records to the end, converting each if needed."""
        self.root.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at ``index`` (default last)."""
        return self.root.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.root.reverse()

542 

543 

class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records.

    Every record passed to `emit` is appended to the ``records``
    attribute, a `ButlerLogRecords` collection that starts out empty.
    """

    def __init__(self) -> None:
        super().__init__()
        # Start with an empty collection; records are added by emit().
        empty: list[ButlerLogRecord] = []
        self.records = ButlerLogRecords.model_construct(root=empty)

    def emit(self, record: LogRecord) -> None:
        """Store the given record in the ``records`` collection.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record being handled.
        """
        self.records.append(record)

553 

554 

class JsonLogFormatter(Formatter):
    """Format a `LogRecord` in JSON format."""

    def format(self, record: LogRecord) -> str:
        """Return the record serialized as a JSON string.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record to format.

        Returns
        -------
        text : `str`
            JSON form of the equivalent `ButlerLogRecord`, leaving out
            fields that were not set or that hold their default value.
        """
        return ButlerLogRecord.from_record(record).model_dump_json(
            exclude_unset=True,
            exclude_defaults=True,
        )