Coverage for python/lsst/daf/butler/core/logging.py: 40%

234 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-10-02 08:00 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", "ButlerLogRecord", "JsonLogFormatter") 

29 

30import datetime 

31import logging 

32import traceback 

33from collections.abc import Callable, Generator, Iterable, Iterator 

34from contextlib import contextmanager 

35from logging import Formatter, LogRecord, StreamHandler 

36from typing import IO, Any, ClassVar, Union, overload 

37 

38from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat 

39from lsst.utils.introspection import get_full_type_name 

40from lsst.utils.iteration import isplit 

41from pydantic import ConfigDict, PrivateAttr 

42 

43_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}" 

44"""Default format for log records.""" 

45 

46 

class MDCDict(dict):
    """Mapping that holds MDC values for log records.

    Internal helper whose purpose is nicer rendering of MDC content in
    Python logging output. Looking up a missing key yields an empty string
    (similar to `defaultdict(str)`, although the lookup does not insert the
    key), and both ``__str__`` and ``__repr__`` produce a compact form that
    is better suited to log lines.
    """

    def __getitem__(self, name: str) -> str:
        """Look up ``name``, falling back to an empty string if absent."""
        return self.get(name, "")

    def __str__(self) -> str:
        """Render as ``{key=value, ...}`` with sorted keys; string values
        are interpolated without quotes.
        """
        body = ", ".join(f"{key}={self[key]}" for key in sorted(self))
        return f"{{{body}}}"

    def __repr__(self) -> str:
        # The repr intentionally matches the log-friendly string form.
        return str(self)

68 

69 

class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Old log record factory."""

    # The MDC-injecting factory most recently installed by
    # ``add_mdc_log_record_factory``; used to detect repeated installation.
    _installed_factory: Callable[..., logging.LogRecord] | None = None

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key (empty string if the key was
            not set).
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        # For each key remember both whether it existed and what it held.
        # Looking only at the old value cannot tell "unset" apart from
        # "explicitly set to an empty string" because a missing key also
        # reads back as "".
        previous: dict[str, tuple[bool, str]] = {}
        for k, v in mdc.items():
            was_set = k in cls._MDC
            previous[k] = (was_set, cls.MDC(k, v))

        try:
            yield
        finally:
            for k, (was_set, old_value) in previous.items():
                if was_set:
                    cls.MDC(k, old_value)
                else:
                    cls.MDCRemove(k)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        Does nothing if the factory installed by a previous call is still
        the active one. Wrapping it a second time would replace the saved
        original factory with our own wrapper, and
        `restore_log_record_factory` could then never reinstate the real
        original.
        """
        old_factory = logging.getLogRecordFactory()

        if cls._installed_factory is not None and old_factory is cls._installed_factory:
            return

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        cls._old_factory = old_factory
        cls._installed_factory = record_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restores the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)

164 

165 

class ButlerLogRecord(_BaseModelCompat):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    exc_info: str | None = None
    MDC: dict[str, str]

    # Records are immutable; the spelling depends on the pydantic version.
    if PYDANTIC_V2:
        model_config = ConfigDict(frozen=True)
    else:

        class Config:
            """Pydantic model configuration."""

            allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New record populated from ``record``.
        """
        # The properties that are one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype = record.exc_info[0]
            evalue = record.exc_info[1]
            tb = record.exc_info[2]
            # Every string returned by format_exception already ends with a
            # newline, so concatenate directly; joining on "\n" would put a
            # blank line between each line of the traceback.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style use for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.model_dump()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Present the timestamp in ISO 8601 form.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        return self.format()

275 

276 

# Either a standard `logging.LogRecord` or a `ButlerLogRecord` is accepted by
# the collection class below; a `LogRecord` is converted when needed.
Record = LogRecord | ButlerLogRecord


# The base model holding the list of records is declared differently for the
# two pydantic major versions, but both variants expose the list as ``root``.
if PYDANTIC_V2:
    from pydantic import RootModel  # type: ignore

    class _ButlerLogRecords(RootModel):
        root: list[ButlerLogRecord]

else:

    class _ButlerLogRecords(_BaseModelCompat):  # type:ignore[no-redef]
        __root__: list[ButlerLogRecord]

        # Mirror the pydantic v2 attribute name so that the subclass can
        # always refer to ``self.root``.
        @property
        def root(self) -> list[ButlerLogRecord]:
            return self.__root__

295 

296 

297# Do not inherit from MutableSequence since mypy insists on the values 

298# being Any even though we wish to constrain them to Record. 

class ButlerLogRecords(_ButlerLogRecords):
    """Class representing a collection of `ButlerLogRecord`."""

    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.
        """
        if PYDANTIC_V2:
            return cls(list(records))  # type: ignore
        else:
            return cls(__root__=list(records))  # type: ignore

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.model_validate_json()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The repr is taken exactly once so
            # that truncated and untruncated content are quoted the same way.
            max_length = 32
            snippet = repr(startdata[:max_length])
            if len(startdata) > max_length:
                snippet += "..."
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {snippet}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.model_validate_json(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.model_validate_json(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.model_validate_json(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.model_dump_json()`` or a streaming format of one JSON
            `ButlerLogRecord` per line. This can also support a zero-length
            string.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The format is decided from the first character; the full content
        # is handed over so that it can be quoted in any error message.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.model_validate_json(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.model_validate_json(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string;
        the module default is used if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.root)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.root)

    def __setitem__(self, index: int, value: Record) -> None:
        self.root[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: slice | int) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.root[index]
        if isinstance(item, list):
            if PYDANTIC_V2:
                return type(self)(item)  # type: ignore
            else:
                return type(self)(__root__=item)  # type: ignore
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.root.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        del self.root[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.root)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return ``record`` as a `ButlerLogRecord`, converting a
        `logging.LogRecord` if necessary.

        Raises
        ------
        ValueError
            Raised if ``record`` is neither of the supported types.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            raise ValueError(
                f"Can only append items of type LogRecord or ButlerLogRecord, not {type(record)}"
            )
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index."""
        self.root.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end of the collection."""
        value = self._validate_record(value)
        self.root.append(value)

    def clear(self) -> None:
        """Remove all records."""
        self.root.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add every record from the iterable to the end of the collection."""
        self.root.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at the given index (default last)."""
        return self.root.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.root.reverse()

558 

559 

class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records."""

    def __init__(self) -> None:
        super().__init__()
        # Begin with an empty collection. ``from_records`` takes care of the
        # pydantic-version-specific way of constructing it.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        # The record is stored in the collection, which converts it to a
        # `ButlerLogRecord`.
        self.records.append(record)

572 

573 

class JsonLogFormatter(Formatter):
    """Format a `LogRecord` in JSON format."""

    def format(self, record: LogRecord) -> str:
        # Convert to the Butler model first, then serialize it, leaving out
        # fields that were not set or that hold their default value.
        return ButlerLogRecord.from_record(record).model_dump_json(
            exclude_unset=True, exclude_defaults=True
        )

577 def format(self, record: LogRecord) -> str: 

578 butler_record = ButlerLogRecord.from_record(record) 

579 return butler_record.model_dump_json(exclude_unset=True, exclude_defaults=True)