Coverage for python/lsst/daf/butler/core/logging.py: 40%

232 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-08-05 01:26 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", "ButlerLogRecord", "JsonLogFormatter") 

23 

24import datetime 

25import logging 

26import traceback 

27from collections.abc import Callable, Generator, Iterable, Iterator 

28from contextlib import contextmanager 

29from logging import Formatter, LogRecord, StreamHandler 

30from typing import IO, Any, ClassVar, Union, overload 

31 

32from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat 

33from lsst.utils.introspection import get_full_type_name 

34from lsst.utils.iteration import isplit 

35from pydantic import PrivateAttr 

36 

37_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}" 

38"""Default format for log records.""" 

39 

40 

class MDCDict(dict):
    """Mapping that holds MDC values and renders them compactly for logs.

    Indexing with a key that is not present yields an empty string rather
    than raising `KeyError` (the key is not inserted). Both ``str()`` and
    ``repr()`` return a ``{key=value, ...}`` form with the keys sorted and
    the values interpolated without quotes.
    """

    def __getitem__(self, name: str) -> str:
        """Look up ``name``, falling back to ``""`` when it is not set."""
        return self.get(name, "")

    def __str__(self) -> str:
        """Render the content as ``{k1=v1, k2=v2}`` with keys in sorted
        order and no quoting of strings.
        """
        body = ", ".join(f"{key}={self[key]}" for key in sorted(self))
        return "{" + body + "}"

    def __repr__(self) -> str:
        """Return the same text as ``str()``."""
        return str(self)

62 

63 

class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Old log record factory."""

    _mdc_factory: Callable[..., logging.LogRecord] | None = None
    """MDC-aware log record factory installed by this class, if any."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key to remove.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        # Indexing an MDCDict returns "" for a missing key, which would make
        # an unset key indistinguishable from one explicitly set to the empty
        # string. dict.get() returns None for a missing key so the two cases
        # can be restored differently.
        previous: dict[str, str | None] = {}
        for k, v in mdc.items():
            previous[k] = cls._MDC.get(k)
            cls.MDC(k, v)

        try:
            yield
        finally:
            for k, old in previous.items():
                if old is None:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, old)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        Calling this again while the factory installed by a previous call
        is still the active one does nothing. Without that guard the
        remembered "old" factory would be replaced by the MDC factory
        itself and `restore_log_record_factory` could no longer restore
        the original.
        """
        old_factory = logging.getLogRecordFactory()

        if cls._mdc_factory is not None and old_factory is cls._mdc_factory:
            # Our factory is already in place.
            return

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        cls._old_factory = old_factory
        cls._mdc_factory = record_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`, or if the factory has already been
        restored since the last such call.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so that a repeated call can not
            # clobber a factory installed by other code in the meantime.
            cls._old_factory = None
            cls._mdc_factory = None

158 

159 

class ButlerLogRecord(_BaseModelCompat):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    exc_info: str | None = None
    MDC: dict[str, str]

    class Config:
        """Pydantic model configuration."""

        allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New record populated from ``record``.
        """
        # The properties that are one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype, evalue, tb = record.exc_info
            # Every string returned by format_exception() already ends with
            # a newline, so concatenate them directly; joining with "\n"
            # would insert a blank line after each entry.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string is applied with
            `str.format` using the record fields as keyword arguments.
            If `None` the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.dict()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Present the timestamp in ISO 8601 form.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        return log_format.format(**as_dict)

    def __str__(self) -> str:
        """Return the record formatted with the class default format."""
        return self.format()

265 

266 

# The class below can convert LogRecord to ButlerLogRecord if needed.
# Record names the two types that may be handed to a ButlerLogRecords
# collection when adding or replacing an entry.
Record = LogRecord | ButlerLogRecord


# The base model holding the list of records is declared differently
# depending on PYDANTIC_V2: with a ``root`` field on RootModel when it is
# true, and with a ``__root__`` field otherwise. Both variants expose the
# list as ``root`` so the code using it needs no further version checks.
if PYDANTIC_V2:
    from pydantic import RootModel  # type: ignore

    class _ButlerLogRecords(RootModel):
        root: list[ButlerLogRecord]

else:

    class _ButlerLogRecords(_BaseModelCompat):  # type:ignore[no-redef]
        __root__: list[ButlerLogRecord]

        @property
        def root(self) -> list[ButlerLogRecord]:
            # Provide the same attribute name as the RootModel variant.
            return self.__root__

285 

286 

# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(_ButlerLogRecords):
    """Class representing a collection of `ButlerLogRecord`.

    The collection supports the usual mutable-sequence operations
    (indexing, slicing, ``append``, ``extend``, ``insert``, ``pop`` ...).
    Items being added may be given as `logging.LogRecord` and are then
    converted to `ButlerLogRecord`.
    """

    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection containing the supplied records.
        """
        if PYDANTIC_V2:
            return cls(list(records))  # type: ignore
        else:
            return cls(__root__=list(records))  # type: ignore

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.parse_raw()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The repr is taken exactly once so
            # that truncated content is not quoted twice in the message.
            max_length = 32
            snippet = repr(startdata[:max_length])
            if len(startdata) > max_length:
                snippet += "..."
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {snippet}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream. Empty if the stream has no
            content.

        Raises
        ------
        ValueError
            Raised if the first line does not start like either of the
            supported formats.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        if cls._detect_model(first_line):
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.parse_raw(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.parse_raw(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.parse_raw(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.json()`` or a streaming format of one JSON `ButlerLogRecord`
            per line. This can also support a zero-length string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The parsed records.

        Raises
        ------
        ValueError
            Raised if the content does not start like either of the
            supported formats.
        TypeError
            Raised if the streaming form is neither `str` nor `bytes`.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The format is decided from the first character; the full content
        # is passed so that a useful excerpt can be reported on error.
        if cls._detect_model(serialized):
            return cls.parse_raw(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.parse_raw(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the collection to a string
        (`str`). Falls back to the module default when none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        """Return the number of records held."""
        return len(self.root)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        """Iterate over the records in order."""
        return iter(self.root)

    def __setitem__(self, index: int, value: Record) -> None:
        """Replace the record at ``index``, converting it if required."""
        self.root[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: slice | int) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        """Return a single record for an integer index, or a new collection
        of the same type for a slice.
        """
        item = self.root[index]
        if isinstance(item, list):
            # A slice was requested. Build the new collection through the
            # shared constructor so the pydantic version handling lives in
            # one place.
            return type(self).from_records(item)
        return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        """Iterate over the records in reverse order."""
        return self.root.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        """Delete the record(s) selected by ``index``."""
        del self.root[index]

    def __str__(self) -> str:
        """Return all records formatted one per line."""
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.root)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return ``record`` as a `ButlerLogRecord`.

        Parameters
        ----------
        record : `ButlerLogRecord` or `logging.LogRecord`
            Record to check. A `logging.LogRecord` is converted.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            The record in the form stored by this collection.

        Raises
        ------
        ValueError
            Raised if ``record`` is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            return record
        if isinstance(record, LogRecord):
            return ButlerLogRecord.from_record(record)
        # Report the offending type and name the types that are accepted.
        raise ValueError(
            f"Can only store items of type LogRecord or ButlerLogRecord, not {get_full_type_name(record)}"
        )

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before position ``index``."""
        self.root.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end of the collection."""
        value = self._validate_record(value)
        self.root.append(value)

    def clear(self) -> None:
        """Remove all records."""
        self.root.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add every record from ``records`` to the end of the collection."""
        self.root.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at ``index`` (default last)."""
        return self.root.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.root.reverse()

547 

548 

class ButlerLogRecordHandler(StreamHandler):
    """Logging handler that keeps every emitted record in memory.

    The records are available, in the order they were emitted, from the
    ``records`` attribute which is a `ButlerLogRecords` collection.
    """

    def __init__(self) -> None:
        super().__init__()
        # Start from an empty collection; from_records() takes care of the
        # pydantic version differences in how the model is constructed.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        """Store ``record`` in the collection."""
        self.records.append(record)

561 

562 

class JsonLogFormatter(Formatter):
    """Formatter that turns each `LogRecord` into a JSON string."""

    def format(self, record: LogRecord) -> str:
        """Serialize ``record`` as JSON via `ButlerLogRecord`.

        Fields that were not set, or that still hold their default value,
        are left out of the output.
        """
        return ButlerLogRecord.from_record(record).json(exclude_unset=True, exclude_defaults=True)