Coverage for python/lsst/daf/butler/core/logging.py: 40%

234 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-25 15:14 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

# Names exported by ``from <module> import *``.
__all__ = (
    "ButlerMDC",
    "ButlerLogRecords",
    "ButlerLogRecordHandler",
    "ButlerLogRecord",
    "JsonLogFormatter",
)

23 

24import datetime 

25import logging 

26import traceback 

27from collections.abc import Callable, Generator, Iterable, Iterator 

28from contextlib import contextmanager 

29from logging import Formatter, LogRecord, StreamHandler 

30from typing import IO, Any, ClassVar, Union, overload 

31 

32from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat 

33from lsst.utils.introspection import get_full_type_name 

34from lsst.utils.iteration import isplit 

35from pydantic import ConfigDict, PrivateAttr 

36 

_LONG_LOG_FORMAT = (
    "{levelname} {asctime} {name} "
    "{filename}:{lineno} - {message}"
)
"""Default ``str.format``-style template for rendering a log record."""

39 

40 

class MDCDict(dict):
    """Mapping that holds MDC values for log records.

    Internal helper that makes MDC content render nicely in Python logging
    output. Looking up a missing key yields an empty string (the key is not
    inserted), and both ``__str__`` and ``__repr__`` produce a compact form
    suited to log records.
    """

    def __getitem__(self, name: str) -> str:
        """Look up ``name``, yielding an empty string when it is absent."""
        return self.get(name, "")

    def __str__(self) -> str:
        """Render as ``{key=value, ...}`` with the keys sorted and the
        values interpolated without quotes.
        """
        body = ", ".join(f"{key}={self[key]}" for key in sorted(self))
        return f"{{{body}}}"

    def __repr__(self) -> str:
        # The logging-friendly form is used for repr() as well.
        return str(self)

62 

63 

class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Log record factory saved by `add_mdc_log_record_factory`, or `None`
    if there is nothing to restore.
    """

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key to remove.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        # Membership has to be recorded explicitly: ``MDC()`` reports an
        # empty string both for a missing key and for a key whose value
        # really is "", and only the former should be removed on exit.
        was_set = {k for k in mdc if k in cls._MDC}
        previous = {k: cls.MDC(k, v) for k, v in mdc.items()}

        try:
            yield
        finally:
            for k, v in previous.items():
                if k in was_set:
                    cls.MDC(k, v)
                else:
                    cls.MDCRemove(k)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        The factory that was active beforehand is remembered so that
        `restore_log_record_factory` can reinstate it.
        """
        old_factory = logging.getLogRecordFactory()

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory` since the last restore.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so that a repeated call cannot
            # reinstall it over a factory that was set in the meantime.
            cls._old_factory = None

158 

159 

class ButlerLogRecord(_BaseModelCompat):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    # Format string used by format() when the caller does not supply one.
    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    # Fields copied directly from the attributes of the same name on the
    # source LogRecord, except where noted.
    name: str
    # Creation time of the record, stored as a timezone-aware UTC datetime.
    asctime: datetime.datetime
    # The fully interpolated message (LogRecord.getMessage()).
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    # Formatted traceback text, present only if the record had an exception.
    exc_info: str | None = None
    # Copy of the MDC content attached to the record (empty if none).
    MDC: dict[str, str]

    if PYDANTIC_V2:
        model_config = ConfigDict(frozen=True)
    else:

        class Config:
            """Pydantic model configuration."""

            allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            Model populated from the supplied record.
        """
        # The properties that are one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype, evalue, tb = record.exc_info
            # Each string returned by format_exception() already ends in a
            # newline, so concatenate directly; joining on "\n" would put a
            # blank line between every line of the traceback.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style use for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.model_dump()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Present the timestamp in ISO 8601 form rather than the default
        # string form of a datetime.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        # Render with the class default format.
        return self.format()

269 

270 

# Type alias for values accepted wherever a record can be added to a
# collection: either a standard `logging.LogRecord` or a `ButlerLogRecord`.
# The collection class below can convert LogRecord to ButlerLogRecord if
# needed.
Record = LogRecord | ButlerLogRecord

273 

274 

# Define the base class holding the list of records. The two major pydantic
# versions spell a "model that is just a list" differently, so pick the
# matching form; either way the list is reachable as ``.root``.
if not PYDANTIC_V2:

    # Pydantic v1 spelling: the payload is declared through ``__root__``.
    class _ButlerLogRecords(_BaseModelCompat):
        __root__: list[ButlerLogRecord]

        @property
        def root(self) -> list[ButlerLogRecord]:
            """Return the underlying list of records."""
            return self.__root__

else:
    from pydantic import RootModel  # type: ignore

    # Pydantic v2 spelling: the payload is the ``root`` field of a RootModel.
    class _ButlerLogRecords(RootModel):  # type: ignore[no-redef]
        root: list[ButlerLogRecord]

289 

290 

# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(_ButlerLogRecords):
    """Class representing a collection of `ButlerLogRecord`."""

    # Format string used by __str__; `None` selects the module default.
    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection holding a list built from ``records``.
        """
        if PYDANTIC_V2:
            return cls(list(records))  # type: ignore
        else:
            return cls(__root__=list(records))  # type: ignore

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.model_validate_json()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]
            error_type = "str"

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char == "{":
            # Assume a record per line.
            return False

        # Limit the length of string reported in error message in case
        # this is an enormous file. The snippet is repr()'d exactly once so
        # that it is not wrapped in a second layer of quotes.
        max_length = 32
        snippet = repr(startdata[:max_length])
        if len(startdata) > max_length:
            snippet += "..."
        raise ValueError(
            "Unrecognized JSON log format. Expected '{' or '[' but got"
            f" {first_char!r} from {error_type} content starting with {snippet}"
        )

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream. Empty if the stream is empty.

        Raises
        ------
        ValueError
            Raised if the first line does not start with ``{`` or ``[``.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.model_validate_json(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.model_validate_json(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.model_validate_json(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.model_dump_json()`` or a streaming format of one JSON
            `ButlerLogRecord` per line. This can also support a zero-length
            string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The parsed records. Empty if ``serialized`` is empty.

        Raises
        ------
        TypeError
            Raised if non-empty ``serialized`` is neither `str` nor `bytes`.
        ValueError
            Raised if the content does not start with ``{`` or ``[``.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # Reject unsupported types before looking at the content so that
        # the caller gets a TypeError rather than a confusing format error.
        if not isinstance(serialized, str | bytes):
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")

        # The detection code only looks at the start of the content.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.model_validate_json(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        else:
            substrings = isplit(serialized, b"\n")
        records = [ButlerLogRecord.model_validate_json(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string
        (`str`). Falls back to the module default if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.root)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.root)

    def __setitem__(self, index: int, value: Record) -> None:
        self.root[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: slice | int) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.root[index]
        if isinstance(item, list):
            if PYDANTIC_V2:
                return type(self)(item)  # type: ignore
            else:
                return type(self)(__root__=item)  # type: ignore
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.root.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        del self.root[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.root)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return ``record`` as a `ButlerLogRecord`, converting if needed.

        Parameters
        ----------
        record : `ButlerLogRecord` or `logging.LogRecord`
            Record to check.

        Returns
        -------
        record : `ButlerLogRecord`
            The supplied record itself if it is already a `ButlerLogRecord`,
            else a new one created from the `logging.LogRecord`.

        Raises
        ------
        ValueError
            Raised if the record is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            raise ValueError(
                f"Can only append item of type ButlerLogRecord or LogRecord, not {type(record)}"
            )
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before ``index``, converting it if needed."""
        self.root.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end, converting it if needed."""
        value = self._validate_record(value)
        self.root.append(value)

    def clear(self) -> None:
        """Remove all records."""
        self.root.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add every record from ``records``, converting each if needed."""
        self.root.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at ``index`` (default last)."""
        return self.root.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.root.reverse()

552 

553 

class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records."""

    def __init__(self) -> None:
        super().__init__()
        # Begin with an empty collection; from_records() already copes with
        # the constructor difference between the pydantic versions.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        # Store the record in the collection instead of writing it anywhere.
        self.records.append(record)

566 

567 

class JsonLogFormatter(Formatter):
    """Format a `LogRecord` in JSON format."""

    def format(self, record: LogRecord) -> str:
        # Convert to the Butler model and let it serialize itself, leaving
        # out fields that were not set or that still hold their defaults.
        return ButlerLogRecord.from_record(record).model_dump_json(
            exclude_unset=True,
            exclude_defaults=True,
        )

573 return butler_record.model_dump_json(exclude_unset=True, exclude_defaults=True)