Coverage for python/lsst/daf/butler/core/logging.py: 34%

216 statements  

coverage.py v6.5.0, created at 2023-03-01 02:25 -0800

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", "ButlerLogRecord", "JsonLogFormatter")

import datetime
import logging
import traceback
from contextlib import contextmanager
from logging import Formatter, LogRecord, StreamHandler
from typing import (
    IO,
    Any,
    Callable,
    ClassVar,
    Dict,
    Generator,
    Iterable,
    Iterator,
    List,
    Optional,
    Union,
    overload,
)

from lsst.utils.introspection import get_full_type_name
from lsst.utils.iteration import isplit
from pydantic import BaseModel, PrivateAttr

_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Default format for log records."""


class MDCDict(dict):
    """Dictionary for MDC data.

    This is an internal class used for better formatting of MDC in Python
    logging output. It behaves like `defaultdict(str)` but overrides the
    ``__str__`` and ``__repr__`` methods to produce output better suited
    for logging records.
    """

    def __getitem__(self, name: str) -> str:
        """Return value for a given key or empty string for missing key."""
        return self.get(name, "")

    def __str__(self) -> str:
        """Return string representation; values are interpolated without
        quotes.
        """
        items = (f"{k}={self[k]}" for k in sorted(self))
        return "{" + ", ".join(items) + "}"

    def __repr__(self) -> str:
        return str(self)
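
# Illustrative sketch (not part of the original module): how MDCDict renders
# for log output. Missing keys yield empty strings rather than KeyError, and
# str() interpolates values without quotes. The helper name and key values
# below are hypothetical examples.
def _demo_mdcdict_formatting() -> str:
    mdc = MDCDict(LABEL="ingest", RUN="run-1")
    assert mdc["missing"] == ""  # Missing keys give "" instead of raising.
    return str(mdc)  # -> "{LABEL=ingest, RUN=run-1}"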


class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Optional[Callable[..., logging.LogRecord]] = None
    """Old log record factory."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: Dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC keys for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        previous = {}
        for k, v in mdc.items():
            previous[k] = cls.MDC(k, v)

        try:
            yield
        finally:
            for k, v in previous.items():
                if not v:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds an MDC record to `LogRecord`."""
        old_factory = logging.getLogRecordFactory()

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory:
            logging.setLogRecordFactory(cls._old_factory)
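
# Illustrative sketch (not part of the original module): attach MDC context to
# log records emitted within a block. The helper name and the "LABEL" key are
# arbitrary examples, not an established convention of this package.
def _demo_butler_mdc() -> None:
    ButlerMDC.add_mdc_log_record_factory()
    try:
        with ButlerMDC.set_mdc({"LABEL": "my-dataset"}):
            # Records created here carry record.MDC["LABEL"] == "my-dataset".
            logging.getLogger("demo").warning("message with MDC attached")
    finally:
        ButlerMDC.restore_log_record_factory()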


class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead, have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: Optional[str]
    process: int
    processName: str
    exc_info: Optional[str]
    MDC: Dict[str, str]

    class Config:
        """Pydantic model configuration."""

        allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.
        """
        # The properties that are one-to-one mappings.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc)

        # Sometimes exception information is included so it must be
        # extracted.
        if record.exc_info:
            etype = record.exc_info[0]
            evalue = record.exc_info[1]
            tb = record.exc_info[2]
            record_dict["exc_info"] = "\n".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: Optional[str] = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style use for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.dict()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        return self.format()
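
# Illustrative sketch (not part of the original module): round-trip a standard
# LogRecord through the Butler model and format it. The logger name, message
# and helper name are arbitrary examples.
def _demo_butler_log_record() -> str:
    std_record = logging.LogRecord(
        name="demo",
        level=logging.INFO,
        pathname=__file__,
        lineno=1,
        msg="hello %s",
        args=("world",),
        exc_info=None,
    )
    butler_record = ButlerLogRecord.from_record(std_record)
    # Format with a custom template; passing None would use the long default.
    return butler_record.format("{levelname} {name}: {message}")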


# The class below can convert LogRecord to ButlerLogRecord if needed.
Record = Union[LogRecord, ButlerLogRecord]


# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(BaseModel):
    """Class representing a collection of `ButlerLogRecord`."""

    __root__: List[ButlerLogRecord]
    _log_format: Optional[str] = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.
        """
        return cls(__root__=list(records))

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename, "r") as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: Union[str, bytes]) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.parse_raw()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file.
            max = 32
            if len(startdata) > max:
                startdata = f"{startdata[:max]!r}..."
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {startdata!r}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            all = first_line + stream.read()
            return cls.parse_raw(all)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.parse_raw(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.parse_raw(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: Union[str, bytes]) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.json()`` or a streaming format of one JSON `ButlerLogRecord`
            per line. This can also support a zero-length string.
        """
        if not serialized:
            # No records to return.
            return cls.from_records([])

        # Only the first character is needed to determine the format.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.parse_raw(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[Union[str, bytes]]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.parse_raw(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: Optional[str]) -> Optional[str]:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.__root__)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.__root__)

    def __setitem__(self, index: int, value: Record) -> None:
        self.__root__[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: Union[slice, int]) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.__root__[index]
        if isinstance(item, list):
            return type(self)(__root__=item)
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.__root__.__reversed__()

    def __delitem__(self, index: Union[slice, int]) -> None:
        del self.__root__[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.__root__)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            raise ValueError(f"Can only append item of type {type(record)}")
        return record

    def insert(self, index: int, value: Record) -> None:
        self.__root__.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        value = self._validate_record(value)
        self.__root__.append(value)

    def clear(self) -> None:
        self.__root__.clear()

    def extend(self, records: Iterable[Record]) -> None:
        self.__root__.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        return self.__root__.pop(index)

    def reverse(self) -> None:
        self.__root__.reverse()
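
# Illustrative sketch (not part of the original module): parse a collection of
# records from serialized JSON. A file read via from_file() or a stream via
# from_stream() is handled the same way, with the leading character deciding
# between the full-model and one-record-per-line formats. The helper and
# variable names below are hypothetical.
def _demo_parse_records() -> str:
    one_record_json = ButlerLogRecord.from_record(
        logging.LogRecord("demo", logging.INFO, __file__, 1, "hi", None, None)
    ).json()
    # Two JSON records, one per line (streaming format), become a collection.
    records = ButlerLogRecords.from_raw(one_record_json + "\n" + one_record_json)
    records.set_log_format("{asctime} {levelname} {message}")
    return str(records)  # Two lines rendered with the chosen format.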


class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records."""

    def __init__(self) -> None:
        super().__init__()
        self.records = ButlerLogRecords(__root__=[])

    def emit(self, record: LogRecord) -> None:
        self.records.append(record)


class JsonLogFormatter(Formatter):
    """Format a `LogRecord` in JSON format."""

    def format(self, record: LogRecord) -> str:
        butler_record = ButlerLogRecord.from_record(record)
        return butler_record.json(exclude_unset=True, exclude_defaults=True)
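
# Illustrative sketch (not part of the original module): capture log records
# in memory with ButlerLogRecordHandler and also emit them as one JSON
# document per record via JsonLogFormatter. The logger name "demo" and the
# helper name are arbitrary examples.
def _demo_handler_and_formatter() -> ButlerLogRecords:
    log = logging.getLogger("demo")

    # Accumulate records in memory as ButlerLogRecord objects.
    record_handler = ButlerLogRecordHandler()
    log.addHandler(record_handler)

    # Write one JSON document per record to stderr (the StreamHandler default).
    json_handler = StreamHandler()
    json_handler.setFormatter(JsonLogFormatter())
    log.addHandler(json_handler)

    try:
        log.warning("captured in memory and serialized as JSON")
    finally:
        log.removeHandler(record_handler)
        log.removeHandler(json_handler)
    return record_handler.records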