Coverage for python/lsst/daf/butler/core/logging.py: 34%

217 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-23 09:30 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

# Public names exported by this module.
__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", "ButlerLogRecord", "JsonLogFormatter")

23 

24import datetime 

25import logging 

26import traceback 

27from collections.abc import Callable, Generator, Iterable, Iterator 

28from contextlib import contextmanager 

29from logging import Formatter, LogRecord, StreamHandler 

30from typing import IO, Any, ClassVar, Union, overload 

31 

32from lsst.utils.introspection import get_full_type_name 

33from lsst.utils.iteration import isplit 

34from pydantic import BaseModel, PrivateAttr 

35 

# "{"-style format string. The placeholders are filled in with
# ``str.format`` using the field names of `ButlerLogRecord`.
_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Default format for log records."""

38 

39 

class MDCDict(dict):
    """Dictionary holding MDC data.

    Internal helper that makes MDC content render nicely in Python logging
    output. Lookups behave like `defaultdict(str)` (a missing key yields
    an empty string, without inserting it) and ``__str__`` / ``__repr__``
    produce a compact form suited to log records.
    """

    def __getitem__(self, name: str) -> str:
        """Return the value stored for ``name``, or ``""`` if absent."""
        try:
            return super().__getitem__(name)
        except KeyError:
            return ""

    def __str__(self) -> str:
        """Return ``{key=value, ...}`` with keys sorted and values
        interpolated without quotes.
        """
        pairs = [f"{key}={self[key]}" for key in sorted(self)]
        body = ", ".join(pairs)
        return f"{{{body}}}"

    def __repr__(self) -> str:
        # Deliberately identical to the string form.
        return self.__str__()

61 

62 

class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Log record factory that was active before the MDC factory was
    installed, or `None` if the MDC factory is not currently installed."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. This is an empty string if
            the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key to remove.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously), even if the
        body of the context raises.
        """
        previous = {}
        for k, v in mdc.items():
            previous[k] = cls.MDC(k, v)

        try:
            yield
        finally:
            for k, v in previous.items():
                # A falsey previous value means the key was unset (or held
                # an empty string), so drop it rather than store "".
                if not v:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        Calling this again while the MDC factory is already installed
        does nothing. Wrapping a second time would nest the factories and
        overwrite the remembered original, making it impossible for
        `restore_log_record_factory` to restore it.
        """
        if cls._old_factory is not None:
            return

        old_factory = logging.getLogRecordFactory()

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restores the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory` since the last restore.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so that a repeated restore cannot
            # clobber a factory installed later, and so that the MDC
            # factory can be installed again.
            cls._old_factory = None

157 

158 

class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None
    process: int
    processName: str
    exc_info: str | None
    MDC: dict[str, str]

    class Config:
        """Pydantic model configuration."""

        allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New record populated from ``record``.
        """
        # The properties that are one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict: dict[str, Any] = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype = record.exc_info[0]
            evalue = record.exc_info[1]
            tb = record.exc_info[2]
            # Each string returned by format_exception already ends in a
            # newline, so join without a separator to avoid blank lines
            # between every line of the traceback.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style use for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.dict()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Render the timestamp in ISO 8601 form.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        # Use the class default format.
        return self.format()

264 

265 

# The class below can convert LogRecord to ButlerLogRecord if needed.
# Type alias for the two record types that the class below accepts
# when records are inserted, appended or assigned.
Record = LogRecord | ButlerLogRecord

268 

269 

# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(BaseModel):
    """Class representing a collection of `ButlerLogRecord`.

    The collection supports the usual mutable-sequence operations
    (indexing, slicing, ``append``, ``extend``, ``insert``, ``pop``, ...).
    Methods that add records accept either a `ButlerLogRecord` or a
    `logging.LogRecord`; the latter is converted on the way in.
    """

    __root__: list[ButlerLogRecord]
    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.
        """
        return cls(__root__=list(records))

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.parse_raw()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]
            error_type = "str"

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The snippet is passed through repr
            # exactly once so that it is not double-quoted in the message.
            max_len = 32
            snippet = repr(startdata[:max_len])
            if len(startdata) > max_len:
                snippet += "..."
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {snippet}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            serialized = first_line + stream.read()
            return cls.parse_raw(serialized)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.parse_raw(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.parse_raw(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.json()`` or a streaming format of one JSON `ButlerLogRecord`
            per line. This can also support a zero-length string.

        Raises
        ------
        TypeError
            Raised if the streaming form is neither `str` nor `bytes`.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The detection only examines the first character (the rest is
        # used solely for the error message).
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.parse_raw(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        # Skip whitespace-only lines as well as empty ones (e.g. the "\r"
        # left behind by CRLF line endings), matching from_stream().
        records = [ButlerLogRecord.parse_raw(line) for line in substrings if line.strip()]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string
        (`str`). Falls back to the module default if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.__root__)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.__root__)

    def __setitem__(self, index: int, value: Record) -> None:
        self.__root__[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: slice | int) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.__root__[index]
        if isinstance(item, list):
            return type(self)(__root__=item)
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.__root__.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        del self.__root__[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.__root__)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return ``record`` as a `ButlerLogRecord`, converting a
        `logging.LogRecord` if necessary.

        Raises
        ------
        ValueError
            Raised if ``record`` is neither of the supported types.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            raise ValueError(
                f"Can only append item of type ButlerLogRecord or LogRecord, not {type(record)}"
            )
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index."""
        self.__root__.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end of the collection."""
        value = self._validate_record(value)
        self.__root__.append(value)

    def clear(self) -> None:
        """Remove all records."""
        self.__root__.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add every record from the iterable to the end of the
        collection.
        """
        self.__root__.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at the given index (default
        last).
        """
        return self.__root__.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.__root__.reverse()

525 

526 

class ButlerLogRecordHandler(StreamHandler):
    """Python log handler that accumulates records.

    Every record passed to `emit` is appended to the ``records``
    attribute, a `ButlerLogRecords` collection that starts out empty.
    """

    def __init__(self) -> None:
        super().__init__()
        self.records = ButlerLogRecords(__root__=[])

    def emit(self, record: LogRecord) -> None:
        """Store the record in ``records``.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record to store.

        Notes
        -----
        Failures while storing the record (for example a message whose
        arguments do not match its format string) are reported through
        `logging.Handler.handleError`, as is conventional for log
        handlers, rather than being raised into the code that issued the
        log call.
        """
        try:
            self.records.append(record)
        except RecursionError:
            # Mirror the standard library handlers: never mask this.
            raise
        except Exception:
            self.handleError(record)

536 

537 

class JsonLogFormatter(Formatter):
    """Log formatter that renders each `LogRecord` as a JSON string."""

    def format(self, record: LogRecord) -> str:
        """Convert ``record`` to a `ButlerLogRecord` and return its JSON
        serialization, leaving out unset fields and fields at their
        default value.
        """
        return ButlerLogRecord.from_record(record).json(exclude_unset=True, exclude_defaults=True)

543 return butler_record.json(exclude_unset=True, exclude_defaults=True)