Coverage for python/lsst/daf/butler/core/logging.py: 35%

220 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-12 10:56 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

# Names exported by ``from <module> import *``: the public classes defined
# in this module. Helper names such as ``MDCDict`` are not listed here.
__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", "ButlerLogRecord", "JsonLogFormatter")

23 

24import datetime 

25import logging 

26import traceback 

27from collections.abc import Callable, Generator, Iterable, Iterator 

28from contextlib import contextmanager 

29from logging import Formatter, LogRecord, StreamHandler 

30from typing import IO, Any, ClassVar, Union, overload 

31 

32from lsst.utils.introspection import get_full_type_name 

33from lsst.utils.iteration import isplit 

34 

35try: 

36 from pydantic.v1 import BaseModel, PrivateAttr 

37except ModuleNotFoundError: 

38 from pydantic import BaseModel, PrivateAttr # type: ignore 

39 

# ``str.format``-style template whose placeholders name fields of
# ``ButlerLogRecord``. It is the class default used by
# ``ButlerLogRecord.format`` and the fallback returned by
# ``ButlerLogRecords.log_format`` when no explicit format has been set.
_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Default format for log records."""

42 

43 

class MDCDict(dict):
    """Mapping used to hold MDC values attached to log records.

    Internal helper that makes MDC content friendlier to logging format
    strings: looking up a key that is absent yields an empty string rather
    than raising `KeyError`, and the string forms render values without
    quoting so that they read well inside a log line.
    """

    def __getitem__(self, name: str) -> str:
        """Return the stored value, or ``""`` when ``name`` is not present.

        The missing key is not inserted into the mapping.
        """
        if name in self:
            return super().__getitem__(name)
        return ""

    def __str__(self) -> str:
        """Render as ``{key=value, ...}`` with keys in sorted order and no
        quoting of keys or values.
        """
        rendered = ", ".join(f"{key}={self[key]}" for key in sorted(self))
        return f"{{{rendered}}}"

    def __repr__(self) -> str:
        # Deliberately the same as the string form so that the MDC looks
        # identical however a formatter chooses to interpolate it.
        return self.__str__()

65 

66 

class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    # Single class-level mapping shared by every caller.
    _MDC = MDCDict()

    _old_factory: Callable[..., logging.LogRecord] | None = None
    """Old log record factory."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key to remove.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously). A key that
        was explicitly set to an empty string beforehand is restored to an
        empty string rather than being removed.
        """
        # Looking up a missing key in the MDC returns "", so the old value
        # alone cannot distinguish "unset" from "set to an empty string".
        # Record membership explicitly with a private sentinel instead.
        unset = object()
        previous: dict[str, Any] = {key: cls._MDC.get(key, unset) for key in mdc}

        for key, value in mdc.items():
            cls.MDC(key, value)

        try:
            yield
        finally:
            for key, old_value in previous.items():
                if old_value is unset:
                    cls.MDCRemove(key)
                else:
                    cls.MDC(key, old_value)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        The factory that was active when this method is called is
        remembered so that `restore_log_record_factory` can reinstate it.

        NOTE(review): calling this method a second time without an
        intervening restore wraps the wrapper installed by the first call
        and overwrites the remembered factory with that wrapper, so a
        later restore does not get back to the original factory.
        """
        old_factory = logging.getLogRecordFactory()

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)
            return record

        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)

161 

162 

class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    # Format string used by format() when the caller does not supply one.
    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: str | None = None
    process: int
    processName: str
    exc_info: str | None = None
    MDC: dict[str, str]

    class Config:
        """Pydantic model configuration."""

        allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New record holding a copy of the relevant content of
            ``record``.
        """
        # The properties that are one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        # Store the fully interpolated message, not the template and args.
        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later. Records created without
        # the MDC record factory have no MDC attribute at all.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype, evalue, tb = record.exc_info
            # Each string returned by format_exception() already ends in a
            # newline, so the pieces must be concatenated directly; joining
            # them with "\n" would insert a blank line after every entry.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: str | None = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style use for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.dict()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Present the timestamp in ISO 8601 form rather than the default
        # string form of a datetime.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        return log_format.format(**as_dict)

    def __str__(self) -> str:
        # Use the class default format.
        return self.format()

268 

269 

# Type alias for the values accepted by the mutating methods of
# ButlerLogRecords (append, insert, extend, item assignment). A plain
# LogRecord is converted to a ButlerLogRecord by
# ButlerLogRecords._validate_record before it is stored.
Record = LogRecord | ButlerLogRecord

272 

273 

274# Do not inherit from MutableSequence since mypy insists on the values 

275# being Any even though we wish to constrain them to Record. 

class ButlerLogRecords(BaseModel):
    """Class representing a collection of `ButlerLogRecord`.

    The records are held in a list stored as the pydantic custom root, and
    the class provides list-like methods that operate on that list. Plain
    `logging.LogRecord` instances are converted to `ButlerLogRecord` when
    they are added.
    """

    __root__: list[ButlerLogRecord]
    # Per-collection format override; `None` means use the default format.
    _log_format: str | None = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection holding the supplied records.
        """
        return cls(__root__=list(records))

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename) as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: str | bytes) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.parse_raw()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            # Indexing bytes yields an int so convert it for comparison.
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The snippet is passed through repr()
            # exactly once so that the message is not doubly quoted.
            max_chars = 32
            snippet = repr(startdata[:max_chars])
            if len(startdata) > max_chars:
                snippet += "..."
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {snippet}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream. Empty if the stream has no
            content.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            serialized = first_line + stream.read()
            return cls.parse_raw(serialized)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.parse_raw(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.parse_raw(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: str | bytes) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.json()`` or a streaming format of one JSON `ButlerLogRecord`
            per line. This can also support a zero-length string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The parsed records.

        Raises
        ------
        TypeError
            Raised if streaming-format content is neither `str` nor
            `bytes`.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The detector only examines the first character (and a short
        # prefix when building an error message) so the full payload can be
        # handed over without being copied or decoded.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.parse_raw(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[str | bytes]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.parse_raw(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting this collection to a string
        (`str`). Falls back to the module default if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: str | None) -> str | None:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.__root__)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.__root__)

    def __setitem__(self, index: int, value: Record) -> None:
        self.__root__[index] = self._validate_record(value)

    @overload
    def __getitem__(self, index: int) -> ButlerLogRecord:
        ...

    @overload
    def __getitem__(self, index: slice) -> "ButlerLogRecords":
        ...

    def __getitem__(self, index: slice | int) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.__root__[index]
        if isinstance(item, list):
            return type(self)(__root__=item)
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.__root__.__reversed__()

    def __delitem__(self, index: slice | int) -> None:
        del self.__root__[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.__root__)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return ``record`` as a `ButlerLogRecord`, converting if needed.

        Parameters
        ----------
        record : `ButlerLogRecord` or `logging.LogRecord`
            Record to check.

        Returns
        -------
        validated : `ButlerLogRecord`
            The record itself if it is already a `ButlerLogRecord`, else
            a new `ButlerLogRecord` created from the `logging.LogRecord`.

        Raises
        ------
        ValueError
            Raised if the record is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            # Name the accepted types; the rejected type is reported
            # separately so the message is not misread.
            raise ValueError(
                f"Can only add records of type ButlerLogRecord or LogRecord, not {type(record)}"
            )
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index, converting if needed."""
        self.__root__.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end of the collection, converting if needed."""
        value = self._validate_record(value)
        self.__root__.append(value)

    def clear(self) -> None:
        """Remove all records from the collection."""
        self.__root__.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add each of the given records to the end of the collection,
        converting if needed.
        """
        self.__root__.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at the given index (default last)."""
        return self.__root__.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.__root__.reverse()

529 

530 

class ButlerLogRecordHandler(StreamHandler):
    """Logging handler that keeps every record it is given.

    Records are collected in the ``records`` attribute, a
    `ButlerLogRecords` collection that starts out empty.
    """

    def __init__(self) -> None:
        super().__init__()
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        """Store the record in ``records``.

        Parameters
        ----------
        record : `logging.LogRecord`
            Record to store. It is converted to a `ButlerLogRecord` by the
            collection when appended.
        """
        self.records.append(record)

540 

541 

class JsonLogFormatter(Formatter):
    """Formatter that renders a `LogRecord` as a JSON string."""

    def format(self, record: LogRecord) -> str:
        """Convert the record to a `ButlerLogRecord` and serialize it.

        Parameters
        ----------
        record : `logging.LogRecord`
            Record to format.

        Returns
        -------
        text : `str`
            JSON serialization of the record, requested with unset fields
            and fields equal to their defaults excluded.
        """
        return ButlerLogRecord.from_record(record).json(exclude_unset=True, exclude_defaults=True)