Coverage for python/lsst/daf/butler/core/logging.py: 36%

210 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-19 12:13 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

# Public names exported by this module.
__all__ = (
    "ButlerMDC",
    "ButlerLogRecords",
    "ButlerLogRecordHandler",
    "ButlerLogRecord",
    "JsonLogFormatter",
)

23 

24import datetime 

25import logging 

26import traceback 

27from contextlib import contextmanager 

28from logging import Formatter, LogRecord, StreamHandler 

29from typing import IO, Any, ClassVar, Dict, Generator, Iterable, Iterator, List, Optional, Union 

30 

31from lsst.utils.introspection import get_full_type_name 

32from lsst.utils.iteration import isplit 

33from pydantic import BaseModel, PrivateAttr 

34 

#: Default format for log records. It is a ``str.format``-style template
#: whose fields are filled in from the attributes of a log record.
_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"

37 

38 

class MDCDict(dict):
    """Mapping that holds MDC data and renders it compactly for logging.

    Looking up a key that is not present yields an empty string instead
    of raising `KeyError` (the key is not inserted), and both ``str()``
    and ``repr()`` produce the same unquoted ``{key=value, ...}`` text
    with keys in sorted order.
    """

    def __getitem__(self, name: str) -> str:
        """Return the value stored for ``name``, or ``""`` if absent."""
        try:
            return super().__getitem__(name)
        except KeyError:
            # Missing keys are reported as empty strings so that format
            # strings referring to unknown MDC keys still work.
            return ""

    def __str__(self) -> str:
        """Return ``{k=v, ...}`` with sorted keys and unquoted values."""
        pairs = [f"{key}={self[key]}" for key in sorted(self)]
        body = ", ".join(pairs)
        return "{" + body + "}"

    def __repr__(self) -> str:
        # The repr is deliberately identical to the string form.
        return str(self)

60 

61 

class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    # The single, process-wide MDC mapping shared by all users of this class.
    _MDC = MDCDict()

    _old_factory = None
    """Log record factory that was active before the MDC factory was
    installed, or `None` if the MDC factory is not currently installed
    by this class.
    """

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. This is an empty string if
            the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key to remove.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: Dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously). A key whose
        previous value was an empty string is treated as unset and is
        removed on exit.
        """
        # Install the new values, remembering what each key held before.
        previous = {k: cls.MDC(k, v) for k, v in mdc.items()}

        try:
            yield
        finally:
            # Restore even if the body of the context raised.
            for k, v in previous.items():
                if not v:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        Does nothing if the currently-installed factory is already an MDC
        factory created by this method. Without that check a repeated call
        would record the MDC factory itself as the "old" factory and
        `restore_log_record_factory` could no longer reinstate the
        original one.
        """
        old_factory = logging.getLogRecordFactory()

        if getattr(old_factory, "_butler_mdc_factory", False):
            # Already installed; keep the originally saved factory.
            return

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)  # type: ignore
            return record

        # Tag the factory so that a later call can recognize it.
        record_factory._butler_mdc_factory = True  # type: ignore

        cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restore the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory` since the factory was last restored.
        """
        if cls._old_factory:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so a repeated restore cannot
            # overwrite a factory installed later by other code.
            cls._old_factory = None

156 

157 

class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    # Format string used by `format` when no explicit format is given.
    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: Optional[str]
    process: int
    processName: str
    # Formatted traceback text, if the record carried exception information.
    exc_info: Optional[str]
    MDC: Dict[str, str]

    class Config:
        """Pydantic model configuration."""

        allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New record holding a copy of the relevant content of
            ``record``.
        """
        # The properties that are one-to-one mapping.
        simple = (
            "name",
            "levelno",
            "levelname",
            "filename",
            "pathname",
            "lineno",
            "funcName",
            "process",
            "processName",
        )

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later. Records created without
        # the MDC record factory have no MDC attribute at all.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created, tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype, evalue, tb = record.exc_info
            # Each string returned by format_exception already ends with a
            # newline, so concatenate directly; joining with "\n" would
            # insert a blank line between every line of the traceback.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: Optional[str] = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style use for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.dict()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Present the timestamp in ISO 8601 form.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        # Use the class default format.
        return self.format()

263 

264 

# Type alias for anything that can be added to a ButlerLogRecords
# collection: the class below can convert LogRecord to ButlerLogRecord
# if needed.
Record = Union[LogRecord, ButlerLogRecord]

267 

268 

269# Do not inherit from MutableSequence since mypy insists on the values 

270# being Any even though we wish to constrain them to Record. 

class ButlerLogRecords(BaseModel):
    """Class representing a collection of `ButlerLogRecord`.

    The records are stored in a list used as the pydantic custom root
    type and the class provides list-like methods to access and modify
    them. The modifying methods (``append``, ``insert``, ``extend`` and
    item assignment) accept a `logging.LogRecord` as well and convert it
    to a `ButlerLogRecord`.
    """

    __root__: List[ButlerLogRecord]
    # Format string used by ``__str__``; `None` means use the default.
    _log_format: Optional[str] = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.
        """
        return cls(__root__=list(records))

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename, "r") as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: Union[str, bytes]) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.parse_raw()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            # Indexing bytes gives an int so convert it to a character.
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The repr is applied exactly once,
            # to the (possibly truncated) data, with the ellipsis appended
            # outside the quotes.
            max_chars = 32
            snippet = repr(startdata[:max_chars])
            if len(startdata) > max_chars:
                snippet += "..."
            raise ValueError(
                "Unrecognized JSON log format. Expected '{' or '[' but got"
                f" {first_char!r} from {error_type} content starting with {snippet}"
            )

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.parse_raw(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.parse_raw(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.parse_raw(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: Union[str, bytes]) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.json()`` or a streaming format of one JSON `ButlerLogRecord`
            per line. This can also support a zero-length string.

        Raises
        ------
        TypeError
            Raised if the streaming-format data are neither `str` nor
            `bytes`.
        ValueError
            Raised if the data do not start with a recognized character.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The full payload is passed without copying; only its start is
        # examined to work out the format.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.parse_raw(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[Union[str, bytes]]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.parse_raw(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string
        (`str`). Falls back to the default format if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: Optional[str]) -> Optional[str]:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.__root__)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.__root__)

    def __setitem__(self, index: int, value: Record) -> None:
        self.__root__[index] = self._validate_record(value)

    def __getitem__(self, index: Union[slice, int]) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case. The new collection is built from the records alone so it
        # starts with the default log format.
        item = self.__root__[index]
        if isinstance(item, list):
            return type(self)(__root__=item)
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.__root__.__reversed__()

    def __delitem__(self, index: Union[slice, int]) -> None:
        del self.__root__[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.__root__)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return the given record as a `ButlerLogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord` or `ButlerLogRecord`
            Record to check, converting it if necessary.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            The record itself if it is already a `ButlerLogRecord`, else
            a new one created from the `logging.LogRecord`.

        Raises
        ------
        ValueError
            Raised if the record is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            # Report what is accepted as well as what was received.
            raise ValueError(
                "Records must be of type LogRecord or ButlerLogRecord,"
                f" not {get_full_type_name(record)}"
            )
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index, converting it if
        needed.
        """
        self.__root__.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end, converting it if needed."""
        value = self._validate_record(value)
        self.__root__.append(value)

    def clear(self) -> None:
        """Remove all records."""
        self.__root__.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add every record from the iterable, converting each if needed."""
        self.__root__.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at the given index (default
        last).
        """
        return self.__root__.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.__root__.reverse()

516 

517 

class ButlerLogRecordHandler(StreamHandler):
    """Logging handler that keeps every record it receives in memory.

    The accumulated records are available from the ``records`` attribute,
    which is a `ButlerLogRecords` collection.
    """

    def __init__(self) -> None:
        super().__init__()
        # Begin with an empty collection that emit() will add to.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        """Store the record in the collection rather than writing it out.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record being handled.
        """
        collected = self.records
        collected.append(record)

527 

528 

class JsonLogFormatter(Formatter):
    """Formatter that renders a `LogRecord` as a JSON string."""

    def format(self, record: LogRecord) -> str:
        """Return the JSON serialization of the given record.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record to format.

        Returns
        -------
        text : `str`
            JSON form of the equivalent `ButlerLogRecord`, leaving out
            fields that were not set or that hold their default value.
        """
        return ButlerLogRecord.from_record(record).json(
            exclude_unset=True,
            exclude_defaults=True,
        )