Coverage for python/lsst/daf/butler/core/logging.py: 38%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

206 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("VERBOSE", "ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", 

25 "ButlerLogRecord", "JsonLogFormatter") 

26 

27import logging 

28import datetime 

29import traceback 

30from contextlib import contextmanager 

31from typing import List, Union, Optional, ClassVar, Iterable, Iterator, Dict, IO, Any, Generator 

32 

33from logging import LogRecord, StreamHandler, Formatter 

34from pydantic import BaseModel, PrivateAttr 

35 

36from .utils import isplit 

37 

# Custom level that sits halfway between DEBUG and INFO.
VERBOSE = (logging.DEBUG + logging.INFO) // 2
"""Numeric value of the VERBOSE log level."""

_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Format string applied to log records when no other format is given."""

# Register the textual name to be associated with the custom level.
logging.addLevelName(VERBOSE, "VERBOSE")

45 

46 

class MDCDict(dict):
    """Mapping used to hold MDC values attached to log records.

    This is an internal helper that makes MDC content friendlier to use in
    Python log format strings. Looking up a key that is not present yields
    an empty string instead of raising `KeyError`, and ``__str__`` and
    ``__repr__`` produce a compact ``{key=value, ...}`` form suited to log
    output.
    """

    def __getitem__(self, name: str) -> str:
        """Look up ``name``, yielding an empty string if it is absent.
        """
        return self.get(name, "")

    def __str__(self) -> str:
        """Render the content as ``{k=v, ...}`` with the keys sorted and
        the values interpolated without quotes.
        """
        rendered = ", ".join(f"{key}={self[key]}" for key in sorted(self))
        return f"{{{rendered}}}"

    def __repr__(self) -> str:
        # The repr is deliberately the same as the string form.
        return str(self)

69 

70 

class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory = None
    """Log record factory that was active before the MDC factory was first
    installed, or `None` if the MDC factory is not installed."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key to remove.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: Dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        # A private sentinel distinguishes "key was not set" from "key was
        # set to an empty string"; the value returned by MDC() cannot since
        # it reports an empty string in both cases.
        unset = object()

        previous = {}
        for k, v in mdc.items():
            previous[k] = cls._MDC.get(k, unset)
            cls.MDC(k, v)

        try:
            yield
        finally:
            for k, old in previous.items():
                if old is unset:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, old)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        The factory that was active before the first call is remembered so
        that `restore_log_record_factory` can reinstate it. Calling this
        method again before restoring wraps the current factory once more
        but does not overwrite the remembered original.
        """
        old_factory = logging.getLogRecordFactory()

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)  # type: ignore
            return record

        # Only remember the factory from the first installation; otherwise
        # a repeated call would store our own wrapper and the genuine
        # original could never be restored.
        if cls._old_factory is None:
            cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restores the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factory so that a later installation
            # records whatever factory is current at that time.
            cls._old_factory = None

166 

167 

class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: Optional[str]
    process: int
    processName: str
    exc_info: Optional[str]
    MDC: Dict[str, str]

    class Config:
        """Pydantic model configuration."""

        allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> ButlerLogRecord:
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New model populated from the given record. The timestamp is
            stored in UTC and any exception information is stored as
            formatted traceback text.
        """
        # The properties that are one-to-one mapping.
        simple = ("name", "levelno", "levelname", "filename", "pathname",
                  "lineno", "funcName", "process", "processName")

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created,
                                                                 tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype = record.exc_info[0]
            evalue = record.exc_info[1]
            tb = record.exc_info[2]
            # Each string returned by format_exception() already ends in
            # a newline, so concatenate directly; joining with "\n" would
            # insert a blank line between every traceback entry.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: Optional[str] = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style use for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.dict()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Present the timestamp in ISO 8601 form.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        # Use the class default format.
        return self.format()

265 

266 

# Type alias for values accepted by the mutating methods of the record
# collection class below, which can convert LogRecord to ButlerLogRecord
# if needed.
Record = Union[LogRecord, ButlerLogRecord]

269 

270 

271# Do not inherit from MutableSequence since mypy insists on the values 

272# being Any even though we wish to constrain them to Record. 

class ButlerLogRecords(BaseModel):
    """Class representing a collection of `ButlerLogRecord`.

    The collection supports the usual mutable-sequence operations
    (indexing, slicing, ``append``, ``extend``, ``insert``, ``pop``, ...).
    Methods that add items accept either a `ButlerLogRecord` or a
    `logging.LogRecord`; the latter is converted on insertion.
    """

    __root__: List[ButlerLogRecord]
    _log_format: Optional[str] = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> ButlerLogRecords:
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection containing the given records.
        """
        return cls(__root__=list(records))

    @classmethod
    def from_file(cls, filename: str) -> ButlerLogRecords:
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename, "r") as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: Union[str, bytes]) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.parse_raw()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The snippet is converted to its
            # repr exactly once so that truncated and untruncated content
            # are reported in the same way.
            max_length = 32
            snippet = repr(startdata[:max_length])
            if len(startdata) > max_length:
                snippet += "..."
            raise ValueError("Unrecognized JSON log format. Expected '{' or '[' but got"
                             f" {first_char!r} from {error_type} content starting with {snippet}")

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> ButlerLogRecords:
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream. An empty collection is
            returned if the stream has no content.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.parse_raw(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.parse_raw(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.parse_raw(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: Union[str, bytes]) -> ButlerLogRecords:
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.json()`` or a streaming format of one JSON `ButlerLogRecord`
            per line. This can also support a zero-length string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The parsed records.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The detection only examines the start of the content (and uses
        # a short prefix in any error message); the data are not copied.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.parse_raw(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to split().
        newline = "\n" if isinstance(serialized, str) else b"\n"
        records = [ButlerLogRecord.parse_raw(line) for line in isplit(serialized, newline)  # type: ignore
                   if line]
        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string
        (`str`). Falls back to the default format if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: Optional[str]) -> Optional[str]:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.__root__)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.__root__)

    def __setitem__(self, index: int, value: Record) -> None:
        self.__root__[index] = self._validate_record(value)

    def __getitem__(self, index: Union[slice, int]) -> Union[ButlerLogRecords, ButlerLogRecord]:
        # Handles slices and returns a new collection in that
        # case.
        item = self.__root__[index]
        if isinstance(item, list):
            return type(self)(__root__=item)
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.__root__.__reversed__()

    def __delitem__(self, index: Union[slice, int]) -> None:
        del self.__root__[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.__root__)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return the given item as a `ButlerLogRecord`.

        Parameters
        ----------
        record : `ButlerLogRecord` or `logging.LogRecord`
            Item to be stored in the collection.

        Returns
        -------
        validated : `ButlerLogRecord`
            The item itself if it was already a `ButlerLogRecord`, else
            the result of converting the `~logging.LogRecord`.

        Raises
        ------
        ValueError
            Raised if the item is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            # Name the types that are accepted as well as the one that
            # was rejected.
            raise ValueError("Can only append item of type ButlerLogRecord or LogRecord,"
                             f" not {type(record)}")
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index."""
        self.__root__.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end of the collection."""
        value = self._validate_record(value)
        self.__root__.append(value)

    def clear(self) -> None:
        """Remove all records from the collection."""
        self.__root__.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add every record from the iterable to the end of the collection."""
        self.__root__.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at the given index (default last)."""
        return self.__root__.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.__root__.reverse()

510 

511 

class ButlerLogRecordHandler(StreamHandler):
    """Logging handler that keeps every record it receives in memory.

    The accumulated records are available from the ``records`` attribute.
    """

    def __init__(self) -> None:
        super().__init__()
        # Start with an empty collection.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        """Store the record in the collection rather than writing it out.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record being handled.
        """
        self.records.append(record)

522 

523 

class JsonLogFormatter(Formatter):
    """Formatter that serializes a `LogRecord` as a JSON string."""

    def format(self, record: LogRecord) -> str:
        """Convert the record to a `ButlerLogRecord` and return its JSON
        form, leaving out fields that are unset or have default values.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record to format.

        Returns
        -------
        text : `str`
            JSON serialization of the record.
        """
        return ButlerLogRecord.from_record(record).json(exclude_unset=True, exclude_defaults=True)

529 return butler_record.json(exclude_unset=True, exclude_defaults=True)