Coverage for python/lsst/daf/butler/core/logging.py: 33%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

208 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

# Public API of this module: the names bound by a star-import.
# ``MDCDict`` is not listed; its docstring describes it as an internal class.
__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler",
           "ButlerLogRecord", "JsonLogFormatter")

24 

25import logging 

26import datetime 

27import traceback 

28from contextlib import contextmanager 

29from typing import List, Union, Optional, ClassVar, Iterable, Iterator, Dict, IO, Any, Generator 

30 

31from logging import LogRecord, StreamHandler, Formatter 

32from pydantic import BaseModel, PrivateAttr 

33 

34from lsst.utils.iteration import isplit 

35from lsst.utils.introspection import get_full_type_name 

36 

# ``str.format``-style template whose placeholders are filled from the fields
# of a ``ButlerLogRecord`` when that record is formatted.  It is the fallback
# used by ``ButlerLogRecord`` and ``ButlerLogRecords`` when no explicit format
# string has been supplied.
_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}"
"""Default format for log records."""

39 

40 

class MDCDict(dict):
    """Mapping holding MDC key/value pairs.

    This is an internal helper whose only purpose is to make MDC content
    render nicely in Python logging output. Item lookup never fails:
    a missing key yields an empty string, much like `defaultdict(str)`
    would, except that the lookup does not insert the key. ``__str__``
    and ``__repr__`` are both overridden to give a compact form that is
    better suited to log records.
    """

    def __getitem__(self, name: str) -> str:
        """Return the value stored for ``name`` or ``""`` if it is absent.
        """
        try:
            return super().__getitem__(name)
        except KeyError:
            return ""

    def __str__(self) -> str:
        """Return ``{key=value, ...}`` with keys sorted and values
        interpolated without quotes.
        """
        parts = []
        for key in sorted(self):
            parts.append(f"{key}={self[key]}")
        body = ", ".join(parts)
        return "{" + body + "}"

    def __repr__(self) -> str:
        """Return the same text as ``__str__``."""
        return str(self)

63 

64 

class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    # The factory that was active before the first call to
    # add_mdc_log_record_factory(); reset to None once it has been restored.
    _old_factory = None
    """Old log record factory."""

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.

        Parameters
        ----------
        key : `str`
            Key to remove.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: Dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously).
        """
        previous = {}
        for k, v in mdc.items():
            previous[k] = cls.MDC(k, v)

        try:
            yield
        finally:
            for k, v in previous.items():
                # An empty previous value means the key was unset (or held
                # an empty string, which formats identically), so drop it.
                if not v:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, v)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        Every call wraps whatever factory is currently installed. Only the
        factory found by the first call is remembered, so that
        `restore_log_record_factory` reinstates the original factory
        even if this method has been called more than once.
        """
        old_factory = logging.getLogRecordFactory()

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)  # type: ignore
            return record

        # Do not overwrite an already-remembered factory: on a repeated call
        # ``old_factory`` is our own wrapper and restoring to it would leave
        # the MDC factory installed.
        if cls._old_factory is None:
            cls._old_factory = old_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restores the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory is not None:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the restored factory so that a later install/restore
            # cycle records whatever factory is active at that time.
            cls._old_factory = None

160 

161 

class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    # Format string used by format() when the caller does not supply one.
    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: Optional[str]
    process: int
    processName: str
    exc_info: Optional[str]
    MDC: Dict[str, str]

    class Config:
        """Pydantic model configuration."""

        allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> "ButlerLogRecord":
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New record holding a copy of the relevant information. The
            time stamp is converted to UTC and any exception information
            is stored as formatted traceback text.
        """
        # The properties that are one-to-one mapping.
        simple = ("name", "levelno", "levelname", "filename", "pathname",
                  "lineno", "funcName", "process", "processName")

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created,
                                                                 tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype, evalue, tb = record.exc_info
            # Each string returned by format_exception() already ends in a
            # newline, so concatenate them directly; joining with "\n" would
            # insert a blank line between every traceback entry.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: Optional[str] = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string follows the standard
            f-style use for formatting log messages. If `None`
            the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.dict()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Render the time stamp as ISO 8601 text rather than relying on the
        # default string form of the datetime object.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        """Return this record formatted with the class default format."""
        return self.format()

259 

260 

# The class below can convert LogRecord to ButlerLogRecord if needed.
# ``Record`` is the type accepted wherever a record is added to a
# ``ButlerLogRecords`` collection (append, insert, extend, item assignment).
Record = Union[LogRecord, ButlerLogRecord]

263 

264 

265# Do not inherit from MutableSequence since mypy insists on the values 

266# being Any even though we wish to constrain them to Record. 

class ButlerLogRecords(BaseModel):
    """Class representing a collection of `ButlerLogRecord`.

    The collection offers list-like access (indexing, slicing, iteration,
    ``append``, ``insert``, ``extend``, ``pop``, ``reverse``, ``clear``).
    Any `logging.LogRecord` that is added is converted to a
    `ButlerLogRecord` first.
    """

    __root__: List[ButlerLogRecord]
    # Format used by __str__; None means "use the module default".
    _log_format: Optional[str] = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> "ButlerLogRecords":
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.

        Returns
        -------
        collection : `ButlerLogRecords`
            New collection holding the given records.
        """
        return cls(__root__=list(records))

    @classmethod
    def from_file(cls, filename: str) -> "ButlerLogRecords":
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the file.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename, "r") as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: Union[str, bytes]) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.parse_raw()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        error_type = "str"
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char != "{":
            # Limit the length of string reported in error message in case
            # this is an enormous file. The snippet is passed through repr()
            # exactly once so that it is not quoted twice in the message.
            max_len = 32
            snippet = repr(startdata[:max_len])
            if len(startdata) > max_len:
                snippet += "..."
            raise ValueError("Unrecognized JSON log format. Expected '{' or '[' but got"
                             f" {first_char!r} from {error_type} content starting with {snippet}")

        # Assume a record per line.
        return False

    @classmethod
    def from_stream(cls, stream: IO) -> "ButlerLogRecords":
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records read from the stream. The collection is empty if
            the stream has no content.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.parse_raw(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.parse_raw(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.parse_raw(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: Union[str, bytes]) -> "ButlerLogRecords":
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.json()`` or a streaming format of one JSON `ButlerLogRecord`
            per line. This can also support a zero-length string.

        Returns
        -------
        collection : `ButlerLogRecords`
            The records parsed from the serialized form.

        Raises
        ------
        TypeError
            Raised if the serialized form is neither `str` nor `bytes`.
        ValueError
            Raised if the content does not start like a supported format.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The whole payload is handed over without copying; only its first
        # character is examined (plus a short prefix for error reporting).
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.parse_raw(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[Union[str, bytes]]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.parse_raw(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string
        (`str`); the module default is returned if none has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: Optional[str]) -> Optional[str]:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.__root__)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.__root__)

    def __setitem__(self, index: int, value: Record) -> None:
        self.__root__[index] = self._validate_record(value)

    def __getitem__(self, index: Union[slice, int]) -> "Union[ButlerLogRecords, ButlerLogRecord]":
        # Handles slices and returns a new collection in that
        # case.
        item = self.__root__[index]
        if isinstance(item, list):
            subset = type(self)(__root__=item)
            # Carry over any custom format so that a slice is converted to
            # a string the same way as the collection it came from.
            subset._log_format = self._log_format
            return subset
        return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.__root__.__reversed__()

    def __delitem__(self, index: Union[slice, int]) -> None:
        del self.__root__[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.__root__)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return the given record as a `ButlerLogRecord`.

        Parameters
        ----------
        record : `ButlerLogRecord` or `logging.LogRecord`
            Record about to be stored in this collection.

        Returns
        -------
        record : `ButlerLogRecord`
            The record itself, or its conversion if a `logging.LogRecord`
            was given.

        Raises
        ------
        ValueError
            Raised if the record is of neither supported type.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            # Name the accepted types and report the rejected one.
            raise ValueError("Can only append item of type ButlerLogRecord or LogRecord,"
                             f" not {type(record)}")
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index."""
        self.__root__.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end of the collection."""
        value = self._validate_record(value)
        self.__root__.append(value)

    def clear(self) -> None:
        """Remove all records from the collection."""
        self.__root__.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add every record from the iterable to the end of the collection.
        """
        self.__root__.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at the given index (default last).
        """
        return self.__root__.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.__root__.reverse()

511 

512 

class ButlerLogRecordHandler(StreamHandler):
    """Logging handler that keeps every record it receives.

    The records are appended to the ``records`` attribute, which starts
    out as an empty `ButlerLogRecords` collection.
    """

    def __init__(self) -> None:
        super().__init__()
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        """Store the record in ``records``.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record being handled.
        """
        collected = self.records
        collected.append(record)

523 

524 

class JsonLogFormatter(Formatter):
    """Formatter that renders a `LogRecord` as JSON text."""

    def format(self, record: LogRecord) -> str:
        """Convert the record to a `ButlerLogRecord` and serialize it.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record to format.

        Returns
        -------
        text : `str`
            JSON form of the record, leaving out fields that were never
            set or that still hold their default value.
        """
        return ButlerLogRecord.from_record(record).json(exclude_unset=True, exclude_defaults=True)