Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("ButlerMDC", "ButlerLogRecords", "ButlerLogRecordHandler", 

25 "ButlerLogRecord", "JsonLogFormatter") 

26 

27import logging 

28import datetime 

29import traceback 

30from contextlib import contextmanager 

31from typing import List, Union, Optional, ClassVar, Iterable, Iterator, Dict, IO, Any, Generator 

32 

33from logging import LogRecord, StreamHandler, Formatter 

34from pydantic import BaseModel, PrivateAttr 

35 

36from lsst.utils.iteration import isplit 

37from lsst.utils.introspection import get_full_type_name 

38 

39_LONG_LOG_FORMAT = "{levelname} {asctime} {name} {filename}:{lineno} - {message}" 

40"""Default format for log records.""" 

41 

42 

class MDCDict(dict):
    """Mapping that holds MDC values for inclusion in log output.

    Intended for internal use. Looking up a key that is not present yields
    an empty string rather than raising `KeyError`, and both ``__str__``
    and ``__repr__`` render the content as ``{key=value, ...}`` with the
    keys sorted and the values interpolated without quotes.
    """

    def __getitem__(self, name: str) -> str:
        """Return the value stored for ``name``, or ``""`` if it is absent.
        """
        return self.get(name, "")

    def __str__(self) -> str:
        """Return the content as ``{k1=v1, k2=v2}`` ordered by key.
        """
        body = ", ".join(f"{key}={self[key]}" for key in sorted(self))
        return f"{{{body}}}"

    def __repr__(self) -> str:
        # The repr is deliberately the same as the string form.
        return str(self)

65 

66 

class ButlerMDC:
    """Handle setting and unsetting of global MDC records.

    The Mapped Diagnostic Context (MDC) can be used to set context
    for log messages.

    Currently there is one global MDC dict. Per-thread MDC is not
    yet supported.
    """

    _MDC = MDCDict()

    _old_factory = None
    """Old log record factory."""

    # The MDC-aware factory most recently installed by
    # ``add_mdc_log_record_factory``; `None` if none is installed. Used to
    # make repeated installation a no-op.
    _mdc_factory = None

    @classmethod
    def MDC(cls, key: str, value: str) -> str:
        """Set MDC for this key to the supplied value.

        Parameters
        ----------
        key : `str`
            Key to modify.
        value : `str`
            New value to use.

        Returns
        -------
        old : `str`
            The previous value for this key. An empty string is returned
            if the key was not previously set.
        """
        old_value = cls._MDC[key]
        cls._MDC[key] = value
        return old_value

    @classmethod
    def MDCRemove(cls, key: str) -> None:
        """Clear the MDC value associated with this key.

        Can be called even if the key is not known to MDC.
        """
        cls._MDC.pop(key, None)

    @classmethod
    @contextmanager
    def set_mdc(cls, mdc: Dict[str, str]) -> Generator[None, None, None]:
        """Set the MDC key for this context.

        Parameters
        ----------
        mdc : `dict` of `str`, `str`
            MDC keys to update temporarily.

        Notes
        -----
        Other MDC keys are not modified. The previous values are restored
        on exit (removing them if they were unset previously). A key that
        was previously set to an empty string is restored to an empty
        string rather than being removed.
        """
        # A private sentinel distinguishes "key was not set" from "key was
        # set to an empty string"; MDC lookups return "" for missing keys
        # so the returned value alone can not tell the two apart.
        unset = object()
        previous: Dict[str, Any] = {}
        for k, v in mdc.items():
            previous[k] = cls._MDC.get(k, unset)
            cls.MDC(k, v)

        try:
            yield
        finally:
            for k, old in previous.items():
                if old is unset:
                    cls.MDCRemove(k)
                else:
                    cls.MDC(k, old)

    @classmethod
    def add_mdc_log_record_factory(cls) -> None:
        """Add a log record factory that adds a MDC record to `LogRecord`.

        Notes
        -----
        Calling this method again while the MDC factory is still the active
        log record factory does nothing. This prevents the factory from
        wrapping itself and keeps the originally-saved factory available
        to `restore_log_record_factory`.
        """
        old_factory = logging.getLogRecordFactory()

        if cls._mdc_factory is not None and old_factory is cls._mdc_factory:
            # Already installed; wrapping again would lose the reference
            # to the original factory.
            return

        def record_factory(*args: Any, **kwargs: Any) -> LogRecord:
            record = old_factory(*args, **kwargs)
            # Make sure we send a copy of the global dict in the record.
            record.MDC = MDCDict(cls._MDC)  # type: ignore
            return record

        cls._old_factory = old_factory
        cls._mdc_factory = record_factory
        logging.setLogRecordFactory(record_factory)

    @classmethod
    def restore_log_record_factory(cls) -> None:
        """Restores the log record factory to the original form.

        Does nothing if there has not been a call to
        `add_mdc_log_record_factory`.
        """
        if cls._old_factory:
            logging.setLogRecordFactory(cls._old_factory)
            # Forget the saved factories so that a later call can not
            # reinstate a stale factory.
            cls._old_factory = None
            cls._mdc_factory = None

162 

163 

class ButlerLogRecord(BaseModel):
    """A model representing a `logging.LogRecord`.

    A `~logging.LogRecord` always uses the current time in its record
    when recreated and that makes it impossible to use it as a
    serialization format. Instead have a local representation of a
    `~logging.LogRecord` that matches Butler needs.
    """

    _log_format: ClassVar[str] = _LONG_LOG_FORMAT

    name: str
    asctime: datetime.datetime
    message: str
    levelno: int
    levelname: str
    filename: str
    pathname: str
    lineno: int
    funcName: Optional[str]
    process: int
    processName: str
    exc_info: Optional[str]
    MDC: Dict[str, str]

    class Config:
        """Pydantic model configuration."""

        allow_mutation = False

    @classmethod
    def from_record(cls, record: LogRecord) -> ButlerLogRecord:
        """Create a new instance from a `~logging.LogRecord`.

        Parameters
        ----------
        record : `logging.LogRecord`
            The record from which to extract the relevant information.

        Returns
        -------
        butler_record : `ButlerLogRecord`
            New record populated from the supplied log record. The
            ``exc_info`` field is only set if the log record carries
            exception information.
        """
        # The properties that are one-to-one mapping.
        simple = ("name", "levelno", "levelname", "filename", "pathname",
                  "lineno", "funcName", "process", "processName")

        record_dict = {k: getattr(record, k) for k in simple}

        record_dict["message"] = record.getMessage()

        # MDC -- ensure the contents are copied to prevent any confusion
        # over the MDC global being updated later.
        record_dict["MDC"] = dict(getattr(record, "MDC", {}))

        # Always use UTC because in distributed systems we can't be sure
        # what timezone localtime is and it's easier to compare logs if
        # every system is using the same time.
        record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created,
                                                                 tz=datetime.timezone.utc)

        # Sometimes exception information is included so must be
        # extracted.
        if record.exc_info:
            etype, evalue, tb = record.exc_info
            # Each string returned by format_exception() already ends in
            # a newline so they are concatenated directly; joining with
            # an extra newline would double-space the traceback.
            record_dict["exc_info"] = "".join(traceback.format_exception(etype, evalue, tb))

        return cls(**record_dict)

    def format(self, log_format: Optional[str] = None) -> str:
        """Format this record.

        Parameters
        ----------
        log_format : `str`, optional
            The format string to use. This string uses `str.format`
            ("{"-style) replacement fields named after the fields of this
            model. If `None` the class default will be used.

        Returns
        -------
        text : `str`
            The formatted log message.
        """
        if log_format is None:
            log_format = self._log_format

        as_dict = self.dict()

        # Special case MDC content. Convert it to an MDCDict
        # so that missing items do not break formatting.
        as_dict["MDC"] = MDCDict(as_dict["MDC"])

        # Present the time stamp in ISO format.
        as_dict["asctime"] = as_dict["asctime"].isoformat()
        formatted = log_format.format(**as_dict)
        return formatted

    def __str__(self) -> str:
        # Use the class default format.
        return self.format()

261 

262 

263# The class below can convert LogRecord to ButlerLogRecord if needed. 

264Record = Union[LogRecord, ButlerLogRecord] 

265 

266 

267# Do not inherit from MutableSequence since mypy insists on the values 

268# being Any even though we wish to constrain them to Record. 

class ButlerLogRecords(BaseModel):
    """Class representing a collection of `ButlerLogRecord`.

    The records are held in the custom root list of this model and the
    class provides list-like access to them. Methods that add records
    accept either a `ButlerLogRecord` or a `logging.LogRecord`; the latter
    is converted on insertion.
    """

    __root__: List[ButlerLogRecord]
    _log_format: Optional[str] = PrivateAttr(None)

    @classmethod
    def from_records(cls, records: Iterable[ButlerLogRecord]) -> ButlerLogRecords:
        """Create collection from iterable.

        Parameters
        ----------
        records : iterable of `ButlerLogRecord`
            The records to seed this class with.
        """
        return cls(__root__=list(records))

    @classmethod
    def from_file(cls, filename: str) -> ButlerLogRecords:
        """Read records from file.

        Parameters
        ----------
        filename : `str`
            Name of file containing the JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        with open(filename, "r") as fd:
            return cls.from_stream(fd)

    @staticmethod
    def _detect_model(startdata: Union[str, bytes]) -> bool:
        """Given some representative data, determine if this is a serialized
        model or a streaming format.

        Parameters
        ----------
        startdata : `bytes` or `str`
            Representative characters or bytes from the start of a serialized
            collection of log records.

        Returns
        -------
        is_model : `bool`
            Returns `True` if the data look like a serialized pydantic model.
            Returns `False` if it looks like a streaming format. Returns
            `False` also if an empty string is encountered since this
            is not understood by `ButlerLogRecords.parse_raw()`.

        Raises
        ------
        ValueError
            Raised if the sentinel doesn't look like either of the supported
            log record formats.
        """
        if not startdata:
            return False

        # Allow byte or str streams since pydantic supports either.
        # We don't want to convert the entire input to unicode unnecessarily.
        if isinstance(startdata, bytes):
            first_char = chr(startdata[0])
            error_type = "byte"
        else:
            first_char = startdata[0]
            error_type = "str"

        if first_char == "[":
            # This is an array of records.
            return True
        if first_char == "{":
            # Assume a record per line.
            return False

        # Limit the length of string reported in error message in case
        # this is an enormous file. The repr is taken exactly once so that
        # truncated content is not quoted twice in the message.
        max_length = 32
        snippet = repr(startdata[:max_length])
        if len(startdata) > max_length:
            snippet += "..."
        raise ValueError("Unrecognized JSON log format. Expected '{' or '[' but got"
                         f" {first_char!r} from {error_type} content starting with {snippet}")

    @classmethod
    def from_stream(cls, stream: IO) -> ButlerLogRecords:
        """Read records from I/O stream.

        Parameters
        ----------
        stream : `typing.IO`
            Stream from which to read JSON records.

        Notes
        -----
        Works with one-record-per-line format JSON files and a direct
        serialization of the Pydantic model.
        """
        first_line = stream.readline()

        if not first_line:
            # Empty file, return zero records.
            return cls.from_records([])

        is_model = cls._detect_model(first_line)

        if is_model:
            # This is a ButlerLogRecords model serialization so all the
            # content must be read first.
            content = first_line + stream.read()
            return cls.parse_raw(content)

        # A stream of records with one record per line.
        records = [ButlerLogRecord.parse_raw(first_line)]
        for line in stream:
            line = line.rstrip()
            if line:  # Filter out blank lines.
                records.append(ButlerLogRecord.parse_raw(line))

        return cls.from_records(records)

    @classmethod
    def from_raw(cls, serialized: Union[str, bytes]) -> ButlerLogRecords:
        """Parse raw serialized form and return records.

        Parameters
        ----------
        serialized : `bytes` or `str`
            Either the serialized JSON of the model created using
            ``.json()`` or a streaming format of one JSON `ButlerLogRecord`
            per line. This can also support a zero-length string.
        """
        if not serialized:
            # No records to return
            return cls.from_records([])

        # The full content is passed without copying; only its first
        # character is used to choose the format and the remainder is
        # only consulted to build an error message.
        is_model = cls._detect_model(serialized)

        if is_model:
            return cls.parse_raw(serialized)

        # Filter out blank lines -- mypy is confused by the newline
        # argument to isplit() [which can't have two different types
        # simultaneously] so we have to duplicate some logic.
        substrings: Iterator[Union[str, bytes]]
        if isinstance(serialized, str):
            substrings = isplit(serialized, "\n")
        elif isinstance(serialized, bytes):
            substrings = isplit(serialized, b"\n")
        else:
            raise TypeError(f"Serialized form must be str or bytes not {get_full_type_name(serialized)}")
        records = [ButlerLogRecord.parse_raw(line) for line in substrings if line]

        return cls.from_records(records)

    @property
    def log_format(self) -> str:
        """Format string used when converting the records to a string.

        The module default format is reported if no format has been set.
        """
        if self._log_format is None:
            return _LONG_LOG_FORMAT
        return self._log_format

    # Pydantic does not allow a property setter to be given for
    # public properties of a model that is not based on a dict.
    def set_log_format(self, format: Optional[str]) -> Optional[str]:
        """Set the log format string for these records.

        Parameters
        ----------
        format : `str`, optional
            The new format string to use for converting this collection
            of records into a string. If `None` the default format will be
            used.

        Returns
        -------
        old_format : `str`, optional
            The previous log format.
        """
        previous = self._log_format
        self._log_format = format
        return previous

    def __len__(self) -> int:
        return len(self.__root__)

    # The signature does not match the one in BaseModel but that is okay
    # if __root__ is being used.
    # See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
    def __iter__(self) -> Iterator[ButlerLogRecord]:  # type: ignore
        return iter(self.__root__)

    def __setitem__(self, index: int, value: Record) -> None:
        self.__root__[index] = self._validate_record(value)

    def __getitem__(self, index: Union[slice, int]) -> Union[ButlerLogRecords, ButlerLogRecord]:
        # Handles slices and returns a new collection in that
        # case.
        item = self.__root__[index]
        if isinstance(item, list):
            return type(self)(__root__=item)
        else:
            return item

    def __reversed__(self) -> Iterator[ButlerLogRecord]:
        return self.__root__.__reversed__()

    def __delitem__(self, index: Union[slice, int]) -> None:
        del self.__root__[index]

    def __str__(self) -> str:
        # Ensure that every record uses the same format string.
        return "\n".join(record.format(self.log_format) for record in self.__root__)

    def _validate_record(self, record: Record) -> ButlerLogRecord:
        """Return the supplied record as a `ButlerLogRecord`.

        Parameters
        ----------
        record : `ButlerLogRecord` or `logging.LogRecord`
            Record to check. A `logging.LogRecord` is converted.

        Returns
        -------
        validated : `ButlerLogRecord`
            The record in the form stored by this collection.

        Raises
        ------
        ValueError
            Raised if the record is not one of the supported types.
        """
        if isinstance(record, ButlerLogRecord):
            pass
        elif isinstance(record, LogRecord):
            record = ButlerLogRecord.from_record(record)
        else:
            # Report the accepted types as well as the offending one.
            raise ValueError("Can only append item of type ButlerLogRecord or LogRecord,"
                             f" not {type(record)}")
        return record

    def insert(self, index: int, value: Record) -> None:
        """Insert a record before the given index."""
        self.__root__.insert(index, self._validate_record(value))

    def append(self, value: Record) -> None:
        """Add a record to the end of the collection."""
        value = self._validate_record(value)
        self.__root__.append(value)

    def clear(self) -> None:
        """Remove all records from the collection."""
        self.__root__.clear()

    def extend(self, records: Iterable[Record]) -> None:
        """Add each of the supplied records to the end of the collection."""
        self.__root__.extend(self._validate_record(record) for record in records)

    def pop(self, index: int = -1) -> ButlerLogRecord:
        """Remove and return the record at the given index (default last)."""
        return self.__root__.pop(index)

    def reverse(self) -> None:
        """Reverse the order of the records in place."""
        self.__root__.reverse()

513 

514 

class ButlerLogRecordHandler(StreamHandler):
    """Log handler that collects every emitted record in memory.

    The collected records are available from the ``records`` attribute
    as a `ButlerLogRecords` collection.
    """

    def __init__(self) -> None:
        super().__init__()
        # Start from an empty collection.
        self.records = ButlerLogRecords.from_records([])

    def emit(self, record: LogRecord) -> None:
        # Store the record rather than writing it to a stream; the
        # collection converts it to a ButlerLogRecord.
        self.records.append(record)

525 

526 

class JsonLogFormatter(Formatter):
    """Formatter that renders a `LogRecord` as a JSON string."""

    def format(self, record: LogRecord) -> str:
        # Convert to the Butler model and serialize it, leaving out
        # fields that were not set or that have their default value.
        json_options = {"exclude_unset": True, "exclude_defaults": True}
        return ButlerLogRecord.from_record(record).json(**json_options)

532 return butler_record.json(exclude_unset=True, exclude_defaults=True)