Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of astro_metadata_translator. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the LICENSE file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12"""Code to support header manipulation operations.""" 

13 

14__all__ = ("merge_headers", "fix_header") 

15 

16import datetime 

17import pkg_resources 

18import posixpath 

19import logging 

20import itertools 

21import copy 

22import os 

23import yaml 

24from collections.abc import Mapping 

25from collections import Counter 

26 

27from .translator import MetadataTranslator 

28from .translators import FitsTranslator 

29 

30log = logging.getLogger(__name__) 

31 

32ENV_VAR_NAME = "METADATA_CORRECTIONS_PATH" 

33"""Name of environment variable containing search path for header fix up.""" 

34 

35HIERARCH = "HIERARCH ASTRO METADATA FIX" 

36"""FITS-style hierarchical keyword root.""" 

37 

38FIXUP_SENTINEL = HIERARCH + " MODIFIED" 

39"""Keyword to add to header when header has been fixed.""" 

40 

41 

def _sort_key_by_date(hdr):
    """Return the begin date of the observation in ``hdr`` for sorting.

    Falls back to the generic FITS translator if no specific translator
    claims the header.
    """
    try:
        translator_class = MetadataTranslator.determine_translator(hdr)
    except ValueError:
        # Try the FITS translator
        translator_class = FitsTranslator
    return translator_class(hdr).to_datetime_begin()


def _merge_overwrite(merged, first_hdr, headers):
    """Merge mode ``overwrite``: later values replace earlier ones."""
    for hdr in headers:
        merged.update(hdr)
    return merged


def _merge_first(merged, first_hdr, headers):
    """Merge mode ``first``: keep the first value encountered per key."""
    # Reversing the headers and using overwrite mode would result in the
    # header order being inconsistent dependent on mode.
    for hdr in headers:
        for key in hdr:
            if key not in merged:
                merged[key] = hdr[key]
    return merged


def _merge_drop(merged, first_hdr, headers):
    """Merge mode ``drop``: remove keys whose values conflict."""
    drop = set()
    for hdr in headers:
        for key in hdr:
            if key not in merged:
                merged[key] = hdr[key]
            elif merged[key] != hdr[key]:
                # Key should be dropped later (not in loop since removing
                # the key now might add it back for the next header).
                drop.add(key)

    for key in drop:
        del merged[key]
    return merged


def _merge_diff(merged, first_hdr, headers):
    """Merge mode ``diff``: move conflicting or partial keys to ``__DIFF__``.

    May return a plain `dict` even if given a `dict`-like object, since some
    `dict`-like containers cannot store a `list` of `dict` values.
    """
    dropped_keys = set()

    # Only want to keep keys in the merged header that are in all the
    # input headers and identical. Seed the counts with the first header.
    counter = Counter(merged.keys())

    for hdr in headers:
        counter.update(hdr.keys())
        for key in hdr:
            if key not in merged:
                merged[key] = hdr[key]
            elif merged[key] != hdr[key]:
                # Key should be dropped later (not in loop since removing
                # the key now might add it back for the next header).
                dropped_keys.add(key)

    # Any key with a count less than the number of input headers
    # (including the first one) is missing from some header and must
    # also be dropped into the diff.
    n = len(headers) + 1
    dropped_keys.update(key for key, count in counter.items() if count != n)

    # For each input header (the first one included) create a distinct
    # diff dict holding the dropped keys that this header actually has.
    # A key only present in some headers is simply absent from the
    # other diffs.
    diffs = [{k: hdr[k] for k in dropped_keys & set(hdr)}
             for hdr in itertools.chain([first_hdr], headers)]

    # PropertyList does not let us attach a dict to it
    # so if we encounter this we have to force a type change to dict
    try:
        merged["__DIFF__"] = diffs
    except TypeError:
        merged = dict(merged)
        merged["__DIFF__"] = diffs

    for key in dropped_keys:
        del merged[key]
    return merged


def _merge_append(merged, first_hdr, headers):
    """Merge mode ``append``: conflicting keys collect one value per header."""
    fill = set()
    for hdr in headers:
        for key in hdr:
            if key not in merged:
                merged[key] = hdr[key]
            elif not isinstance(merged[key], list) and merged[key] != hdr[key]:
                # If we detect different values, store an empty list
                # in the slot and fill it later. Do it at end so
                # we can pick up earlier values and fill empty with None.
                merged[key] = []
                fill.add(key)

    # Fill the entries that have multiple differing values
    for key in fill:
        merged[key] = [h[key] if key in h else None
                       for h in itertools.chain([first_hdr], headers)]
    return merged


def merge_headers(headers, mode="overwrite", sort=False, first=None, last=None):
    """Merge multiple headers into a single dict.

    Given a list of dict-like data headers, combine them following the
    specified mode.

    Parameters
    ----------
    headers : `list` of `dict` (or `dict`-like)
        Collection of headers to combine. `~lsst.daf.base.PropertyList`
        is supported.
    mode : `str`
        Scheme to use when a header has the same key as another header
        but different value. Options are:

        - ``'overwrite'`` : Value in later header overwrites earlier value.
        - ``'drop'`` : Entire key is dropped. If a key only appears in a
          subset of the headers, and is identical in those, it will be
          retained.
        - ``'diff'`` : As for ``drop`` but the dropped values are stored in a
          `list` of `dict` in the returned merged header in key
          ``__DIFF__``. The order used matches the supplied order or
          the sorted order if specified. This allows a simple header diff
          to be performed and associated with the original headers. Only
          keys that appear in all headers will be retained in the merged one.
          Unlike for ``'drop'`` headers that are identical and only present in
          a subset will always be included in the diff.
        - ``'first'`` : Retain first value encountered.
        - ``'append'`` : Convert value to list with a value for each header
          (`None` if the key was not present). If the value is
          identical in multiple headers but key is missing in
          some, then the single identical header is stored.
    sort : `bool`, optional
        If `True`, sort the supplied headers into date order if possible.
        This affects the resulting merged output depending on the requested
        merge mode. An attempt will be made to extract a date from the
        headers.
    first : `list` or `tuple`, optional
        Keys to retain even if they differ. For all modes excepting ``append``
        (where it is ignored) the value in the merged header will always be
        the value first encountered. This is usually to allow time-dependent
        headers such as ``DATE-OBS`` and ``AZSTART`` to be retained to allow
        the header to indicate the range of values. No exception is raised if
        a key can not be found in a header since this allows a range of
        expected headers to be listed covering multiple instruments.
    last : `list` or `tuple`, optional
        Keys to retain even if they differ. For all modes excepting ``append``
        (where it is ignored) the value in the merged header will always be
        the final value encountered. This is usually to allow time-dependent
        headers such as ``DATE-END`` and ``AZEND`` to be retained to allow
        the header to indicate the range of values. No exception is raised if
        a key can not be found in a header since this allows a range of
        expected headers to be listed covering multiple instruments.

    Returns
    -------
    merged : `dict`
        Single `dict` combining all the headers using the specified
        combination mode.

    Notes
    -----
    If ``first`` and ``last`` are supplied, the keys from ``first`` are
    handled first, followed by the keys from ``last``. No check is made to
    ensure that the keys do not overlap.
    """
    if not headers:
        raise ValueError("No headers supplied.")

    # Copy the input list because we will be reorganizing it
    headers = list(headers)

    # With a single header provided return a copy immediately
    if len(headers) == 1:
        return copy.deepcopy(headers[0])

    if sort:
        headers = sorted(headers, key=_sort_key_by_date)

    logging.getLogger(__name__).debug("Received %d headers for merging", len(headers))

    # Pull out first header and seed the merged header with a copy of it.
    first_hdr = headers.pop(0)
    merged = copy.deepcopy(first_hdr)

    # Dispatch to the implementation of the requested merge mode.
    merge_functions = {
        "overwrite": _merge_overwrite,
        "first": _merge_first,
        "drop": _merge_drop,
        "diff": _merge_diff,
        "append": _merge_append,
    }
    try:
        merge_func = merge_functions[mode]
    except KeyError:
        raise ValueError(f"Unsupported value of '{mode}' for mode parameter.") from None
    merged = merge_func(merged, first_hdr, headers)

    # Force the first and last values to be inserted.
    # ``append`` mode already stores every value, so these are ignored there.
    if mode != "append":
        def retain_value(to_receive, to_retain, sources):
            if to_retain:
                for k in to_retain:
                    # Look for values until we find one
                    for h in sources:
                        if k in h:
                            to_receive[k] = h[k]
                            break

        all_headers = (first_hdr, *headers)
        retain_value(merged, first, all_headers)
        retain_value(merged, last, tuple(reversed(all_headers)))

    return merged

249 

250 

def _read_yaml(fh, msg):
    """Read YAML from file descriptor.

    Parameters
    ----------
    fh : `io.IOBase`
        Open file handle containing the YAML stream
    msg : `str`
        Text to include in log file when referring to this stream. Examples
        could be "file something.yaml" or "resource module:resource".

    Returns
    -------
    parsed : `dict` or `None`
        The contents of the YAML file if it was a `dict`, else `None` if
        the contents could not be parsed or the contents were YAML but
        not a mapping.
    """
    # Corrections are best-effort: any parse problem is logged, not raised.
    try:
        parsed = yaml.safe_load(fh)
    except Exception as e:
        log.warning("Error parsing YAML header corrections from %s: %s", msg, str(e))
        return None

    if isinstance(parsed, Mapping):
        return parsed

    # Valid YAML but not key/value pairs -- unusable as header corrections.
    log.warning("YAML Mapping not found in %s. Ignoring contents.", msg)
    return None

280 

281 

def _find_from_file(header, paths, target_file):
    """Search file system for matching correction files.

    Parameters
    ----------
    header : `dict`
        Header to update.
    paths : `list`
        Paths to search.
    target_file : `str`
        File to locate in the path.

    Returns
    -------
    correction_found : `str` or `None`
        The path of the correction file used to update the header or
        `None`. Only the first correction located in a path is used.
    """
    for directory in paths:
        candidate = os.path.join(directory, target_file)
        if not os.path.exists(candidate):
            continue

        log.debug("Applying header corrections from file %s", candidate)
        with open(candidate) as fh:
            corrections = _read_yaml(fh, f"file {candidate}")

        if corrections is None:
            # Unreadable or non-mapping content: keep searching later paths.
            continue

        # Apply corrections in place and stop at the first usable file.
        header.update(corrections)
        return candidate

    return None

315 

316 

def _find_from_resource(header, package, resource_root, target_file):
    """Search package resource for correction information.

    Parameters
    ----------
    header : `dict`
        Header to update.
    package : `str`
        Package resource to search.
    resource_root : `str`
        Resource root.
    target_file : `str`
        Resource to locate.

    Returns
    -------
    resource : `str` or `None`
        Name of resource read. `None` if no corrections found.
    """
    # A translator that supports no resource corrections reports None.
    if package is None or resource_root is None:
        return None

    resource_name = posixpath.join(resource_root, target_file)
    if not pkg_resources.resource_exists(package, resource_name):
        return None

    log.debug("Applying header corrections from package resource %s:%s", package, resource_name)
    with pkg_resources.resource_stream(package, resource_name) as fh:
        corrections = _read_yaml(fh, f"package resource {package}:{resource_name}")

    if corrections is None:
        return None

    # Apply corrections in place.
    header.update(corrections)
    return f"{package}:{resource_name}"

350 

351 

def fix_header(header, search_path=None, translator_class=None, filename=None):
    """Update, in place, the supplied header with known corrections.

    Parameters
    ----------
    header : `dict`-like
        Header to correct.
    search_path : `list` or `str`, optional
        Explicit directory paths to search for correction files.
        A single directory path can be given as a string.
    translator_class : `MetadataTranslator`-class, optional
        If not `None`, the class to use to translate the supplied headers
        into standard form. Otherwise each registered translator class will
        be asked in turn if it knows how to translate the supplied header.
    filename : `str`, optional
        Name of the file whose header is being translated. For some
        datasets with missing header information this can sometimes
        allow for some fixups in translations.

    Returns
    -------
    fixed : `bool`
        `True` if the header was updated.

    Raises
    ------
    TypeError
        Raised if the supplied translation class is not a `MetadataTranslator`.

    Notes
    -----
    In order to determine that a header update is required it is
    necessary for the header to be handled by the supplied translator
    class or else support automatic translation class determination.
    It is also required that the ``observation_id`` and ``instrument``
    be calculable prior to header fix up. If a translator class can not
    be found or if there is a problem determining the instrument or
    observation ID, the function will return without action.

    Correction files use names of the form ``instrument-obsid.yaml`` (for
    example ``LATISS-AT_O_20190329_000022.yaml``).
    The YAML file should have the format of:

    .. code-block:: yaml

       EXPTIME: 30.0
       IMGTYPE: bias

    where each key/value pair is copied directly into the supplied header,
    overwriting any previous values.

    This function searches a number of locations for such a correction file.
    The search order is:

    - Any paths explicitly supplied through ``search_path``.
    - The contents of the PATH-like environment variable
      ``$METADATA_CORRECTIONS_PATH``.
    - Any search paths supplied by the matching translator class.

    The first file located in the search path is used for the correction.
    """
    # A previous fixup left a sentinel recording whether anything was
    # modified; trust it and skip the search and translator calls.
    if FIXUP_SENTINEL in header:
        return header[FIXUP_SENTINEL]

    if translator_class is None:
        try:
            translator_class = MetadataTranslator.determine_translator(header,
                                                                       filename=filename)
        except ValueError as e:
            # If the header is not recognized, we should not complain
            # and should not proceed further.
            # NOTE: the original used "%e" (invalid logging conversion) and an
            # f-string without a placeholder; both fixed here.
            log.debug("Unable to determine translator class %s -- not fixing header: %s",
                      f"for {filename}" if filename is not None else "", e)
            return False
    elif not issubclass(translator_class, MetadataTranslator):
        raise TypeError(f"Translator class must be a MetadataTranslator, not {translator_class}")

    # Create an instance for this header
    translator = translator_class(header, filename=filename)

    # To determine the file look up we need the observation_id and instrument
    try:
        obsid = translator.to_observation_id()
        instrument = translator.to_instrument()
    except Exception:
        # Return without comment if these translations failed
        return False

    target_file = f"{instrument}-{obsid}.yaml"
    log.debug("Checking for header correction file named %s", target_file)

    # Work out the search path
    paths = []
    if search_path is not None:
        if isinstance(search_path, str):
            # Allow a single path to be given as a string
            search_path = [search_path]
        paths.extend(search_path)
    if ENV_VAR_NAME in os.environ and os.environ[ENV_VAR_NAME]:
        paths.extend(os.environ[ENV_VAR_NAME].split(os.path.pathsep))

    paths.extend(translator.search_paths())

    # Prioritize file system overrides
    corrections_file = _find_from_file(header, paths, target_file)

    # Apply updates from resources only if none found in files
    if corrections_file is None:
        package, resource_root = translator.resource_root()
        corrections_file = _find_from_resource(header, package, resource_root, target_file)

    # Allow a translation class to do local fixups.
    # Allow it to fail but log the failure.
    try:
        translator_modified = translator_class.fix_header(header, instrument, obsid, filename=filename)
    except Exception as e:
        log.fatal("Ignoring translator header fixup of %s %s: %s",
                  instrument, obsid, e)
        translator_modified = False

    was_modified = (corrections_file is not None) or translator_modified

    # Always add a sentinel even if nothing was updated
    # since this will speed up later fixes by not requiring the file
    # system scan or calling of the per-instrument translator methods.
    # Do not do it if there has been a problem determining a translator
    # since it may be that a new translator is registered later on for
    # another attempt.
    header[FIXUP_SENTINEL] = was_modified

    # Record provenance
    header[HIERARCH + " DATE"] = datetime.datetime.now().isoformat()
    if corrections_file is not None:
        header[HIERARCH + " FILE"] = corrections_file
    if translator_modified:
        # Store the translator version
        header[HIERARCH + " VERSION"] = translator_class.translator_version()

    return was_modified