Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of astro_metadata_translator. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the LICENSE file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12"""Code to support header manipulation operations.""" 

13 

14__all__ = ("merge_headers", "fix_header") 

15 

16import pkg_resources 

17import posixpath 

18import logging 

19import itertools 

20import copy 

21import os 

22import yaml 

23from collections.abc import Mapping 

24from collections import Counter 

25 

26from .translator import MetadataTranslator 

27from .translators import FitsTranslator 

28 

29log = logging.getLogger(__name__) 

30 

31ENV_VAR_NAME = "METADATA_CORRECTIONS_PATH" 

32"""Name of environment variable containing search path for header fix up.""" 

33 

34 

def merge_headers(headers, mode="overwrite", sort=False, first=None, last=None):
    """Merge multiple headers into a single dict.

    Given a list of dict-like data headers, combine them following the
    specified mode.

    Parameters
    ----------
    headers : `list` of `dict` (or `dict`-like)
        Collection of headers to combine. `~lsst.daf.base.PropertyList`
        is supported.
    mode : `str`
        Scheme to use when a header has the same key as another header
        but different value. Options are:

        - ``'overwrite'`` : Value in later header overwrites earlier value.
        - ``'drop'`` : Entire key is dropped. If a key only appears in a
          subset of the headers, and is identical in those, it will be
          retained.
        - ``'diff'`` : As for ``drop`` but the dropped values are stored in a
          `list` of `dict` in the returned merged header in key
          ``__DIFF__``. The order used matches the supplied order or
          the sorted order if specified. This allows a simple header diff
          to be performed and associated with the original headers. Only
          keys that appear in all headers will be retained in the merged one.
          Unlike for ``'drop'`` headers that are identical and only present in
          a subset will always be included in the diff.
        - ``'first'`` : Retain first value encountered.
        - ``'append'`` : Convert value to list with a value for each header
          (`None` if the key was not present). If the value is
          identical in multiple headers but key is missing in
          some, then the single identical header is stored.
    sort : `bool`, optional
        If `True`, sort the supplied headers into date order if possible.
        This affects the resulting merged output depending on the requested
        merge mode. An attempt will be made to extract a date from the
        headers.
    first : `list` or `tuple`, optional
        Keys to retain even if they differ. For all modes excepting ``append``
        (where it is ignored) the value in the merged header will always be
        the value first encountered. This is usually to allow time-dependent
        headers such as ``DATE-OBS`` and ``AZSTART`` to be retained to allow
        the header to indicate the range of values. No exception is raised if
        a key can not be found in a header since this allows a range of
        expected headers to be listed covering multiple instruments.
    last : `list` or `tuple`, optional
        Keys to retain even if they differ. For all modes excepting ``append``
        (where it is ignored) the value in the merged header will always be
        the final value encountered. This is usually to allow time-dependent
        headers such as ``DATE-END`` and ``AZEND`` to be retained to allow
        the header to indicate the range of values. No exception is raised if
        a key can not be found in a header since this allows a range of
        expected headers to be listed covering multiple instruments.

    Returns
    -------
    merged : `dict`
        Single `dict` combining all the headers using the specified
        combination mode.

    Notes
    -----
    If ``first`` and ``last`` are supplied, the keys from ``first`` are
    handled first, followed by the keys from ``last``. No check is made to
    ensure that the keys do not overlap.
    """
    if not headers:
        raise ValueError("No headers supplied.")

    # Work on a copy since the list is reorganized below.
    headers = list(headers)

    # Nothing to merge for a single header; hand back an independent copy.
    if len(headers) == 1:
        return copy.deepcopy(headers[0])

    if sort:
        def _begin_date(hdr):
            # Find a translator for this header, falling back to the
            # generic FITS translator if none claims it.
            try:
                cls = MetadataTranslator.determine_translator(hdr)
            except ValueError:
                cls = FitsTranslator
            return cls(hdr).to_datetime_begin()

        headers = sorted(headers, key=_begin_date)

    logging.getLogger(__name__).debug("Received %d headers for merging", len(headers))

    # The first header seeds the merged output.
    primary = headers.pop(0)
    merged = copy.deepcopy(primary)

    if mode == "overwrite":
        for hdr in headers:
            merged.update(hdr)

    elif mode == "first":
        # Implemented directly rather than as reversed "overwrite" so that
        # the resulting key order stays consistent across modes.
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]

    elif mode == "drop":
        conflicted = set()
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]
                elif merged[key] != hdr[key]:
                    # Defer deletion: removing the key now could let a
                    # later header silently re-insert it.
                    conflicted.add(key)

        for key in conflicted:
            del merged[key]

    elif mode == "diff":
        dropped = set()

        # A key may only survive in the merged header if it appears in
        # every input with an identical value. Count occurrences, seeding
        # with the primary header.
        occurrences = Counter(merged.keys())

        for hdr in headers:
            occurrences.update(hdr.keys())
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]
                elif merged[key] != hdr[key]:
                    # Deferred removal, as in "drop" mode.
                    dropped.add(key)

        # Keys seen fewer times than the number of inputs (primary
        # included) are missing from some header and also go in the diff.
        n_inputs = len(headers) + 1
        dropped.update(key for key, count in occurrences.items()
                       if count != n_inputs)

        # One diff dict per input header, in input order. A dropped key
        # absent from a particular header is simply omitted from its diff.
        diffs = [{key: hdr[key] for key in dropped & set(hdr)}
                 for hdr in itertools.chain([primary], headers)]

        # PropertyList cannot hold a dict value, so fall back to a plain
        # dict if the assignment is rejected.
        try:
            merged["__DIFF__"] = diffs
        except TypeError:
            merged = dict(merged)
            merged["__DIFF__"] = diffs

        for key in dropped:
            del merged[key]

    elif mode == "append":
        multivalued = set()
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]
                elif not isinstance(merged[key], list) and merged[key] != hdr[key]:
                    # Differing values: mark the slot and expand it after
                    # the scan so earlier headers contribute too and gaps
                    # become None.
                    merged[key] = []
                    multivalued.add(key)

        # Expand each marked key to one entry per input header.
        for key in multivalued:
            merged[key] = [hdr[key] if key in hdr else None
                           for hdr in itertools.chain([primary], headers)]

    else:
        raise ValueError(f"Unsupported value of '{mode}' for mode parameter.")

    # Force the requested "first"/"last" values into the merged result.
    if mode != "append":
        def _retain(keys, sources):
            # Copy the first occurrence of each key found in sources;
            # keys present in no source are silently skipped.
            for k in keys or ():
                for hdr in sources:
                    if k in hdr:
                        merged[k] = hdr[k]
                        break

        everything = (primary, *headers)
        _retain(first, everything)
        _retain(last, tuple(reversed(everything)))

    return merged

242 

243 

def _read_yaml(fh, msg):
    """Parse a YAML mapping from an open stream.

    Parameters
    ----------
    fh : `io.IOBase`
        Open file handle containing the YAML stream.
    msg : `str`
        Description of the stream for use in log messages. Examples
        could be "file something.yaml" or "resource module:resource".

    Returns
    -------
    parsed : `dict` or `None`
        The contents of the stream if it parsed to a mapping, otherwise
        `None` (either the content could not be parsed, or it was valid
        YAML but not a mapping).
    """
    try:
        parsed = yaml.safe_load(fh)
    except Exception as e:
        # Malformed corrections are reported but never fatal.
        log.warning("Error parsing YAML header corrections from %s: %s", msg, str(e))
        return None

    # YAML can legitimately hold scalars or sequences; only a mapping is
    # usable as a set of header corrections.
    if isinstance(parsed, Mapping):
        return parsed

    log.warning("YAML Mapping not found in %s. Ignoring contents.", msg)
    return None

273 

274 

def _find_from_file(header, paths, target_file):
    """Apply the first matching correction file found on the file system.

    Parameters
    ----------
    header : `dict`
        Header to update in place.
    paths : `list`
        Directories to search, in order.
    target_file : `str`
        Name of the correction file to look for in each directory.

    Returns
    -------
    modified : `bool`
        `True` if a correction was found. Only the first correction located
        in a path is used.
    """
    for directory in paths:
        candidate = os.path.join(directory, target_file)
        if not os.path.exists(candidate):
            continue

        with open(candidate) as fh:
            log.debug("Applying header corrections from file %s", candidate)
            corrections = _read_yaml(fh, f"file {candidate}")

        if corrections is None:
            # Unusable file; keep looking along the remaining paths.
            continue

        header.update(corrections)
        return True

    return False

308 

309 

def _find_from_resource(header, package, resource_root, target_file):
    """Apply correction information found in a package resource.

    Parameters
    ----------
    header : `dict`
        Header to update in place.
    package : `str`
        Package resource to search.
    resource_root : `str`
        Resource root.
    target_file : `str`
        Resource to locate.

    Returns
    -------
    modified : `bool`
        `True` if a correction was found.
    """
    # Nothing to do if the translator defines no resource location.
    if package is None or resource_root is None:
        return False

    resource_name = posixpath.join(resource_root, target_file)
    if not pkg_resources.resource_exists(package, resource_name):
        return False

    log.debug("Applying header corrections from package resource %s:%s", package, resource_name)
    with pkg_resources.resource_stream(package, resource_name) as fh:
        corrections = _read_yaml(fh, f"package resource {package}:{resource_name}")

    if corrections is None:
        return False

    header.update(corrections)
    return True

343 

344 

def fix_header(header, search_path=None, translator_class=None, filename=None):
    """Update, in place, the supplied header with known corrections.

    Parameters
    ----------
    header : `dict`-like
        Header to correct.
    search_path : `list` or `str`, optional
        Explicit directory paths to search for correction files.
        A single directory path can be given as a string.
    translator_class : `MetadataTranslator`-class, optional
        If not `None`, the class to use to translate the supplied headers
        into standard form. Otherwise each registered translator class will
        be asked in turn if it knows how to translate the supplied header.
    filename : `str`, optional
        Name of the file whose header is being translated. For some
        datasets with missing header information this can sometimes
        allow for some fixups in translations.

    Returns
    -------
    fixed : `bool`
        `True` if the header was updated.

    Raises
    ------
    TypeError
        Raised if the supplied translation class is not a `MetadataTranslator`.

    Notes
    -----
    In order to determine that a header update is required it is
    necessary for the header to be handled by the supplied translator
    class or else support automatic translation class determination.
    It is also required that the ``observation_id`` and ``instrument``
    be calculable prior to header fix up. If a translator class can not
    be found or if there is a problem determining the instrument or
    observation ID, the function will return without action.

    Correction files use names of the form ``instrument-obsid.yaml`` (for
    example ``LATISS-AT_O_20190329_000022.yaml``).
    The YAML file should have the format of:

    .. code-block:: yaml

       EXPTIME: 30.0
       IMGTYPE: bias

    where each key/value pair is copied directly into the supplied header,
    overwriting any previous values.

    This function searches a number of locations for such a correction file.
    The search order is:

    - Any paths explicitly supplied through ``search_path``.
    - The contents of the PATH-like environment variable
      ``$METADATA_CORRECTIONS_PATH``.
    - Any search paths supplied by the matching translator class.

    The first file located in the search path is used for the correction.
    """
    if translator_class is None:
        try:
            translator_class = MetadataTranslator.determine_translator(header,
                                                                       filename=filename)
        except ValueError as e:
            # An unrecognized header is not an error here: there is simply
            # nothing to fix, so return without complaint.
            # Fixed: the message previously used an invalid "%e" placeholder
            # (float-exponent format, which raises when the record is
            # formatted) and an f-string that never interpolated filename.
            log.debug("Unable to determine translator class%s -- not fixing header: %s",
                      f" for {filename}" if filename is not None else "", e)
            return False
    elif not issubclass(translator_class, MetadataTranslator):
        raise TypeError(f"Translator class must be a MetadataTranslator, not {translator_class}")

    # Create an instance for this header.
    translator = translator_class(header, filename=filename)

    # The correction file name is derived from the observation_id and
    # instrument, so both must be calculable before continuing.
    try:
        obsid = translator.to_observation_id()
        instrument = translator.to_instrument()
    except Exception:
        # Return without comment if these translations failed.
        return False

    target_file = f"{instrument}-{obsid}.yaml"
    log.debug("Checking for header correction file named %s", target_file)

    # Assemble the search path: explicit paths first, then the environment
    # variable, then any translator-supplied locations.
    paths = []
    if search_path is not None:
        if isinstance(search_path, str):
            # Allow a single path to be given as a string.
            search_path = [search_path]
        paths.extend(search_path)
    env_path = os.environ.get(ENV_VAR_NAME)
    if env_path:
        paths.extend(env_path.split(os.pathsep))

    paths.extend(translator.search_paths())

    # File system overrides take priority over package resources.
    modified = _find_from_file(header, paths, target_file)

    # Apply updates from resources only if none were found in files.
    if not modified:
        package, resource_root = translator.resource_root()
        modified = _find_from_resource(header, package, resource_root, target_file)

    # Allow the translation class to perform local fixups. A failure here
    # is logged but deliberately not propagated: a broken fixup should not
    # prevent the header from being used.
    try:
        translator_modified = translator_class.fix_header(header, instrument, obsid, filename=filename)
    except Exception as e:
        log.fatal("Ignoring translator header fixup of %s %s: %s",
                  instrument, obsid, e)
        translator_modified = False

    return modified or translator_modified