Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of astro_metadata_translator. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the LICENSE file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12"""Code to support header manipulation operations.""" 

13 

# Names exported by ``from <this module> import *``.
__all__ = ("merge_headers", "fix_header")

15 

16import pkg_resources 

17import posixpath 

18import logging 

19import itertools 

20import copy 

21import os 

22import yaml 

23from collections.abc import Mapping 

24 

25from .translator import MetadataTranslator 

26from .translators import FitsTranslator 

27 

# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)

# Read by ``fix_header``; the value is split on ``os.path.pathsep``.
ENV_VAR_NAME = "METADATA_CORRECTIONS_PATH"
"""Name of environment variable containing search path for header fix up."""

32 

33 

def merge_headers(headers, mode="overwrite", sort=False, first=None, last=None):
    """Merge multiple headers into a single dict.

    Given a list of dict-like data headers, combine them following the
    specified mode.

    Parameters
    ----------
    headers : iterable of `dict` (or `dict`-like)
        Collection of headers to combine. `~lsst.daf.base.PropertyList`
        is supported. Any iterable is accepted (including generators);
        it is copied into a list and the caller's collection is never
        reordered or modified.
    mode : `str`
        Scheme to use when a header has the same key as another header
        but different value. Options are:

        - ``'overwrite'`` : Value in later header overwrites earlier value.
        - ``'drop'`` : Entire key is dropped.
        - ``'first'`` : Retain first value encountered.
        - ``'append'`` : Convert value to list with a value for each header
          (`None` if the key was not present). If the value is
          identical in multiple headers but key is missing in
          some, then the single identical value is stored.
    sort : `bool`, optional
        If `True`, sort the supplied headers into date order if possible.
        This affects the resulting merged output depending on the requested
        merge mode. An attempt will be made to extract a date from the
        headers.
    first : `list` or `tuple`, optional
        Keys to retain even if they differ. For all modes excepting ``append``
        (where it is ignored) the value in the merged header will always be
        the value first encountered. This is usually to allow time-dependent
        headers such as ``DATE-OBS`` and ``AZSTART`` to be retained to allow
        the header to indicate the range of values. No exception is raised if
        a key can not be found in a header since this allows a range of
        expected headers to be listed covering multiple instruments.
    last : `list` or `tuple`, optional
        Keys to retain even if they differ. For all modes excepting ``append``
        (where it is ignored) the value in the merged header will always be
        the final value encountered. This is usually to allow time-dependent
        headers such as ``DATE-END`` and ``AZEND`` to be retained to allow
        the header to indicate the range of values. No exception is raised if
        a key can not be found in a header since this allows a range of
        expected headers to be listed covering multiple instruments.

    Returns
    -------
    merged : `dict`
        Single `dict` combining all the headers using the specified
        combination mode. It starts life as a deep copy of the first
        header, so it has the same type as that header.

    Raises
    ------
    ValueError
        Raised if no headers are supplied, or if ``mode`` is not one of
        the supported values and more than one header was supplied.

    Notes
    -----
    If ``first`` and ``last`` are supplied, the keys from ``first`` are
    handled first, followed by the keys from ``last``. No check is made to
    ensure that the keys do not overlap.

    If exactly one header is supplied a deep copy of it is returned
    immediately; ``mode``, ``sort``, ``first`` and ``last`` are not
    consulted in that case.
    """
    # Materialize the input before testing for emptiness. An iterator or
    # generator is always truthy, so checking first would let an empty one
    # through and fail later with an IndexError. The copy also protects the
    # caller's list from the reordering done below.
    headers = list(headers) if headers else []
    if not headers:
        raise ValueError("No headers supplied.")

    # With a single header provided return a copy immediately
    if len(headers) == 1:
        return copy.deepcopy(headers[0])

    if sort:
        def key_func(hdr):
            """Return the begin date of a header for use as a sort key."""
            try:
                translator_class = MetadataTranslator.determine_translator(hdr)
            except ValueError:
                # No registered translator recognized the header so fall
                # back to the FITS translator.
                translator_class = FitsTranslator
            return translator_class(hdr).to_datetime_begin()

        headers = sorted(headers, key=key_func)

    log.debug("Received %d headers for merging", len(headers))

    # Split off the first header and seed the merged result with a copy
    # of it; ``headers`` now holds only the remaining headers.
    first_hdr = headers.pop(0)
    merged = copy.deepcopy(first_hdr)

    if mode == "overwrite":
        for hdr in headers:
            merged.update(hdr)

    elif mode == "first":
        # Reversing the headers and using overwrite mode would result in the
        # header order being inconsistent dependent on mode.
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]

    elif mode == "drop":
        drop = set()
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]
                elif merged[key] != hdr[key]:
                    # Key should be dropped later (not in loop since removing
                    # the key now might add it back for the next header).
                    drop.add(key)

        for key in drop:
            del merged[key]

    elif mode == "append":
        # Keys found to have differing values. Membership in this set (not
        # the type of the stored value) marks a key as already flagged, so
        # headers whose genuine values are lists are compared correctly.
        fill = set()
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]
                elif key not in fill and merged[key] != hdr[key]:
                    fill.add(key)

        # Build the per-header lists at the end so that earlier values are
        # picked up and headers lacking the key contribute None.
        for key in fill:
            merged[key] = [h[key] if key in h else None
                           for h in itertools.chain([first_hdr], headers)]

    else:
        raise ValueError(f"Unsupported value of '{mode}' for mode parameter.")

    # Force the first and last values to be inserted
    if mode != "append":
        def retain_value(to_receive, to_retain, sources):
            """Copy each requested key from the first source that has it."""
            if to_retain:
                for k in to_retain:
                    # Look for values until we find one
                    for h in sources:
                        if k in h:
                            to_receive[k] = h[k]
                            break

        all_headers = (first_hdr, *headers)
        retain_value(merged, first, all_headers)
        retain_value(merged, last, tuple(reversed(all_headers)))

    return merged

185 

186 

def _read_yaml(fh, msg):
    """Load a YAML mapping from an open stream.

    Parameters
    ----------
    fh : `io.IOBase`
        Open file handle containing the YAML stream.
    msg : `str`
        Description of the stream used in log messages, for example
        "file something.yaml" or "resource module:resource".

    Returns
    -------
    parsed : `dict` or `None`
        The parsed content when it is a mapping. `None` is returned, after
        logging a warning, if loading raised any exception or if the
        content loaded but is not a mapping.
    """
    try:
        parsed = yaml.safe_load(fh)
    except Exception as err:
        # Best effort: an unreadable corrections file is reported but must
        # not abort the caller.
        log.warning("Error parsing YAML header corrections from %s: %s", msg, str(err))
        return None

    if isinstance(parsed, Mapping):
        return parsed

    log.warning("YAML Mapping not found in %s. Ignoring contents.", msg)
    return None

216 

217 

218def _find_from_file(header, paths, target_file): 

219 """Search file system for matching correction files. 

220 

221 Parameters 

222 ---------- 

223 header : `dict` 

224 Header to update. 

225 paths : `list` 

226 Paths to search. 

227 target_file : `str` 

228 File to locate in the path. 

229 

230 Returns 

231 ------- 

232 modified : `bool` 

233 `True` if a correction was found. Only the first correction located 

234 in a path is used. 

235 """ 

236 for p in paths: 

237 correction_file = os.path.join(p, target_file) 

238 if os.path.exists(correction_file): 

239 with open(correction_file) as fh: 

240 log.debug("Applying header corrections from file %s", correction_file) 

241 corrections = _read_yaml(fh, f"file {correction_file}") 

242 

243 if corrections is None: 

244 continue 

245 

246 # Apply corrections 

247 header.update(corrections) 

248 

249 return True 

250 return False 

251 

252 

253def _find_from_resource(header, package, resource_root, target_file): 

254 """Search package resource for correction information. 

255 

256 Parameters 

257 ---------- 

258 header : `dict` 

259 Header to update. 

260 package : `str` 

261 Package resource to search. 

262 resource_root : `str` 

263 Resource root. 

264 target_file : `str` 

265 Resource to locate. 

266 

267 Returns 

268 ------- 

269 modified : `bool` 

270 `True` if a correction was found. 

271 """ 

272 if package is not None and resource_root is not None: 

273 resource_name = posixpath.join(resource_root, target_file) 

274 if pkg_resources.resource_exists(package, resource_name): 

275 log.debug("Applying header corrections from package resource %s:%s", package, resource_name) 

276 with pkg_resources.resource_stream(package, resource_name) as fh: 

277 corrections = _read_yaml(fh, f"package resource {package}:{resource_name}") 

278 

279 if corrections is None: 

280 return False 

281 

282 header.update(corrections) 

283 

284 return True 

285 return False 

286 

287 

def fix_header(header, search_path=None, translator_class=None, filename=None):
    """Update, in place, the supplied header with known corrections.

    Parameters
    ----------
    header : `dict`-like
        Header to correct.
    search_path : `list`, `str` or `os.PathLike`, optional
        Explicit directory paths to search for correction files.
        A single directory path can be given as a string or path-like
        object.
    translator_class : `MetadataTranslator`-class, optional
        If not `None`, the class to use to translate the supplied headers
        into standard form. Otherwise each registered translator class will
        be asked in turn if it knows how to translate the supplied header.
    filename : `str`, optional
        Name of the file whose header is being translated. For some
        datasets with missing header information this can sometimes
        allow for some fixups in translations.

    Returns
    -------
    fixed : `bool`
        `True` if the header was updated.

    Raises
    ------
    TypeError
        Raised if the supplied translation class is not a `MetadataTranslator`.

    Notes
    -----
    In order to determine that a header update is required it is
    necessary for the header to be handled by the supplied translator
    class or else support automatic translation class determination.
    It is also required that the ``observation_id`` and ``instrument``
    be calculable prior to header fix up. If a translator class can not
    be found or if there is a problem determining the instrument or
    observation ID, the function will return without action.

    Correction files use names of the form ``instrument-obsid.yaml`` (for
    example ``LATISS-AT_O_20190329_000022.yaml``).
    The YAML file should have the format of:

    .. code-block:: yaml

        EXPTIME: 30.0
        IMGTYPE: bias

    where each key/value pair is copied directly into the supplied header,
    overwriting any previous values.

    This function searches a number of locations for such a correction file.
    The search order is:

    - Any paths explicitly supplied through ``search_path``.
    - The contents of the PATH-like environment variable
      ``$METADATA_CORRECTIONS_PATH``.
    - Any search paths supplied by the matching translator class.
    - The package resource location reported by the matching translator
      class. This is consulted only if no usable file was found in any of
      the paths above.

    The first file located in the search path is used for the correction.

    After the file or resource lookup, the translator class's own
    ``fix_header`` is always called so that it can apply local fixups.
    Any exception it raises is logged and otherwise ignored.
    """
    if translator_class is None:
        try:
            translator_class = MetadataTranslator.determine_translator(header,
                                                                       filename=filename)
        except ValueError:
            # if the header is not recognized, we should not complain
            # and should not proceed further.
            return False
    elif not issubclass(translator_class, MetadataTranslator):
        raise TypeError(f"Translator class must be a MetadataTranslator, not {translator_class}")

    # Create an instance for this header
    translator = translator_class(header, filename=filename)

    # To determine the file look up we need the observation_id and instrument
    try:
        obsid = translator.to_observation_id()
        instrument = translator.to_instrument()
    except Exception:
        # Return without comment if these translations failed
        return False

    target_file = f"{instrument}-{obsid}.yaml"
    log.debug("Checking for header correction file named %s", target_file)

    # Work out the search path
    paths = []
    if search_path is not None:
        if isinstance(search_path, (str, os.PathLike)):
            # Allow a single path to be given as a string or path-like
            # object; extending the list with either directly would be wrong
            # (a str would be split into characters, a Path is not iterable).
            search_path = [search_path]
        paths.extend(search_path)

    env_paths = os.environ.get(ENV_VAR_NAME)
    if env_paths:
        paths.extend(env_paths.split(os.path.pathsep))

    paths.extend(translator.search_paths())

    # Prioritize file system overrides
    modified = _find_from_file(header, paths, target_file)

    # Apply updates from resources only if none found in files
    if not modified:
        package, resource_root = translator.resource_root()
        modified = _find_from_resource(header, package, resource_root, target_file)

    # Allow a translation class to do local fixups
    # Allow it to fail but log the failure
    try:
        translator_modified = translator_class.fix_header(header, instrument, obsid, filename=filename)
    except Exception as e:
        # critical() is the supported spelling of the obsolete fatal()
        # alias; the logging level is unchanged.
        log.critical("Ignoring translator header fixup of %s %s: %s",
                     instrument, obsid, e)
        translator_modified = False

    return modified or translator_modified