# This file is part of astro_metadata_translator.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the LICENSE file at the top-level directory of this distribution
# for details of code ownership.
#
# Use of this source code is governed by a 3-clause BSD-style
# license that can be found in the LICENSE file.

"""Code to support header manipulation operations."""

__all__ = ("merge_headers", "fix_header")

import pkg_resources
import posixpath
import logging
import itertools
import copy
import os
import yaml

from .translator import MetadataTranslator
from .translators import FitsTranslator

log = logging.getLogger(__name__)

ENV_VAR_NAME = "METADATA_CORRECTIONS_PATH"
"""Name of environment variable containing search path for header fix up."""

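# Illustrative note (not part of the original module): several correction
# directories can be advertised through this variable using standard
# PATH-style joining; the directory names below are hypothetical:
#
#     os.environ[ENV_VAR_NAME] = os.pathsep.join(["/project/fixups", "/home/user/fixups"])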

def merge_headers(headers, mode="overwrite", sort=False, first=None, last=None):
    """Merge multiple headers into a single dict.

    Given a list of dict-like data headers, combine them following the
    specified mode.

    Parameters
    ----------
    headers : `list` of `dict` (or `dict`-like)
        Collection of headers to combine. `~lsst.daf.base.PropertyList`
        is supported.
    mode : `str`
        Scheme to use when a header has the same key as another header
        but a different value. Options are:

        - ``'overwrite'`` : Value in a later header overwrites an earlier
          value.
        - ``'drop'`` : The entire key is dropped.
        - ``'first'`` : Retain the first value encountered.
        - ``'append'`` : Convert the value to a list with one entry per
          header (`None` if the key was not present). If the value is
          identical in every header that defines it but the key is missing
          from some headers, the single identical value is stored.
    sort : `bool`, optional
        If `True`, sort the supplied headers into date order if possible.
        This affects the resulting merged output depending on the requested
        merge mode. An attempt will be made to extract a date from the
        headers.
    first : `list` or `tuple`, optional
        Keys to retain even if they differ. For all modes except ``append``
        (where it is ignored), the value in the merged header will always be
        the first value encountered. This is usually used so that
        time-dependent headers such as ``DATE-OBS`` and ``AZSTART`` are
        retained and the merged header can indicate the range of values.
        No exception is raised if a key can not be found in a header, since
        this allows expected keys covering multiple instruments to be listed.
    last : `list` or `tuple`, optional
        Keys to retain even if they differ. For all modes except ``append``
        (where it is ignored), the value in the merged header will always be
        the final value encountered. This is usually used so that
        time-dependent headers such as ``DATE-END`` and ``AZEND`` are
        retained and the merged header can indicate the range of values.
        No exception is raised if a key can not be found in a header, since
        this allows expected keys covering multiple instruments to be listed.

    Returns
    -------
    merged : `dict`
        A single `dict` combining all the headers using the specified
        combination mode.

    Notes
    -----
    If ``first`` and ``last`` are both supplied, the keys from ``first`` are
    handled first, followed by the keys from ``last``. No check is made to
    ensure that the two sets of keys do not overlap.
    """
    if not headers:
        raise ValueError("No headers supplied.")

    # Copy the input list because we will be reorganizing it
    headers = list(headers)

    # With a single header provided return a copy immediately
    if len(headers) == 1:
        return copy.deepcopy(headers[0])

    if sort:
        def key_func(hdr):
            translator_class = None
            try:
                translator_class = MetadataTranslator.determine_translator(hdr)
            except ValueError:
                # Try the FITS translator
                translator_class = FitsTranslator
            translator = translator_class(hdr)
            return translator.to_datetime_begin()

        headers = sorted(headers, key=key_func)

    log.debug("Received %d headers for merging", len(headers))

    # Pull out first header
    first_hdr = headers.pop(0)

    # Seed the merged header with a copy
    merged = copy.deepcopy(first_hdr)

    if mode == "overwrite":
        for h in headers:
            merged.update(h)

    elif mode == "first":
        # Reversing the headers and using overwrite mode would result in the
        # header order being inconsistent dependent on mode.
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]

    elif mode == "drop":
        drop = set()
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]
                elif merged[key] != hdr[key]:
                    # Key should be dropped later (not in loop since removing
                    # the key now might add it back for the next header).
                    drop.add(key)

        for key in drop:
            del merged[key]

    elif mode == "append":
        fill = set()
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]
                elif not isinstance(merged[key], list) and merged[key] != hdr[key]:
                    # If we detect different values, store an empty list
                    # in the slot and fill it later. Do it at end so
                    # we can pick up earlier values and fill empty with None.
                    merged[key] = []
                    fill.add(key)

        # Fill the entries that have multiple differing values
        for key in fill:
            merged[key] = [h[key] if key in h else None
                           for h in itertools.chain([first_hdr], headers)]

    else:
        raise ValueError(f"Unsupported value of '{mode}' for mode parameter.")

    # Force the first and last values to be inserted
    if mode != "append":
        def retain_value(to_receive, to_retain, sources):
            if to_retain:
                for k in to_retain:
                    # Look for values until we find one
                    for h in sources:
                        if k in h:
                            to_receive[k] = h[k]
                            break

        all_headers = (first_hdr, *headers)
        retain_value(merged, first, all_headers)
        retain_value(merged, last, tuple(reversed(all_headers)))

    return merged

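# A minimal, illustrative sketch (not part of the original module) of how the
# merge modes above differ for two small dict headers; the header values are
# hypothetical and the expected results follow directly from merge_headers:
#
#     >>> h1 = {"A": 1, "B": 2}
#     >>> h2 = {"A": 1, "B": 3, "C": 4}
#     >>> merge_headers([h1, h2], mode="overwrite")
#     {'A': 1, 'B': 3, 'C': 4}
#     >>> merge_headers([h1, h2], mode="first")
#     {'A': 1, 'B': 2, 'C': 4}
#     >>> merge_headers([h1, h2], mode="drop")
#     {'A': 1, 'C': 4}
#     >>> merge_headers([h1, h2], mode="append")
#     {'A': 1, 'B': [2, 3], 'C': 4}
#     >>> merge_headers([h1, h2], mode="drop", first=["B"], last=["C"])
#     {'A': 1, 'C': 4, 'B': 2}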

def _find_from_file(header, paths, target_file):
    """Search file system for matching correction files.

    Parameters
    ----------
    header : `dict`
        Header to update.
    paths : `list`
        Paths to search.
    target_file : `str`
        File to locate in the path.

    Returns
    -------
    modified : `bool`
        `True` if a correction was found. Only the first correction located
        in a path is used.
    """
    for p in paths:
        correction_file = os.path.join(p, target_file)
        if os.path.exists(correction_file):
            with open(correction_file) as fh:
                log.debug("Applying header corrections from file %s", correction_file)
                corrections = yaml.safe_load(fh)

            # Apply corrections
            header.update(corrections)

            return True
    return False


def _find_from_resource(header, package, resource_root, target_file):
    """Search package resource for correction information.

    Parameters
    ----------
    header : `dict`
        Header to update.
    package : `str`
        Package resource to search.
    resource_root : `str`
        Resource root.
    target_file : `str`
        Resource to locate.

    Returns
    -------
    modified : `bool`
        `True` if a correction was found.
    """
    if package is not None and resource_root is not None:
        resource_name = posixpath.join(resource_root, target_file)
        if pkg_resources.resource_exists(package, resource_name):
            log.debug("Applying header corrections from package resource %s:%s", package, resource_name)
            with pkg_resources.resource_stream(package, resource_name) as fh:
                corrections = yaml.safe_load(fh)
                header.update(corrections)

            return True
    return False


def fix_header(header, search_path=None, translator_class=None, filename=None):
    """Update, in place, the supplied header with known corrections.

    Parameters
    ----------
    header : `dict`-like
        Header to correct.
    search_path : `list` or `str`, optional
        Explicit directory paths to search for correction files.
        A single directory path can be given as a string.
    translator_class : `MetadataTranslator`-class, optional
        If not `None`, the class to use to translate the supplied headers
        into standard form. Otherwise each registered translator class will
        be asked in turn if it knows how to translate the supplied header.
    filename : `str`, optional
        Name of the file whose header is being translated. For datasets
        with missing header information this can sometimes enable
        additional fixes to be applied during translation.

    Returns
    -------
    fixed : `bool`
        `True` if the header was updated.

    Raises
    ------
    TypeError
        Raised if the supplied translation class is not a `MetadataTranslator`.

    Notes
    -----
    To determine whether a header update is required, the header must be
    handled by the supplied translator class or else support automatic
    translator class determination. The ``observation_id`` and
    ``instrument`` must also be calculable prior to the header fix up. If a
    translator class can not be found, or if there is a problem determining
    the instrument or observation ID, the function returns without taking
    any action.

    Correction files use names of the form ``instrument-obsid.yaml`` (for
    example ``LATISS-AT_O_20190329_000022.yaml``).
    The YAML file should have the format of:

    .. code-block:: yaml

        EXPTIME: 30.0
        IMGTYPE: bias

    where each key/value pair is copied directly into the supplied header,
    overwriting any previous values.

    This function searches a number of locations for such a correction file.
    The search order is:

    - Any paths explicitly supplied through ``search_path``.
    - The contents of the PATH-like environment variable
      ``$METADATA_CORRECTIONS_PATH``.
    - Any search paths supplied by the matching translator class.

    The first file located in the search path is used for the correction.
    """

    if translator_class is None:
        try:
            translator_class = MetadataTranslator.determine_translator(header,
                                                                       filename=filename)
        except ValueError:
            # If the header is not recognized, we should not complain
            # and should not proceed further.
            return False
    elif not issubclass(translator_class, MetadataTranslator):
        raise TypeError(f"Translator class must be a MetadataTranslator, not {translator_class}")

    # Create an instance for this header
    translator = translator_class(header, filename=filename)

    # To determine the file look up we need the observation_id and instrument
    try:
        obsid = translator.to_observation_id()
        instrument = translator.to_instrument()
    except Exception:
        # Return without comment if these translations failed
        return False

    target_file = f"{instrument}-{obsid}.yaml"
    log.debug("Checking for header correction file named %s", target_file)

    # Work out the search path
    paths = []
    if search_path is not None:
        if isinstance(search_path, str):
            # Allow a single path to be given as a string
            search_path = [search_path]
        paths.extend(search_path)
    if ENV_VAR_NAME in os.environ and os.environ[ENV_VAR_NAME]:
        paths.extend(os.environ[ENV_VAR_NAME].split(os.path.pathsep))

    paths.extend(translator.search_paths())

    # Prioritize file system overrides
    modified = _find_from_file(header, paths, target_file)

    # Apply updates from resources only if none found in files
    if not modified:
        package, resource_root = translator.resource_root()
        modified = _find_from_resource(header, package, resource_root, target_file)

    # Allow a translation class to do local fixups.
    # Allow it to fail but log the failure.
    try:
        translator_modified = translator_class.fix_header(header)
    except Exception as e:
        log.fatal("Ignoring translator header fixup of %s %s: %s",
                  instrument, obsid, e)
        translator_modified = False

    return modified or translator_modified
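# A hedged usage sketch (illustrative only, not part of the original module).
# It assumes the header is recognized by one of the registered translators and
# that a correction file named following the ``instrument-obsid.yaml``
# convention described in the fix_header docstring, for example
# LATISS-AT_O_20190329_000022.yaml containing:
#
#     EXPTIME: 30.0
#     IMGTYPE: bias
#
# has been placed in a directory passed via ``search_path`` (or advertised
# through the METADATA_CORRECTIONS_PATH environment variable). The FITS file
# name and the correction directory below are hypothetical:
#
#     >>> from astropy.io import fits
#     >>> hdr = fits.getheader("raw_frame.fits")
#     >>> fix_header(hdr, search_path="/path/to/corrections")
#     True
#     >>> hdr["EXPTIME"], hdr["IMGTYPE"]
#     (30.0, 'bias')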