# This file is part of astro_metadata_translator.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the LICENSE file at the top-level directory of this distribution
# for details of code ownership.
#
# Use of this source code is governed by a 3-clause BSD-style
# license that can be found in the LICENSE file.
12"""Code to support header manipulation operations."""
14__all__ = ("merge_headers", "fix_header")

import copy
import itertools
import logging
import os
import posixpath

import pkg_resources
import yaml

from .translator import MetadataTranslator
from .translators import FitsTranslator

log = logging.getLogger(__name__)

ENV_VAR_NAME = "METADATA_CORRECTIONS_PATH"
"""Name of environment variable containing search path for header fix up."""


def merge_headers(headers, mode="overwrite", sort=False, first=None, last=None):
34 """Merge multiple headers into a single dict.
36 Given a list of dict-like data headers, combine them following the
37 specified mode.
39 Parameters
40 ----------
41 headers : `list` of `dict` (or `dict`-like)
42 Collection of headers to combine. `~lsst.daf.base.PropertyList`
43 is supported.
44 mode : `str`
45 Scheme to use when a header has the same key as another header
46 but different value. Options are:

        - ``'overwrite'`` : Value in later header overwrites earlier value.
        - ``'drop'`` : Entire key is dropped.
        - ``'first'`` : Retain first value encountered.
        - ``'append'`` : Convert value to list with a value for each header
          (`None` if the key was not present). If the value is
          identical in multiple headers but the key is missing in
          some, then the single identical value is stored.
    sort : `bool`, optional
        If `True`, sort the supplied headers into date order if possible.
        This affects the resulting merged output depending on the requested
        merge mode. An attempt will be made to extract a date from the
        headers.
    first : `list` or `tuple`, optional
        Keys to retain even if they differ. For all modes except ``append``
        (where it is ignored) the value in the merged header will always be
        the value first encountered. This is usually used so that
        time-dependent headers such as ``DATE-OBS`` and ``AZSTART`` are
        retained, allowing the merged header to indicate the range of values.
        No exception is raised if a key cannot be found in a header, since
        this allows a range of expected headers to be listed covering
        multiple instruments.
    last : `list` or `tuple`, optional
        Keys to retain even if they differ. For all modes except ``append``
        (where it is ignored) the value in the merged header will always be
        the final value encountered. This is usually used so that
        time-dependent headers such as ``DATE-END`` and ``AZEND`` are
        retained, allowing the merged header to indicate the range of values.
        No exception is raised if a key cannot be found in a header, since
        this allows a range of expected headers to be listed covering
        multiple instruments.

    Returns
    -------
    merged : `dict`
        Single `dict` combining all the headers using the specified
        combination mode.

    Notes
    -----
    If ``first`` and ``last`` are supplied, the keys from ``first`` are
    handled first, followed by the keys from ``last``. No check is made to
    ensure that the keys do not overlap.
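
    Examples
    --------
    A minimal illustration using plain `dict` headers (the keys and values
    shown are purely illustrative):

    >>> h1 = {"INSTRUME": "LATISS", "EXPTIME": 30.0}
    >>> h2 = {"INSTRUME": "LATISS", "EXPTIME": 45.0, "FILTER": "r"}
    >>> merge_headers([h1, h2], mode="overwrite")
    {'INSTRUME': 'LATISS', 'EXPTIME': 45.0, 'FILTER': 'r'}
    >>> merge_headers([h1, h2], mode="drop")
    {'INSTRUME': 'LATISS', 'FILTER': 'r'}
    >>> merge_headers([h1, h2], mode="append")
    {'INSTRUME': 'LATISS', 'EXPTIME': [30.0, 45.0], 'FILTER': 'r'}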
88 """
    if not headers:
        raise ValueError("No headers supplied.")

    # Copy the input list because we will be reorganizing it
    headers = list(headers)

    # With a single header provided return a copy immediately
    if len(headers) == 1:
        return copy.deepcopy(headers[0])

    if sort:
        def key_func(hdr):
            translator_class = None
            try:
                translator_class = MetadataTranslator.determine_translator(hdr)
            except ValueError:
                # Try the FITS translator
                translator_class = FitsTranslator
            translator = translator_class(hdr)
            return translator.to_datetime_begin()

        headers = sorted(headers, key=key_func)

    log.debug("Received %d headers for merging", len(headers))

    # Pull out first header
    first_hdr = headers.pop(0)

    # Seed the merged header with a copy
    merged = copy.deepcopy(first_hdr)

    if mode == "overwrite":
        for h in headers:
            merged.update(h)

    elif mode == "first":
        # Reversing the headers and using overwrite mode would result in the
        # header order being inconsistent dependent on mode.
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]

    elif mode == "drop":
        drop = set()
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]
                elif merged[key] != hdr[key]:
                    # Key should be dropped later (not in loop since removing
                    # the key now might add it back for the next header).
                    drop.add(key)

        for key in drop:
            del merged[key]

    elif mode == "append":
        fill = set()
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]
                elif not isinstance(merged[key], list) and merged[key] != hdr[key]:
                    # If we detect different values, store an empty list
                    # in the slot and fill it later. Do it at end so
                    # we can pick up earlier values and fill empty with None.
                    merged[key] = []
                    fill.add(key)

        # Fill the entries that have multiple differing values
        for key in fill:
            merged[key] = [h[key] if key in h else None
                           for h in itertools.chain([first_hdr], headers)]

    else:
        raise ValueError(f"Unsupported value of '{mode}' for mode parameter.")

    # Force the first and last values to be inserted
    if mode != "append":
        def retain_value(to_receive, to_retain, sources):
            if to_retain:
                for k in to_retain:
                    # Look for values until we find one
                    for h in sources:
                        if k in h:
                            to_receive[k] = h[k]
                            break

        all_headers = (first_hdr, *headers)
        retain_value(merged, first, all_headers)
        retain_value(merged, last, tuple(reversed(all_headers)))

    return merged


def _find_from_file(header, paths, target_file):
    """Search file system for matching correction files.

    Parameters
    ----------
    header : `dict`
        Header to update.
    paths : `list`
        Paths to search.
    target_file : `str`
        File to locate in the path.

    Returns
    -------
    modified : `bool`
        `True` if a correction was found. Only the first correction located
        in a path is used.
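
    Examples
    --------
    A minimal sketch using a temporary directory (the correction file name
    is purely illustrative):

    >>> import os
    >>> import tempfile
    >>> import yaml
    >>> corrections_dir = tempfile.mkdtemp()
    >>> with open(os.path.join(corrections_dir, "LATISS-OBSID.yaml"), "w") as fh:
    ...     yaml.dump({"EXPTIME": 30.0}, fh)
    >>> hdr = {"EXPTIME": 0.0}
    >>> _find_from_file(hdr, [corrections_dir], "LATISS-OBSID.yaml")
    True
    >>> hdr["EXPTIME"]
    30.0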
203 """
    for p in paths:
        correction_file = os.path.join(p, target_file)
        if os.path.exists(correction_file):
            with open(correction_file) as fh:
                log.debug("Applying header corrections from file %s", correction_file)
                corrections = yaml.safe_load(fh)

            # Apply corrections
            header.update(corrections)

            return True
    return False


def _find_from_resource(header, package, resource_root, target_file):
    """Search package resource for correction information.

    Parameters
    ----------
    header : `dict`
        Header to update.
    package : `str`
        Package resource to search.
    resource_root : `str`
        Resource root.
    target_file : `str`
        Resource to locate.

    Returns
    -------
    modified : `bool`
        `True` if a correction was found.
    """
    if package is not None and resource_root is not None:
        resource_name = posixpath.join(resource_root, target_file)
        if pkg_resources.resource_exists(package, resource_name):
            log.debug("Applying header corrections from package resource %s:%s", package, resource_name)
            with pkg_resources.resource_stream(package, resource_name) as fh:
                corrections = yaml.safe_load(fh)
            header.update(corrections)

            return True
    return False


def fix_header(header, search_path=None, translator_class=None, filename=None):
    """Update, in place, the supplied header with known corrections.

    Parameters
    ----------
    header : `dict`-like
        Header to correct.
    search_path : `list` or `str`, optional
        Explicit directory paths to search for correction files.
        A single directory path can be given as a string.
    translator_class : `MetadataTranslator`-class, optional
        If not `None`, the class to use to translate the supplied headers
        into standard form. Otherwise each registered translator class will
        be asked in turn if it knows how to translate the supplied header.
    filename : `str`, optional
        Name of the file whose header is being translated. For some
        datasets with missing header information this can sometimes
        allow for some fixups in translations.

    Returns
    -------
    fixed : `bool`
        `True` if the header was updated.

    Raises
    ------
    TypeError
        Raised if the supplied translation class is not a `MetadataTranslator`.

    Notes
    -----
    In order to determine that a header update is required, the header must
    either be handled by the supplied translator class or support automatic
    translator class determination. It is also required that the
    ``observation_id`` and ``instrument`` be calculable prior to header fix
    up. If a translator class cannot be found, or if there is a problem
    determining the instrument or observation ID, the function will return
    without action.

    Correction files use names of the form ``instrument-obsid.yaml`` (for
    example ``LATISS-AT_O_20190329_000022.yaml``).
    The YAML file should have the format of:

    .. code-block:: yaml

        EXPTIME: 30.0
        IMGTYPE: bias

    where each key/value pair is copied directly into the supplied header,
    overwriting any previous values.

    This function searches a number of locations for such a correction file.
    The search order is:

    - Any paths explicitly supplied through ``search_path``.
    - The contents of the PATH-like environment variable
      ``$METADATA_CORRECTIONS_PATH``.
    - Any search paths supplied by the matching translator class.

    The first file located in the search path is used for the correction.
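
    Examples
    --------
    A minimal usage sketch (the FITS file name and the search directory are
    hypothetical; ``astropy.io.fits`` is used here only as a convenient way
    to obtain a `dict`-like header):

    .. code-block:: python

        from astropy.io import fits

        from astro_metadata_translator.headers import fix_header

        header = fits.getheader("image.fits")
        if fix_header(header, search_path=["/path/to/corrections"]):
            print("Header was corrected.")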
309 """

    if translator_class is None:
        try:
            translator_class = MetadataTranslator.determine_translator(header,
                                                                        filename=filename)
        except ValueError:
            # if the header is not recognized, we should not complain
            # and should not proceed further.
            return False
    elif not issubclass(translator_class, MetadataTranslator):
        raise TypeError(f"Translator class must be a MetadataTranslator, not {translator_class}")

    # Create an instance for this header
    translator = translator_class(header, filename=filename)

    # To determine the file look up we need the observation_id and instrument
    try:
        obsid = translator.to_observation_id()
        instrument = translator.to_instrument()
    except Exception:
        # Return without comment if these translations failed
        return False

    target_file = f"{instrument}-{obsid}.yaml"
    log.debug("Checking for header correction file named %s", target_file)

    # Work out the search path
    paths = []
    if search_path is not None:
        if isinstance(search_path, str):
            # Allow a single path to be given as a string
            search_path = [search_path]
        paths.extend(search_path)
    if ENV_VAR_NAME in os.environ and os.environ[ENV_VAR_NAME]:
        paths.extend(os.environ[ENV_VAR_NAME].split(os.path.pathsep))

    paths.extend(translator.search_paths())

    # Prioritize file system overrides
    modified = _find_from_file(header, paths, target_file)

    # Apply updates from resources only if none found in files
    if not modified:
        package, resource_root = translator.resource_root()
        modified = _find_from_resource(header, package, resource_root, target_file)

    # Allow a translation class to do local fixups
    # Allow it to fail but log the failure
    try:
        translator_modified = translator_class.fix_header(header)
    except Exception as e:
        log.fatal("Ignoring translator header fixup of %s %s: %s",
                  instrument, obsid, e)
        translator_modified = False

    return modified or translator_modified