Coverage for python/astro_metadata_translator/headers.py : 9%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of astro_metadata_translator.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the LICENSE file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12"""Code to support header manipulation operations."""
14__all__ = ("merge_headers", "fix_header")
16import pkg_resources
17import posixpath
18import logging
19import itertools
20import copy
21import os
22import yaml
23from collections.abc import Mapping
25from .translator import MetadataTranslator
26from .translators import FitsTranslator
28log = logging.getLogger(__name__)
30ENV_VAR_NAME = "METADATA_CORRECTIONS_PATH"
31"""Name of environment variable containing search path for header fix up."""
def merge_headers(headers, mode="overwrite", sort=False, first=None, last=None):
    """Combine a collection of headers into a single dict.

    Parameters
    ----------
    headers : `list` of `dict` (or `dict`-like)
        The headers to combine. `~lsst.daf.base.PropertyList` is
        supported.
    mode : `str`
        Strategy for resolving a key that appears in more than one header
        with differing values. One of:

        - ``'overwrite'`` : Later headers overwrite earlier values.
        - ``'drop'`` : Conflicting keys are removed entirely.
        - ``'first'`` : The first value encountered is kept.
        - ``'append'`` : The value becomes a list with one entry per
          header (`None` where the key was absent). If every header that
          has the key agrees on the value, the single value is stored.
    sort : `bool`, optional
        If `True`, attempt to sort the headers into date order before
        merging; the merged result then depends on that order according
        to the chosen mode.
    first : `list` or `tuple`, optional
        Keys whose merged value should always be the first one
        encountered, regardless of mode (ignored for ``append``).
        Typically used for time-dependent keys such as ``DATE-OBS`` and
        ``AZSTART`` so the merged header reflects the start of the range.
        Keys absent from every header are silently skipped, allowing one
        list to cover multiple instruments.
    last : `list` or `tuple`, optional
        Keys whose merged value should always be the last one
        encountered, regardless of mode (ignored for ``append``).
        Typically used for time-dependent keys such as ``DATE-END`` and
        ``AZEND``. Keys absent from every header are silently skipped.

    Returns
    -------
    merged : `dict`
        A single `dict` combining all the supplied headers according to
        the requested mode.

    Notes
    -----
    When both ``first`` and ``last`` are given, ``first`` is applied
    before ``last``; overlapping keys are not checked for.
    """
    if not headers:
        raise ValueError("No headers supplied.")

    # Work on a private list since we pop from it below.
    headers = list(headers)

    # A lone header needs no merging; return an independent copy.
    if len(headers) == 1:
        return copy.deepcopy(headers[0])

    if sort:
        def _start_time(hdr):
            # Prefer an automatically determined translator and fall back
            # to the plain FITS translator for unrecognized headers.
            try:
                cls = MetadataTranslator.determine_translator(hdr)
            except ValueError:
                cls = FitsTranslator
            return cls(hdr).to_datetime_begin()

        headers.sort(key=_start_time)

    log.debug("Received %d headers for merging", len(headers))

    # Seed the result with a copy of the first header.
    first_hdr = headers.pop(0)
    merged = copy.deepcopy(first_hdr)

    if mode == "overwrite":
        for hdr in headers:
            merged.update(hdr)

    elif mode == "first":
        # Keep the earliest value seen for every key. (Reversing the
        # inputs and using overwrite would make key ordering depend on
        # the mode, so insert missing keys explicitly instead.)
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]

    elif mode == "drop":
        # Record conflicting keys and strip them only after every header
        # has been seen; deleting mid-loop could let a later header
        # silently re-insert the key.
        conflicts = set()
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]
                elif merged[key] != hdr[key]:
                    conflicts.add(key)
        for key in conflicts:
            del merged[key]

    elif mode == "append":
        multi_valued = set()
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]
                elif not isinstance(merged[key], list) and merged[key] != hdr[key]:
                    # Differing values detected: flag the key and expand
                    # it afterwards so earlier headers and missing keys
                    # (as None) are accounted for.
                    merged[key] = []
                    multi_valued.add(key)

        # Expand every flagged key into one entry per header.
        for key in multi_valued:
            merged[key] = [hdr[key] if key in hdr else None
                           for hdr in itertools.chain([first_hdr], headers)]

    else:
        raise ValueError(f"Unsupported value of '{mode}' for mode parameter.")

    if mode != "append":
        # Force the requested keys to take their first- or last-seen
        # values, scanning the sources in the appropriate direction.
        everything = (first_hdr, *headers)

        def _adopt(keys, ordered_sources):
            for key in keys or ():
                for hdr in ordered_sources:
                    if key in hdr:
                        merged[key] = hdr[key]
                        break

        _adopt(first, everything)
        _adopt(last, tuple(reversed(everything)))

    return merged
def _read_yaml(fh, msg):
    """Parse a YAML mapping from an open stream.

    Parameters
    ----------
    fh : `io.IOBase`
        Open stream positioned at the YAML content.
    msg : `str`
        Description of the stream's origin for use in log messages,
        for example "file something.yaml" or "resource module:resource".

    Returns
    -------
    parsed : `dict` or `None`
        The parsed mapping, or `None` if the stream could not be parsed
        or its contents were valid YAML but not a mapping.
    """
    try:
        parsed = yaml.safe_load(fh)
    except Exception as e:
        # Corrections are best-effort: report the problem and move on.
        log.warning("Error parsing YAML header corrections from %s: %s", msg, str(e))
        return None

    if isinstance(parsed, Mapping):
        return parsed

    log.warning("YAML Mapping not found in %s. Ignoring contents.", msg)
    return None
218def _find_from_file(header, paths, target_file):
219 """Search file system for matching correction files.
221 Parameters
222 ----------
223 header : `dict`
224 Header to update.
225 paths : `list`
226 Paths to search.
227 target_file : `str`
228 File to locate in the path.
230 Returns
231 -------
232 modified : `bool`
233 `True` if a correction was found. Only the first correction located
234 in a path is used.
235 """
236 for p in paths:
237 correction_file = os.path.join(p, target_file)
238 if os.path.exists(correction_file):
239 with open(correction_file) as fh:
240 log.debug("Applying header corrections from file %s", correction_file)
241 corrections = _read_yaml(fh, f"file {correction_file}")
243 if corrections is None:
244 continue
246 # Apply corrections
247 header.update(corrections)
249 return True
250 return False
253def _find_from_resource(header, package, resource_root, target_file):
254 """Search package resource for correction information.
256 Parameters
257 ----------
258 header : `dict`
259 Header to update.
260 package : `str`
261 Package resource to search.
262 resource_root : `str`
263 Resource root.
264 target_file : `str`
265 Resource to locate.
267 Returns
268 -------
269 modified : `bool`
270 `True` if a correction was found.
271 """
272 if package is not None and resource_root is not None:
273 resource_name = posixpath.join(resource_root, target_file)
274 if pkg_resources.resource_exists(package, resource_name):
275 log.debug("Applying header corrections from package resource %s:%s", package, resource_name)
276 with pkg_resources.resource_stream(package, resource_name) as fh:
277 corrections = _read_yaml(fh, f"package resource {package}:{resource_name}")
279 if corrections is None:
280 return False
282 header.update(corrections)
284 return True
285 return False
def fix_header(header, search_path=None, translator_class=None, filename=None):
    """Apply known corrections to the supplied header, updating it in place.

    Parameters
    ----------
    header : `dict`-like
        Header to correct.
    search_path : `list` or `str`, optional
        Explicit directories to search for correction files. A single
        directory may be given as a plain string.
    translator_class : `MetadataTranslator`-class, optional
        Class to use to translate the header into standard form. If
        `None`, every registered translator class is asked in turn
        whether it understands the header.
    filename : `str`, optional
        Name of the file whose header is being translated; for some
        datasets with incomplete headers this enables extra fixups.

    Returns
    -------
    fixed : `bool`
        `True` if the header was updated.

    Raises
    ------
    TypeError
        Raised if the supplied translation class is not a
        `MetadataTranslator`.

    Notes
    -----
    Determining whether an update is needed requires that a translator
    class handle the header and that ``observation_id`` and
    ``instrument`` be calculable before fix up. If no translator can be
    found, or either quantity cannot be determined, the function returns
    without action.

    Correction files are named ``instrument-obsid.yaml`` (for example
    ``LATISS-AT_O_20190329_000022.yaml``) and contain a YAML mapping:

    .. code-block:: yaml

       EXPTIME: 30.0
       IMGTYPE: bias

    Each key/value pair is copied directly into the header, overwriting
    existing values. Locations are searched in this order, and the first
    file found wins:

    - Paths given via ``search_path``.
    - The PATH-like environment variable
      ``$METADATA_CORRECTIONS_PATH``.
    - Search paths supplied by the matching translator class.
    """
    if translator_class is None:
        try:
            translator_class = MetadataTranslator.determine_translator(header,
                                                                       filename=filename)
        except ValueError:
            # An unrecognized header is not an error here; there is
            # simply nothing we can do for it.
            return False
    elif not issubclass(translator_class, MetadataTranslator):
        raise TypeError(f"Translator class must be a MetadataTranslator, not {translator_class}")

    translator = translator_class(header, filename=filename)

    # The correction file name is derived from the instrument and the
    # observation ID; without both we cannot proceed.
    try:
        obsid = translator.to_observation_id()
        instrument = translator.to_instrument()
    except Exception:
        return False

    target_file = f"{instrument}-{obsid}.yaml"
    log.debug("Checking for header correction file named %s", target_file)

    # Assemble the search path: explicit paths first, then the
    # environment variable, then translator-provided locations.
    paths = []
    if search_path is not None:
        if isinstance(search_path, str):
            # A lone directory may be supplied as a bare string.
            search_path = [search_path]
        paths.extend(search_path)
    env_path = os.environ.get(ENV_VAR_NAME)
    if env_path:
        paths.extend(env_path.split(os.path.pathsep))
    paths.extend(translator.search_paths())

    # File system overrides take precedence; only consult package
    # resources when nothing was found on disk.
    modified = _find_from_file(header, paths, target_file)
    if not modified:
        package, resource_root = translator.resource_root()
        modified = _find_from_resource(header, package, resource_root, target_file)

    # Give the translator class a chance to apply built-in fixups.
    # Failures are logged but otherwise ignored.
    try:
        translator_modified = translator_class.fix_header(header, instrument, obsid, filename=filename)
    except Exception as e:
        log.fatal("Ignoring translator header fixup of %s %s: %s",
                  instrument, obsid, e)
        translator_modified = False

    return modified or translator_modified