Coverage for python/astro_metadata_translator/headers.py: 9%
187 statements
coverage.py v6.5.0, created at 2023-03-31 02:46 -0700
# This file is part of astro_metadata_translator.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the LICENSE file at the top-level directory of this distribution
# for details of code ownership.
#
# Use of this source code is governed by a 3-clause BSD-style
# license that can be found in the LICENSE file.
12"""Code to support header manipulation operations."""
14from __future__ import annotations
16__all__ = ("merge_headers", "fix_header")
18import copy
19import datetime
20import itertools
21import logging
22import os
23import posixpath
24from collections import Counter
25from collections.abc import Mapping
26from typing import IO, Any, List, MutableMapping, Optional, Sequence, Tuple, Type, Union
28import pkg_resources
29import yaml
31from .translator import MetadataTranslator
32from .translators import FitsTranslator
34log = logging.getLogger(__name__)

ENV_VAR_NAME = "METADATA_CORRECTIONS_PATH"
"""Name of environment variable containing search path for header fix up."""

HIERARCH = "HIERARCH ASTRO METADATA FIX"
"""FITS-style hierarchical keyword root."""

FIXUP_SENTINEL = HIERARCH + " MODIFIED"
"""Keyword to add to header when header has been fixed."""


def merge_headers(
    headers: Sequence[MutableMapping[str, Any]],
    mode: str = "overwrite",
    sort: bool = False,
    first: Optional[Sequence[str]] = None,
    last: Optional[Sequence[str]] = None,
) -> MutableMapping[str, Any]:
    """Merge multiple headers into a single dict.

    Given a list of dict-like data headers, combine them following the
    specified mode.

    Parameters
    ----------
    headers : `list` of `dict` (or `dict`-like)
        Collection of headers to combine. `~lsst.daf.base.PropertyList`
        is supported.
    mode : `str`
        Scheme to use when a header has the same key as another header
        but a different value. Options are:

        - ``'overwrite'`` : Value in later header overwrites earlier value.
        - ``'drop'`` : Entire key is dropped. If a key only appears in a
          subset of the headers, and is identical in those, it will be
          retained.
        - ``'diff'`` : As for ``'drop'`` but the dropped values are stored
          in a `list` of `dict` in the returned merged header in key
          ``__DIFF__``. The order used matches the supplied order, or the
          sorted order if sorting was requested. This allows a simple
          header diff to be performed and associated with the original
          headers. Only keys that appear in all headers will be retained
          in the merged one. Unlike for ``'drop'``, keys that are identical
          but only present in a subset of the headers will always be
          included in the diff.
        - ``'first'`` : Retain the first value encountered.
        - ``'append'`` : Convert value to a list with a value for each
          header (`None` if the key was not present). If the value is
          identical in multiple headers but the key is missing in some,
          then the single identical value is stored.
    sort : `bool`, optional
        If `True`, sort the supplied headers into date order if possible.
        This affects the resulting merged output depending on the requested
        merge mode. An attempt will be made to extract a date from the
        headers.
    first : `list` or `tuple`, optional
        Keys to retain even if they differ. For all modes except ``append``
        (where it is ignored) the value in the merged header will always be
        the value first encountered. This is usually used to allow
        time-dependent headers such as ``DATE-OBS`` and ``AZSTART`` to be
        retained so that the header can indicate the range of values. No
        exception is raised if a key cannot be found in a header, since
        this allows a range of expected headers to be listed covering
        multiple instruments.
    last : `list` or `tuple`, optional
        Keys to retain even if they differ. For all modes except ``append``
        (where it is ignored) the value in the merged header will always be
        the final value encountered. This is usually used to allow
        time-dependent headers such as ``DATE-END`` and ``AZEND`` to be
        retained so that the header can indicate the range of values. No
        exception is raised if a key cannot be found in a header, since
        this allows a range of expected headers to be listed covering
        multiple instruments.

    Returns
    -------
    merged : `dict`
        Single `dict` combining all the headers using the specified
        combination mode.

    Notes
    -----
    If ``first`` and ``last`` are supplied, the keys from ``first`` are
    handled first, followed by the keys from ``last``. No check is made to
    ensure that the keys do not overlap.
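
    Examples
    --------
    A minimal sketch with plain `dict` headers and hypothetical keys:

    >>> merge_headers([{"A": 1, "B": 2}, {"A": 1, "B": 3}], mode="drop")
    {'A': 1}
    >>> merge_headers([{"A": 1}, {"A": 2}], mode="append")
    {'A': [1, 2]}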
117 """
    if not headers:
        raise ValueError("No headers supplied.")

    # Copy the input list because we will be reorganizing it
    headers = list(headers)

    # With a single header provided return a copy immediately
    if len(headers) == 1:
        return copy.deepcopy(headers[0])

    if sort:

        def key_func(hdr: Mapping[str, Any]) -> Any:
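            """Return the observation start time of a header, for use as
            the sort key."""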
            translator_class = None
            try:
                translator_class = MetadataTranslator.determine_translator(hdr)
            except ValueError:
                # Try the FITS translator
                translator_class = FitsTranslator
            translator = translator_class(hdr)
            return translator.to_datetime_begin()

        headers = sorted(headers, key=key_func)

    log.debug("Received %d headers for merging", len(headers))

    # Pull out the first header
    first_hdr = headers.pop(0)

    # Seed the merged header with a copy
    merged = copy.deepcopy(first_hdr)

    if mode == "overwrite":
        for h in headers:
            merged.update(h)

    elif mode == "first":
        # Reversing the headers and using overwrite mode would make the
        # resulting header order inconsistent between modes.
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]

    elif mode == "drop":
        drop = set()
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]
                elif merged[key] != hdr[key]:
                    # Key should be dropped later (not in this loop since
                    # removing the key now might add it back for the next
                    # header).
                    drop.add(key)

        for key in drop:
            del merged[key]

    elif mode == "diff":
        dropped_keys = set()

        # Only want to keep keys in the merged header that are in all the
        # input headers and identical. Seed with the first header.
        counter = Counter(merged.keys())

        for hdr in headers:
            counter.update(hdr.keys())
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]
                elif merged[key] != hdr[key]:
                    # Key should be dropped later (not in this loop since
                    # removing the key now might add it back for the next
                    # header).
                    dropped_keys.add(key)

        # Add to the set of dropped keys all the keys that have a count
        # less than the number of input headers (including the first one).
        n = len(headers) + 1
        for key in counter:
            if counter[key] != n:
                dropped_keys.add(key)

        # For each input header, create a distinct diff dict.
        # The first header must be included in this.
        diffs = []
        for hdr in itertools.chain([first_hdr], headers):
            # Get all the dropped keys that are in this header.
            # Sometimes a key will only be in some headers; for now do not
            # include it in the diffs of the headers that lack it.
            diff_keys = dropped_keys & set(hdr)

            diffs.append({k: hdr[k] for k in diff_keys})

        # PropertyList does not let us attach a dict to it, so if we
        # encounter this we have to force a type change to dict.
        try:
            merged["__DIFF__"] = diffs
        except TypeError:
            merged = dict(merged)
            merged["__DIFF__"] = diffs

        for key in dropped_keys:
            del merged[key]

    elif mode == "append":
        fill = set()
        for hdr in headers:
            for key in hdr:
                if key not in merged:
                    merged[key] = hdr[key]
                elif not isinstance(merged[key], list) and merged[key] != hdr[key]:
                    # If we detect different values, store an empty list
                    # in the slot and fill it later. Do it at the end so
                    # we can pick up earlier values and fill gaps with None.
                    merged[key] = []
                    fill.add(key)

        # Fill the entries that have multiple differing values
        for key in fill:
            merged[key] = [h[key] if key in h else None for h in itertools.chain([first_hdr], headers)]

    else:
        raise ValueError(f"Unsupported value of '{mode}' for mode parameter.")

    # Force the first and last values to be inserted
    if mode != "append":

        def retain_value(
            to_receive: MutableMapping[str, Any],
            to_retain: Optional[Sequence[str]],
            sources: Tuple[Mapping[str, Any], ...],
        ) -> None:
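            """Copy the first value found in the ordered ``sources`` into
            ``to_receive`` for each key listed in ``to_retain``.
            """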
            if to_retain:
                for k in to_retain:
                    # Look for values until we find one
                    for h in sources:
                        if k in h:
                            to_receive[k] = h[k]
                            break

        all_headers = (first_hdr, *headers)
        retain_value(merged, first, all_headers)
        retain_value(merged, last, tuple(reversed(all_headers)))

    return merged


def _read_yaml(fh: IO[bytes], msg: str) -> Optional[Mapping[str, Any]]:
    """Read YAML from a file descriptor.

    Parameters
    ----------
    fh : `io.IOBase`
        Open file handle containing the YAML stream.
    msg : `str`
        Text to include in log messages when referring to this stream.
        Examples could be "file something.yaml" or
        "resource module:resource".

    Returns
    -------
    parsed : `dict` or `None`
        The contents of the YAML file if it was a mapping, else `None` if
        the contents could not be parsed or were YAML but not a mapping.
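
    Examples
    --------
    A minimal sketch with an in-memory stream and hypothetical content:

    >>> import io
    >>> _read_yaml(io.BytesIO(b"EXPTIME: 30.0"), "example stream")
    {'EXPTIME': 30.0}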
283 """
    try:
        content = yaml.safe_load(fh)
    except Exception as e:
        log.warning("Error parsing YAML header corrections from %s: %s", msg, str(e))
        return None

    if not isinstance(content, Mapping):
        log.warning("YAML Mapping not found in %s. Ignoring contents.", msg)
        return None

    return content


def _find_from_file(
    header: MutableMapping[str, Any], paths: Sequence[str], target_file: str
) -> Optional[str]:
    """Search the file system for matching correction files.

    Parameters
    ----------
    header : `dict`
        Header to update.
    paths : `list`
        Paths to search.
    target_file : `str`
        File to locate in the path.

    Returns
    -------
    correction_found : `str` or `None`
        The path of the correction file used to update the header, or
        `None`. Only the first correction located in a path is used.
    """
    for p in paths:
        correction_file = os.path.join(p, target_file)
        if os.path.exists(correction_file):
            with open(correction_file, "rb") as fh:
                log.debug("Applying header corrections from file %s", correction_file)
                corrections = _read_yaml(fh, f"file {correction_file}")

            if corrections is None:
                continue

            # Apply corrections
            header.update(corrections)

            return correction_file
    return None


def _find_from_resource(
    header: MutableMapping[str, Any], package: Optional[str], resource_root: Optional[str], target_file: str
) -> Optional[str]:
    """Search a package resource for correction information.

    Parameters
    ----------
    header : `dict`
        Header to update.
    package : `str`
        Package resource to search.
    resource_root : `str`
        Resource root.
    target_file : `str`
        Resource to locate.

    Returns
    -------
    resource : `str` or `None`
        Name of resource read. `None` if no corrections were found.
    """
    if package is not None and resource_root is not None:
        resource_name = posixpath.join(resource_root, target_file)
        if pkg_resources.resource_exists(package, resource_name):
            log.debug("Applying header corrections from package resource %s:%s", package, resource_name)
            with pkg_resources.resource_stream(package, resource_name) as fh:
                corrections = _read_yaml(fh, f"package resource {package}:{resource_name}")

            if corrections is None:
                return None

            header.update(corrections)

            return f"{package}:{resource_name}"
    return None


def fix_header(
    header: MutableMapping[str, Any],
    search_path: Optional[Union[str, Sequence[str]]] = None,
    translator_class: Optional[Type[MetadataTranslator]] = None,
    filename: Optional[str] = None,
) -> bool:
    """Update, in place, the supplied header with known corrections.

    Parameters
    ----------
    header : `dict`-like
        Header to correct.
    search_path : `list` or `str`, optional
        Explicit directory paths to search for correction files.
        A single directory path can be given as a string.
    translator_class : `MetadataTranslator`-class, optional
        If not `None`, the class to use to translate the supplied headers
        into standard form. Otherwise each registered translator class will
        be asked in turn if it knows how to translate the supplied header.
    filename : `str`, optional
        Name of the file whose header is being translated. For some
        datasets with missing header information this can sometimes
        allow for some fixups in translations.

    Returns
    -------
    fixed : `bool`
        `True` if the header was updated.

    Raises
    ------
    TypeError
        Raised if the supplied translation class is not a
        `MetadataTranslator`.

    Notes
    -----
    In order to determine that a header update is required, the header
    must be handled by the supplied translator class or else support
    automatic translator class determination. It is also required that
    the ``observation_id`` and ``instrument`` be calculable prior to
    header fix up. If a translator class cannot be found, or if there is
    a problem determining the instrument or observation ID, the function
    will return without action.

    Correction files use names of the form ``instrument-obsid.yaml`` (for
    example ``LATISS-AT_O_20190329_000022.yaml``).
    The YAML file should have the format of:

    .. code-block:: yaml

        EXPTIME: 30.0
        IMGTYPE: bias

    where each key/value pair is copied directly into the supplied header,
    overwriting any previous values.

    This function searches a number of locations for such a correction
    file. The search order is:

    - Any paths explicitly supplied through ``search_path``.
    - The contents of the PATH-like environment variable
      ``$METADATA_CORRECTIONS_PATH``.
    - Any search paths supplied by the matching translator class.

    The first file located in the search path is used for the correction.
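
    Examples
    --------
    A minimal sketch, assuming ``hdr`` is a FITS-like header mapping and
    the correction directory is hypothetical:

    >>> fixed = fix_header(hdr, search_path="/path/to/corrections")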
436 """

    if FIXUP_SENTINEL in header:
        return header[FIXUP_SENTINEL]

    if translator_class is None:
        try:
            translator_class = MetadataTranslator.determine_translator(header, filename=filename)
        except ValueError as e:
            # If the header is not recognized, we should not complain
            # and should not proceed further.
            log.debug(
                "Unable to determine translator class %s -- not fixing header: %s",
                f"for {filename}" if filename is not None else "",
                e,
            )
            return False
    elif not issubclass(translator_class, MetadataTranslator):
        raise TypeError(f"Translator class must be a MetadataTranslator, not {translator_class}")

    # Create an instance for this header
    translator = translator_class(header, filename=filename)

    # To determine the file to look up we need the observation_id and
    # instrument.
    try:
        obsid = translator.to_observation_id()
        instrument = translator.to_instrument()
    except Exception:
        # Return without comment if these translations failed
        return False

    target_file = f"{instrument}-{obsid}.yaml"
    log.debug("Checking for header correction file named %s", target_file)

    # Work out the search path
    paths: List[str] = []
    if search_path is not None:
        if isinstance(search_path, str):
            # Allow a single path to be given as a string
            search_path = [search_path]
        paths.extend(search_path)
    if ENV_VAR_NAME in os.environ and os.environ[ENV_VAR_NAME]:
        paths.extend(os.environ[ENV_VAR_NAME].split(os.path.pathsep))
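
    # Finally, include any search paths defined by the translator itself.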
    paths.extend(translator.search_paths())

    # Prioritize file system overrides
    corrections_file = _find_from_file(header, paths, target_file)

    # Apply updates from package resources only if none were found in files
    if corrections_file is None:
        package, resource_root = translator.resource_root()
        corrections_file = _find_from_resource(header, package, resource_root, target_file)

    # Allow a translation class to do local fixups.
    # Allow it to fail, but log the failure.
    try:
        translator_modified = translator_class.fix_header(header, instrument, obsid, filename=filename)
    except Exception as e:
        log.fatal("Ignoring translator header fixup of %s %s: %s", instrument, obsid, e)
        translator_modified = False

    was_modified = (corrections_file is not None) or translator_modified

    # Always add a sentinel even if nothing was updated, since this will
    # speed up later fixes by not requiring the file system scan or the
    # calling of the per-instrument translator methods. Do not do it if
    # there has been a problem determining a translator, since it may be
    # that a new translator is registered later on for another attempt.
    header[FIXUP_SENTINEL] = was_modified

    # Record provenance
    header[HIERARCH + " DATE"] = datetime.datetime.now().isoformat()
    if corrections_file is not None:
        header[HIERARCH + " FILE"] = corrections_file
    if translator_modified:
        # Store the translator version
        header[HIERARCH + " VERSION"] = translator_class.translator_version()

    return was_modified