Coverage for python/astro_metadata_translator/file_helpers.py: 14%
111 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-28 02:59 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-28 02:59 -0700
1# This file is part of astro_metadata_translator.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the LICENSE file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12"""Support functions for script implementations.
14These functions should not be treated as part of the public API.
15"""
17from __future__ import annotations
19__all__ = ("find_files", "read_basic_metadata_from_file", "read_file_info")
21import json
22import logging
23import os
24import re
25import traceback
26from collections.abc import Iterable, MutableMapping
27from typing import IO, Any
29from .headers import merge_headers
30from .observationInfo import ObservationInfo
31from .tests import read_test_file
33log = logging.getLogger(__name__)
35# Prefer afw over Astropy
try:
    import lsst.daf.base  # noqa: F401 need PropertyBase for readMetadata
    from lsst.afw.fits import FitsError, readMetadata

    def _read_fits_metadata(file: str, hdu: int, can_raise: bool = False) -> MutableMapping[str, Any] | None:
        """Read a single FITS header with the afw reader.

        Parameters
        ----------
        file : `str`
            Path of the file to read.
        hdu : `int`
            Index of the header to read.
        can_raise : `bool`, optional
            If `True` a `FitsError` from the reader is allowed to propagate.
            If `False` (the default) `None` is returned instead. Exceptions
            other than `FitsError` always propagate.

        Returns
        -------
        md : `~collections.abc.MutableMapping` or `None`
            The requested header, or `None` if the reader reported a
            `FitsError` and ``can_raise`` is `False`.

        Raises
        ------
        FileNotFoundError
            Raised if ``can_raise`` is `True` and the `FitsError` message
            contains the error code ``(104)``.
        """
        try:
            return readMetadata(file, hdu=hdu)
        except FitsError as err:
            if not can_raise:
                return None
            # Error code 104 in the message is treated as "file not found"
            # and translated to the standard Python exception.
            if "(104)" in str(err):
                raise FileNotFoundError(f"No such file or directory: {file}") from err
            raise

except ImportError:
    from astropy.io import fits

    def _read_fits_metadata(file: str, hdu: int, can_raise: bool = False) -> MutableMapping[str, Any] | None:
        """Read a single FITS header with astropy."""
        # Same contract as the afw implementation above. Any failure,
        # including a missing HDU (IndexError), is either re-raised or
        # swallowed depending on ``can_raise``.
        header = None
        try:
            with fits.open(file) as hdu_list:
                header = hdu_list[hdu].header
        except Exception:
            if can_raise:
                raise
        return header
def find_files(files: Iterable[str], regex: str) -> list[str]:
    """Find files for processing.

    Parameters
    ----------
    files : iterable of `str`
        The files or directories from which the headers are to be read.
    regex : `str`
        Regular expression string used to filter files when a directory is
        scanned. It is matched with `re.Pattern.search` against the file
        name only, not the full path.

    Returns
    -------
    found_files : `list` of `str`
        The files that were found. Entries in ``files`` that are not
        directories are returned unchanged and are not filtered by
        ``regex``.
    """
    file_regex = re.compile(regex)
    found_files: list[str] = []

    for entry in files:
        if not os.path.isdir(entry):
            # Explicitly named files are passed through without checking
            # that they exist or match the pattern.
            found_files.append(entry)
            continue

        # Use distinct loop names so the ``files`` parameter is never
        # rebound while it is being iterated over.
        for root, _dirs, names in os.walk(entry):
            for name in names:
                path = os.path.join(root, name)
                if os.path.isfile(path) and file_regex.search(name):
                    found_files.append(path)

    return found_files
def read_basic_metadata_from_file(
    file: str, hdrnum: int, can_raise: bool = True
) -> MutableMapping[str, Any] | None:
    """Read a raw header from a file, merging if necessary.

    Parameters
    ----------
    file : `str`
        Name of file to read. Names ending in ``.yaml`` or ``.json`` are
        read with `read_test_file`; anything else is read as FITS.
    hdrnum : `int`
        Header number to read. Only relevant for FITS. If greater than 0
        that header is merged with the primary header. If negative, the
        second header is merged with the primary header when the primary
        header has a true ``EXTEND`` value; otherwise only the primary
        header is returned.
    can_raise : `bool`, optional
        If `True` (the default) errors reading the primary header or the
        YAML/JSON file propagate. If `False` such errors result in `None`
        being returned. Can still raise if an unexpected error is
        encountered.

    Returns
    -------
    header : `~collections.abc.MutableMapping` or `None`
        The header. `None` if there was a problem reading the file.
    """
    if file.endswith((".yaml", ".json")):
        try:
            md = read_test_file(file)
        except Exception:
            if can_raise:
                raise
            md = None
        # YAML and JSON files have no HDUs so there is nothing to merge.
        hdrnum = 0
    else:
        md = _read_fits_metadata(file, 0, can_raise=can_raise)

    if md is None:
        log.warning("Unable to open file %s", file)
        return None

    # A negative request means "use the first extension if there is one".
    if hdrnum < 0 and "EXTEND" in md and md["EXTEND"]:
        hdrnum = 1

    if hdrnum <= 0:
        return md

    # Reading the extension is allowed to fail; the primary is still usable.
    extension = _read_fits_metadata(file, int(hdrnum), can_raise=False)
    if extension is None:
        log.warning("HDU %d was not found in file %s. Ignoring request.", hdrnum, file)
        return md

    # Astropy does not allow append mode since it does not convert lists
    # to multiple cards. Overwrite for now.
    return merge_headers([md, extension], mode="overwrite")
def read_file_info(
    file: str,
    hdrnum: int,
    print_trace: bool | None = None,
    content_mode: str = "translated",
    content_type: str = "simple",
    outstream: IO | None = None,
) -> str | MutableMapping[str, Any] | ObservationInfo | None:
    """Read information from file.

    Parameters
    ----------
    file : `str`
        The file from which the header is to be read.
    hdrnum : `int`
        The HDU number to read, passed through to
        `read_basic_metadata_from_file`.
    print_trace : `bool` or `None`
        Controls error reporting. `True` writes a full traceback to
        ``outstream``; `False` writes a one line summary (the ``repr`` of
        the exception). `None` lets the exception propagate.
    content_mode : `str`
        Content returned. ``metadata`` returns the headers as read;
        ``translated`` returns the output from metadata translation.
    content_type : `str`, optional
        Form of content to be returned. ``json`` returns a JSON string,
        ``simple`` returns a `dict`, and ``native`` returns either the
        header mapping (for ``metadata``) or an `.ObservationInfo` (for
        ``translated``).
    outstream : `typing.IO` or `None`, optional
        Stream used for error reports. `None` uses the default stream of
        `print` and `traceback.print_exc`.

    Returns
    -------
    info : `str`, `dict`, `.ObservationInfo` or `None`
        The requested content. `None` if the file could not be read, or if
        an error occurred and ``print_trace`` is not `None`.

    Raises
    ------
    ValueError
        Raised if ``content_mode`` or ``content_type`` is not one of the
        supported values.
    """
    if content_mode not in ("metadata", "translated"):
        raise ValueError(f"Unrecognized content mode request: {content_mode}")

    if content_type not in ("native", "simple", "json"):
        raise ValueError(f"Unrecognized content type request {content_type}")

    # With no trace preference the caller wants exceptions to escape.
    propagate = print_trace is None

    try:
        md = read_basic_metadata_from_file(file, hdrnum, can_raise=propagate)
        if md is None:
            return None

        if content_mode == "metadata":
            # The header is returned as read, without translation.
            if content_type != "json":
                return md
            # Tag the payload so a reader can tell metadata from translated.
            md["__CONTENT__"] = content_mode
            try:
                return json.dumps(md)
            except TypeError:
                # Cast to dict and try again -- PropertyList is a problem
                return json.dumps(dict(md))

        obs_info = ObservationInfo(md, pedantic=True, filename=file)
        if content_type == "native":
            return obs_info

        simple = obs_info.to_simple()
        if content_type == "json":
            # Tag the payload so a reader can tell metadata from translated.
            simple["__CONTENT__"] = content_mode
            return json.dumps(simple)
        if content_type == "simple":
            return simple

        raise RuntimeError(f"Logic error. Unrecognized mode for reading file: {content_mode}/{content_type}")
    except Exception as exc:
        if propagate:
            raise
        if print_trace:
            traceback.print_exc(file=outstream)
        else:
            print(repr(exc), file=outstream)
    return None