Coverage for python/astro_metadata_translator/file_helpers.py: 13%
110 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-05 01:30 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-05 01:30 +0000
1# This file is part of astro_metadata_translator.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the LICENSE file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12"""Support functions for script implementations."""
14from __future__ import annotations
16__all__ = ("find_files", "read_basic_metadata_from_file", "read_file_info")
18import json
19import os
20import re
21import sys
22import traceback
23from collections.abc import Iterable, MutableMapping
24from typing import IO, Any
26from .headers import merge_headers
27from .observationInfo import ObservationInfo
28from .tests import read_test_file
30# Prefer afw over Astropy
try:
    import lsst.daf.base  # noqa: F401 need PropertyBase for readMetadata
    from lsst.afw.fits import FitsError, readMetadata

    def _read_fits_metadata(file: str, hdu: int, can_raise: bool = False) -> MutableMapping[str, Any] | None:
        """Read a FITS header using afw.

        Parameters
        ----------
        file : `str`
            The file to read.
        hdu : `int`
            The header number to read.
        can_raise : `bool`, optional
            If `True` a `FitsError` encountered while reading is re-raised
            to the caller. If `False` (the default) `None` is returned
            instead. Exceptions other than `FitsError` always propagate.

        Returns
        -------
        md : `dict`
            The requested header. `None` if it could not be read and
            ``can_raise`` is `False`.

        Notes
        -----
        Tries to catch a FitsError 104 and convert to `FileNotFoundError`.
        """
        try:
            metadata = readMetadata(file, hdu=hdu)
        except FitsError as err:
            if not can_raise:
                return None
            # Try to convert a basic fits error code
            if "(104)" in str(err):
                raise FileNotFoundError(f"No such file or directory: {file}") from err
            raise
        return metadata

except ImportError:
    from astropy.io import fits

    def _read_fits_metadata(file: str, hdu: int, can_raise: bool = False) -> MutableMapping[str, Any] | None:
        """Read a FITS header using astropy."""
        # For detailed docstrings see the afw implementation above
        header = None
        try:
            with fits.open(file) as hdu_list:
                header = hdu_list[hdu].header
        except Exception:
            # Covers both a failure to open the file and a missing HDU
            # (IndexError); either is only reported when requested.
            if can_raise:
                raise
        return header
def find_files(files: Iterable[str], regex: str) -> list[str]:
    """Collect the files that should be processed.

    Parameters
    ----------
    files : iterable of `str`
        The files or directories from which the headers are to be read.
    regex : `str`
        Regular expression string used to filter files when a directory is
        scanned. It is searched against the file name only, not the full
        path, and is not applied to entries that are not directories.

    Returns
    -------
    found_files : `list` of `str`
        The files that were found. Entries that are not directories are
        returned unchanged; directories are walked recursively.
    """
    pattern = re.compile(regex)
    matches: list[str] = []

    for entry in files:
        if not os.path.isdir(entry):
            # Anything that is not a directory is passed straight through.
            matches.append(entry)
            continue

        for dirpath, _subdirs, filenames in os.walk(entry):
            candidates = (os.path.join(dirpath, name) for name in filenames if pattern.search(name))
            matches.extend(path for path in candidates if os.path.isfile(path))

    return matches
def read_basic_metadata_from_file(
    file: str, hdrnum: int, errstream: IO = sys.stderr, can_raise: bool = True
) -> MutableMapping[str, Any] | None:
    """Read a raw header from a file, merging if necessary.

    Parameters
    ----------
    file : `str`
        Name of file to read. A name ending in ``.yaml`` is read as YAML;
        anything else is read as FITS. YAML must be a simple top-level
        dict.
    hdrnum : `int`
        Header number to read. Only relevant for FITS. If greater than 0
        that header is merged with the primary header. If a negative
        number is given and the primary header has a true ``EXTEND``
        value, header 1 is merged with the primary header. Otherwise a
        negative number behaves identically to specifying 0.
    errstream : `io.StringIO`, optional
        Stream to send messages that would normally be sent to standard
        error. Defaults to `sys.stderr`.
    can_raise : `bool`, optional
        Indicate whether the function can raise an exception (default)
        or should return `None` on error. Reading the additional header
        never raises for errors handled by the reader; a message is
        written to ``errstream`` instead.

    Returns
    -------
    header : `dict`
        The header as a dict. Can be `None` if there was a problem reading
        the file.
    """
    if file.endswith(".yaml"):
        try:
            md = read_test_file(file)
        except Exception:
            if can_raise:
                raise
            md = None
        # YAML can't have HDUs so skip merging below
        hdrnum = 0
    else:
        md = _read_fits_metadata(file, 0, can_raise=can_raise)

    if md is None:
        print(f"Unable to open file {file}", file=errstream)
        return None

    # A negative request means "merge the first extension if there is one".
    if hdrnum < 0 and "EXTEND" in md and md["EXTEND"]:
        hdrnum = 1

    if hdrnum <= 0:
        return md

    # Allow this to fail
    extension = _read_fits_metadata(file, int(hdrnum), can_raise=False)
    if extension is None:
        print(f"HDU {hdrnum} was not found in file {file}. Ignoring request.", file=errstream)
        return md

    # Astropy does not allow append mode since it does not
    # convert lists to multiple cards. Overwrite for now
    return merge_headers([md, extension], mode="overwrite")
def read_file_info(
    file: str,
    hdrnum: int,
    print_trace: bool | None = None,
    content_mode: str = "translated",
    content_type: str = "simple",
    outstream: IO = sys.stdout,
    errstream: IO = sys.stderr,
) -> str | MutableMapping[str, Any] | ObservationInfo | None:
    """Read information from file.

    Parameters
    ----------
    file : `str`
        The file from which the header is to be read.
    hdrnum : `int`
        The HDU number to read. The primary header is always read and
        merged with the header from this HDU.
    print_trace : `bool` or `None`
        If there is an error reading the file and this parameter is `True`,
        a full traceback of the exception will be reported. If `False` prints
        a one line summary of the error condition. If `None` the exception
        will be allowed to propagate.
    content_mode : `str`
        Content returned. This can be: ``metadata`` to return the unfixed
        metadata headers; ``translated`` to return the output from metadata
        translation.
    content_type : `str`, optional
        Form of content to be returned. Can be either ``json`` to return a
        JSON string, ``simple`` to always return a `dict`, or ``native`` to
        return either a `dict` (for ``metadata``) or `.ObservationInfo` for
        ``translated``.
    outstream : `io.StringIO`, optional
        Output stream to use for standard messages. Defaults to `sys.stdout`.
        The traceback or one-line error summary requested through
        ``print_trace`` is written here.
    errstream : `io.StringIO`, optional
        Stream to send messages that would normally be sent to standard
        error. Defaults to `sys.stderr`. Passed on to the header reader.

    Returns
    -------
    simple : `dict` of `str` or `.ObservationInfo`
        The return value of `.ObservationInfo.to_simple`. Returns `None`
        if there was a problem and ``print_trace`` is not `None`.

    Raises
    ------
    ValueError
        Raised if ``content_mode`` or ``content_type`` is not one of the
        supported values.
    """
    if content_mode not in ("metadata", "translated"):
        raise ValueError(f"Unrecognized content mode request: {content_mode}")

    if content_type not in ("native", "simple", "json"):
        raise ValueError(f"Unrecognized content type request {content_type}")

    # With no reporting preference the caller wants exceptions to surface.
    propagate = print_trace is None

    try:
        md = read_basic_metadata_from_file(file, hdrnum, errstream=errstream, can_raise=propagate)
        if md is None:
            return None

        if content_mode == "metadata":
            # Do not fix the header
            if content_type != "json":
                return md
            # Add a key to tell the reader whether this is md or translated
            md["__CONTENT__"] = content_mode
            try:
                return json.dumps(md)
            except TypeError:
                # Cast to dict and try again -- PropertyList is a problem
                return json.dumps(dict(md))

        obs_info = ObservationInfo(md, pedantic=True, filename=file)
        if content_type == "native":
            return obs_info

        simple = obs_info.to_simple()
        if content_type == "json":
            # Add a key to tell the reader if this is metadata or translated
            simple["__CONTENT__"] = content_mode
            return json.dumps(simple)
        if content_type == "simple":
            return simple
        raise RuntimeError(f"Logic error. Unrecognized mode for reading file: {content_mode}/{content_type}")
    except Exception as err:
        if propagate:
            raise
        if print_trace:
            traceback.print_exc(file=outstream)
        else:
            print(repr(err), file=outstream)
    return None