Coverage for python/astro_metadata_translator/file_helpers.py: 13%
109 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-18 18:35 +0000
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-18 18:35 +0000
1# This file is part of astro_metadata_translator.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the LICENSE file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12"""Support functions for script implementations."""
14from __future__ import annotations
16__all__ = ("find_files", "read_basic_metadata_from_file", "read_file_info")
18import json
19import os
20import re
21import sys
22import traceback
23from typing import IO, Any, Iterable, List, MutableMapping, Optional, Union
25from .headers import merge_headers
26from .observationInfo import ObservationInfo
27from .tests import read_test_file
29# Prefer afw over Astropy
try:
    import lsst.daf.base  # noqa: F401 need PropertyBase for readMetadata
    from lsst.afw.fits import FitsError, readMetadata

    def _read_fits_metadata(
        file: str, hdu: int, can_raise: bool = False
    ) -> Optional[MutableMapping[str, Any]]:
        """Read a FITS header using afw.

        Parameters
        ----------
        file : `str`
            The file to read.
        hdu : `int`
            The header number to read.
        can_raise : `bool`, optional
            If `True` a `FitsError` from afw is allowed to propagate.
            If `False` (the default) a `FitsError` results in `None` being
            returned. Exceptions other than `FitsError` are never
            intercepted here.

        Returns
        -------
        md : `dict`
            The requested header. `None` if it could not be read and
            ``can_raise`` is `False`.

        Raises
        ------
        FileNotFoundError
            Raised if ``can_raise`` is `True` and the afw error text
            contains ``(104)``.

        Notes
        -----
        Tries to catch a FitsError 104 and convert to `FileNotFoundError`.
        """
        try:
            return readMetadata(file, hdu=hdu)
        except FitsError as err:
            if not can_raise:
                return None
            # An error message containing "(104)" is reported to the
            # caller as a missing file; anything else is re-raised as is.
            if "(104)" in str(err):
                raise FileNotFoundError(f"No such file or directory: {file}") from err
            raise

except ImportError:
    from astropy.io import fits

    def _read_fits_metadata(
        file: str, hdu: int, can_raise: bool = False
    ) -> Optional[MutableMapping[str, Any]]:
        """Read a FITS header using astropy.

        Parameters
        ----------
        file : `str`
            The file to read.
        hdu : `int`
            The header number to read.
        can_raise : `bool`, optional
            If `True` any exception encountered while opening the file or
            indexing the HDU is re-raised. If `False` (the default) the
            exception is swallowed and `None` is returned.

        Returns
        -------
        md : `dict`
            The requested header. `None` if it could not be read and
            ``can_raise`` is `False`.
        """
        try:
            with fits.open(file) as hdu_list:
                return hdu_list[hdu].header
        except Exception:
            # Covers both a failure to open the file and a missing HDU
            # (IndexError); the caller decides whether that is fatal.
            if can_raise:
                raise
            return None
def find_files(files: Iterable[str], regex: str) -> List[str]:
    """Collect the files that should be processed.

    Parameters
    ----------
    files : iterable of `str`
        The files or directories from which the headers are to be read.
    regex : `str`
        Regular expression string used to filter files when a directory is
        scanned. It is searched for in the file name only, not in the
        directory part of the path.

    Returns
    -------
    found_files : `list` of `str`
        The files that were found. Entries of ``files`` that are not
        directories are included unchanged, without being tested against
        ``regex``.
    """
    pattern = re.compile(regex)
    matches: List[str] = []

    for entry in files:
        if not os.path.isdir(entry):
            # Explicitly named files are trusted as given.
            matches.append(entry)
            continue

        # Recurse through the directory tree looking for matching names.
        for dirpath, _subdirs, filenames in os.walk(entry):
            for filename in filenames:
                candidate = os.path.join(dirpath, filename)
                if os.path.isfile(candidate) and pattern.search(filename):
                    matches.append(candidate)

    return matches
def read_basic_metadata_from_file(
    file: str, hdrnum: int, errstream: IO = sys.stderr, can_raise: bool = True
) -> Optional[MutableMapping[str, Any]]:
    """Read a raw header from a file, merging if necessary.

    Parameters
    ----------
    file : `str`
        Name of file to read. Can be FITS or YAML. A name ending in
        ``.yaml`` is read as YAML; anything else is read as FITS. YAML must
        be a simple top-level dict.
    hdrnum : `int`
        Header number to read. Only relevant for FITS. If greater than 0
        it will be merged with the primary header. If a negative number is
        given the second header (HDU 1) will be merged with the primary
        header when the primary header has a true ``EXTEND`` value;
        otherwise a negative number behaves identically to specifying 0.
    errstream : `io.StringIO`, optional
        Stream to send messages that would normally be sent to standard
        error. Defaults to `sys.stderr`. A message is written when the
        file could not be read or when the requested HDU was not found.
    can_raise : `bool`, optional
        Indicate whether the function can raise an exception (default)
        or should return `None` on error. Can still raise if an unexpected
        error is encountered.

    Returns
    -------
    header : `dict`
        The header as a dict. Can be `None` if there was a problem reading
        the file.
    """
    if file.endswith(".yaml"):
        # YAML can't have HDUs so make sure no merging is attempted below.
        hdrnum = 0
        try:
            header = read_test_file(file)
        except Exception:
            if can_raise:
                raise
            header = None
    else:
        header = _read_fits_metadata(file, 0, can_raise=can_raise)

    if header is None:
        print(f"Unable to open file {file}", file=errstream)
        return None

    # A negative request means "merge the first extension if there is one".
    if hdrnum < 0 and "EXTEND" in header and header["EXTEND"]:
        hdrnum = 1

    if hdrnum > 0:
        # The extension read is allowed to fail; the primary header is
        # still returned in that case.
        extension = _read_fits_metadata(file, int(hdrnum), can_raise=False)
        if extension is None:
            print(f"HDU {hdrnum} was not found in file {file}. Ignoring request.", file=errstream)
            return header
        # Astropy does not allow append mode since it does not
        # convert lists to multiple cards. Overwrite for now
        return merge_headers([header, extension], mode="overwrite")

    return header
def read_file_info(
    file: str,
    hdrnum: int,
    print_trace: Optional[bool] = None,
    content_mode: str = "translated",
    content_type: str = "simple",
    outstream: IO = sys.stdout,
    errstream: IO = sys.stderr,
) -> Optional[Union[str, MutableMapping[str, Any], ObservationInfo]]:
    """Read information from file.

    Parameters
    ----------
    file : `str`
        The file from which the header is to be read.
    hdrnum : `int`
        The HDU number to read. The primary header is always read and
        merged with the header from this HDU.
    print_trace : `bool` or `None`
        If there is an error reading the file and this parameter is `True`,
        a full traceback of the exception will be reported. If `False` prints
        a one line summary of the error condition. If `None` the exception
        will be allowed to propagate.
    content_mode : `str`, optional
        Content returned. This can be: ``metadata`` to return the unfixed
        metadata headers; ``translated`` to return the output from metadata
        translation.
    content_type : `str`, optional
        Form of content to be returned. Can be either ``json`` to return a
        JSON string, ``simple`` to always return a `dict`, or ``native`` to
        return either a `dict` (for ``metadata``) or `ObservationInfo` for
        ``translated``.
    outstream : `io.StringIO`, optional
        Output stream to use for standard messages. Defaults to `sys.stdout`.
        The traceback or one line error summary requested through
        ``print_trace`` is written to this stream.
    errstream : `io.StringIO`, optional
        Stream to send messages that would normally be sent to standard
        error. Defaults to `sys.stderr`. It is handed to
        `read_basic_metadata_from_file`.

    Returns
    -------
    simple : `dict` of `str` or `ObservationInfo`
        The return value of `ObservationInfo.to_simple()`, or the variant
        selected by ``content_mode`` and ``content_type``. JSON output
        carries an extra ``__CONTENT__`` key holding ``content_mode``.
        Returns `None` if there was a problem and `print_trace` is not
        `None`.

    Raises
    ------
    ValueError
        Raised if ``content_mode`` or ``content_type`` is not one of the
        supported values.
    """
    if content_mode not in ("metadata", "translated"):
        raise ValueError(f"Unrecognized content mode request: {content_mode}")

    if content_type not in ("native", "simple", "json"):
        raise ValueError(f"Unrecognized content type request {content_type}")

    # With no reporting preference every failure is the caller's problem.
    propagate = print_trace is None

    try:
        header = read_basic_metadata_from_file(file, hdrnum, errstream=errstream, can_raise=propagate)
        if header is None:
            return None

        want_json = content_type == "json"

        if content_mode == "metadata":
            # Do not fix the header
            if not want_json:
                return header
            # Add a key to tell the reader whether this is md or translated
            header["__CONTENT__"] = content_mode
            try:
                return json.dumps(header)
            except TypeError:
                # Cast to dict and try again -- PropertyList is a problem
                return json.dumps(dict(header))

        obs_info = ObservationInfo(header, pedantic=True, filename=file)
        if content_type == "native":
            return obs_info

        simple = obs_info.to_simple()
        if want_json:
            # Add a key to tell the reader if this is metadata or translated
            simple["__CONTENT__"] = content_mode
            return json.dumps(simple)
        if content_type == "simple":
            return simple
        raise RuntimeError(f"Logic error. Unrecognized mode for reading file: {content_mode}/{content_type}")
    except Exception as exc:
        if propagate:
            raise
        if print_trace:
            traceback.print_exc(file=outstream)
        else:
            print(repr(exc), file=outstream)
        return None