Coverage for python/astro_metadata_translator/file_helpers.py: 14%

111 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-06 03:48 -0700

1# This file is part of astro_metadata_translator. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the LICENSE file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12"""Support functions for script implementations. 

13 

14These functions should not be treated as part of the public API. 

15""" 

16 

17from __future__ import annotations 

18 

19__all__ = ("find_files", "read_basic_metadata_from_file", "read_file_info") 

20 

21import json 

22import logging 

23import os 

24import re 

25import traceback 

26from collections.abc import Iterable, MutableMapping 

27from typing import IO, Any 

28 

29from .headers import merge_headers 

30from .observationInfo import ObservationInfo 

31from .tests import read_test_file 

32 

33log = logging.getLogger(__name__) 

34 

# Prefer afw over Astropy
try:
    import lsst.daf.base  # noqa: F401 need PropertyBase for readMetadata
    from lsst.afw.fits import FitsError, readMetadata

    def _read_fits_metadata(file: str, hdu: int, can_raise: bool = False) -> MutableMapping[str, Any] | None:
        """Read a FITS header using afw.

        Parameters
        ----------
        file : `str`
            The file to read.
        hdu : `int`
            The header number to read.
        can_raise : `bool`, optional
            If `True` a `FitsError` encountered while reading is allowed
            to propagate. If `False` (default) `None` is returned instead.
            Exceptions other than `FitsError` are never caught here.

        Returns
        -------
        md : `dict`
            The requested header. `None` if it could not be read and
            ``can_raise`` is `False`.

        Raises
        ------
        FileNotFoundError
            Raised if ``can_raise`` is `True` and the `FitsError` message
            contains the code ``(104)``.
        FitsError
            Raised for any other FITS error if ``can_raise`` is `True`.

        Notes
        -----
        Tries to catch a FitsError 104 and convert to `FileNotFoundError`.
        """
        try:
            return readMetadata(file, hdu=hdu)
        except FitsError as e:
            if not can_raise:
                return None
            # Try to convert a basic fits error code
            if "(104)" in str(e):
                raise FileNotFoundError(f"No such file or directory: {file}") from e
            raise

except ImportError:
    from astropy.io import fits

    def _read_fits_metadata(file: str, hdu: int, can_raise: bool = False) -> MutableMapping[str, Any] | None:
        """Read a FITS header using astropy.

        Parameters
        ----------
        file : `str`
            The file to read.
        hdu : `int`
            The header number to read.
        can_raise : `bool`, optional
            If `True` any exception from opening the file or indexing the
            HDU is allowed to propagate. If `False` (default) `None` is
            returned instead.

        Returns
        -------
        md : `dict`
            The requested header. `None` if it could not be read and
            ``can_raise`` is `False`.
        """
        # A single handler is sufficient: a missing HDU (IndexError) and a
        # failure to open the file are treated identically.
        try:
            with fits.open(file) as fits_file:
                return fits_file[hdu].header
        except Exception:
            if can_raise:
                raise
            return None

92 

93 

def find_files(files: Iterable[str], regex: str) -> list[str]:
    """Find files for processing.

    Parameters
    ----------
    files : iterable of `str`
        The files or directories from which the headers are to be read.
    regex : `str`
        Regular expression string used to filter files when a directory is
        scanned. It is searched for in the file name only, not the full
        path, and is not applied to entries that are not directories.

    Returns
    -------
    found_files : `list` of `str`
        The files that were found. Entries that are not directories are
        passed through unchanged, without checking that they exist.
    """
    file_regex = re.compile(regex)
    found_files: list[str] = []

    # Find all the files of interest
    for file in files:
        if not os.path.isdir(file):
            found_files.append(file)
            continue

        # Use distinct names for the walk results so that the ``files``
        # argument being iterated over is not rebound.
        for root, _, names in os.walk(file):
            for name in names:
                path = os.path.join(root, name)
                if os.path.isfile(path) and file_regex.search(name):
                    found_files.append(path)

    return found_files

125 

126 

def read_basic_metadata_from_file(
    file: str, hdrnum: int, can_raise: bool = True
) -> MutableMapping[str, Any] | None:
    """Read a raw header from a file, merging if necessary.

    Parameters
    ----------
    file : `str`
        Name of file to read. Can be FITS, YAML or JSON. YAML or JSON must be
        a simple top-level dict. Any name not ending in ``.yaml`` or
        ``.json`` is read as FITS.
    hdrnum : `int`
        Header number to read. Only relevant for FITS. If greater than 0
        it will be merged with the primary header. If a negative number is
        given the second header, if present, will be merged with the primary
        header. If there is only a primary header a negative number behaves
        identically to specifying 0 for the HDU number.
    can_raise : `bool`, optional
        Indicate whether the function can raise an exception (default)
        or should return `None` on error. Can still raise if an unexpected
        error is encountered.

    Returns
    -------
    header : `dict`
        The header as a dict. Can be `None` if there was a problem reading
        the file.
    """
    if file.endswith((".yaml", ".json")):
        try:
            md = read_test_file(file)
        except Exception:
            if can_raise:
                raise
            md = None
        # YAML and JSON can't have HDUs so skip merging below
        hdrnum = 0
    else:
        md = _read_fits_metadata(file, 0, can_raise=can_raise)

    if md is None:
        log.warning("Unable to open file %s", file)
        return None

    # A negative header number means "merge the first extension if the
    # primary header says there is one".
    if hdrnum < 0 and "EXTEND" in md and md["EXTEND"]:
        hdrnum = 1

    if hdrnum > 0:
        # Allow this to fail
        mdn = _read_fits_metadata(file, int(hdrnum), can_raise=False)
        # Astropy does not allow append mode since it does not
        # convert lists to multiple cards. Overwrite for now
        if mdn is not None:
            md = merge_headers([md, mdn], mode="overwrite")
        else:
            log.warning("HDU %d was not found in file %s. Ignoring request.", hdrnum, file)

    return md

186 

187 

def read_file_info(
    file: str,
    hdrnum: int,
    print_trace: bool | None = None,
    content_mode: str = "translated",
    content_type: str = "simple",
    outstream: IO | None = None,
) -> str | MutableMapping[str, Any] | ObservationInfo | None:
    """Read information from file.

    Parameters
    ----------
    file : `str`
        The file from which the header is to be read.
    hdrnum : `int`
        The HDU number to read. The primary header is always read and
        merged with the header from this HDU.
    print_trace : `bool` or `None`
        If there is an error reading the file and this parameter is `True`,
        a full traceback of the exception will be reported. If `False` prints
        a one line summary of the error condition. If `None` the exception
        will be allowed to propagate.
    content_mode : `str`
        Content returned. This can be: ``metadata`` to return the unfixed
        metadata headers; ``translated`` to return the output from metadata
        translation.
    content_type : `str`, optional
        Form of content to be returned. Can be either ``json`` to return a
        JSON string, ``simple`` to always return a `dict`, or ``native`` to
        return either a `dict` (for ``metadata``) or `.ObservationInfo` for
        ``translated``.
    outstream : `io.StringIO` or `None`, optional
        Output stream to use for error reports. It is passed as the
        ``file`` argument to `traceback.print_exc` or `print`; `None`
        (default) leaves each of those using its own default stream.

    Returns
    -------
    info : `str`, `dict` or `.ObservationInfo`
        A JSON string if ``content_type`` is ``json``. Otherwise the raw
        header for ``metadata`` mode, or for ``translated`` mode either
        the `.ObservationInfo` (``native``) or the return value of
        `.ObservationInfo.to_simple` (``simple``). Returns `None` if the
        file could not be read, or if there was a problem and
        ``print_trace`` is not `None`.

    Raises
    ------
    ValueError
        Raised if ``content_mode`` or ``content_type`` is not one of the
        supported values. This is raised regardless of ``print_trace``.
    """
    if content_mode not in ("metadata", "translated"):
        raise ValueError(f"Unrecognized content mode request: {content_mode}")

    if content_type not in ("native", "simple", "json"):
        raise ValueError(f"Unrecognized content type request {content_type}")

    # Errors only propagate to the caller when no reporting style was chosen.
    propagate = print_trace is None

    try:
        md = read_basic_metadata_from_file(file, hdrnum, can_raise=propagate)
        if md is None:
            return None

        if content_mode == "metadata":
            # Do not fix the header
            if content_type != "json":
                return md
            # Add a key to tell the reader whether this is md or translated
            md["__CONTENT__"] = content_mode
            try:
                return json.dumps(md)
            except TypeError:
                # Cast to dict and try again -- PropertyList is a problem
                return json.dumps(dict(md))

        obs_info = ObservationInfo(md, pedantic=True, filename=file)
        if content_type == "native":
            return obs_info

        simple = obs_info.to_simple()
        if content_type == "json":
            # Add a key to tell the reader if this is metadata or translated
            simple["__CONTENT__"] = content_mode
            return json.dumps(simple)
        # Only "simple" remains because content_type was validated above.
        return simple
    except Exception as e:
        if propagate:
            raise
        if print_trace:
            traceback.print_exc(file=outstream)
        else:
            print(repr(e), file=outstream)
    return None