Coverage for python/astro_metadata_translator/file_helpers.py: 13%

110 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-27 02:38 -0700

1# This file is part of astro_metadata_translator. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the LICENSE file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12"""Support functions for script implementations.""" 

13 

14from __future__ import annotations 

15 

16__all__ = ("find_files", "read_basic_metadata_from_file", "read_file_info") 

17 

18import json 

19import os 

20import re 

21import sys 

22import traceback 

23from collections.abc import Iterable, MutableMapping 

24from typing import IO, Any 

25 

26from .headers import merge_headers 

27from .observationInfo import ObservationInfo 

28from .tests import read_test_file 

29 

30# Prefer afw over Astropy 

try:
    import lsst.daf.base  # noqa: F401 need PropertyBase for readMetadata
    from lsst.afw.fits import FitsError, readMetadata

    def _read_fits_metadata(file: str, hdu: int, can_raise: bool = False) -> MutableMapping[str, Any] | None:
        """Read a single FITS header using afw.

        Parameters
        ----------
        file : `str`
            The file to read.
        hdu : `int`
            The header number to read.
        can_raise : `bool`, optional
            If `True` a read failure is raised to the caller. If `False`
            (the default) a `FitsError` results in `None` being returned.
            Exceptions other than `FitsError` are never intercepted.

        Returns
        -------
        md : `dict`
            The requested header. `None` if it could not be read and
            ``can_raise`` is `False`.

        Raises
        ------
        FileNotFoundError
            Raised if ``can_raise`` is `True` and the `FitsError` text
            contains the code ``(104)``.
        FitsError
            Raised if ``can_raise`` is `True` for any other FITS failure.
        """
        try:
            return readMetadata(file, hdu=hdu)
        except FitsError as err:
            if not can_raise:
                return None
            # Translate the basic "could not open" FITS error code into the
            # standard Python exception; everything else passes unchanged.
            if "(104)" in str(err):
                raise FileNotFoundError(f"No such file or directory: {file}") from err
            raise err

except ImportError:
    from astropy.io import fits

    def _read_fits_metadata(file: str, hdu: int, can_raise: bool = False) -> MutableMapping[str, Any] | None:
        """Read a single FITS header using astropy.

        Fallback used when afw cannot be imported. The parameters and
        return value match the afw implementation above: the header of
        ``hdu`` in ``file`` is returned, or `None` if reading failed and
        ``can_raise`` is `False`.
        """
        try:
            with fits.open(file) as hdu_list:
                return hdu_list[hdu].header
        except Exception as err:
            # Covers both a failure to open the file and a request for an
            # HDU that does not exist (IndexError).
            if can_raise:
                raise err
        return None

88 

89 

def find_files(files: Iterable[str], regex: str) -> list[str]:
    """Find files for processing.

    Parameters
    ----------
    files : iterable of `str`
        The files or directories from which the headers are to be read.
    regex : `str`
        Regular expression string used to filter files when a directory is
        scanned. It is matched (with `re.search`) against the file name
        only, not the full path.

    Returns
    -------
    found_files : `list` of `str`
        The files that were found. Entries in ``files`` that are not
        directories are returned as given, without applying ``regex`` and
        without checking that they exist.
    """
    file_regex = re.compile(regex)
    found_files: list[str] = []

    for candidate in files:
        if not os.path.isdir(candidate):
            # Explicitly named paths are passed straight through.
            found_files.append(candidate)
            continue

        # Recursively scan the directory. Distinct names are used for the
        # walk results so that the ``files`` argument is never rebound
        # while it is being iterated over.
        for root, _, names in os.walk(candidate):
            for name in names:
                path = os.path.join(root, name)
                if os.path.isfile(path) and file_regex.search(name):
                    found_files.append(path)

    return found_files

121 

122 

def read_basic_metadata_from_file(
    file: str, hdrnum: int, errstream: IO = sys.stderr, can_raise: bool = True
) -> MutableMapping[str, Any] | None:
    """Read a raw header from a file, merging HDUs if requested.

    Parameters
    ----------
    file : `str`
        Name of file to read. Names ending in ``.yaml`` are read with
        `read_test_file`; everything else is read as FITS.
    hdrnum : `int`
        Header number to read. Only relevant for FITS; it is ignored for
        YAML files. The primary header (HDU 0) is always read. A positive
        value causes that HDU to be merged with the primary header. A
        negative value causes HDU 1 to be merged if the primary header has
        a true ``EXTEND`` entry; otherwise it behaves like 0.
    errstream : `io.StringIO`, optional
        Stream to receive the messages reporting an unreadable file or a
        missing HDU. Defaults to `sys.stderr`.
    can_raise : `bool`, optional
        If `True` (the default) a failure to read the file is raised.
        If `False` such a failure results in `None` being returned.
        A failure to read the additional HDU is never raised.

    Returns
    -------
    header : `dict`
        The header as a dict. Can be `None` if there was a problem reading
        the file.
    """
    if file.endswith(".yaml"):
        # YAML files have no HDUs, so no merging is attempted below.
        hdrnum = 0
        try:
            header = read_test_file(file)
        except Exception as err:
            if can_raise:
                raise err
            header = None
    else:
        header = _read_fits_metadata(file, 0, can_raise=can_raise)

    if header is None:
        print(f"Unable to open file {file}", file=errstream)
        return None

    # A negative request means "merge the first extension if there is one".
    if hdrnum < 0 and "EXTEND" in header and header["EXTEND"]:
        hdrnum = 1

    if hdrnum <= 0:
        return header

    # The extra HDU is optional, so this read is allowed to fail.
    extension = _read_fits_metadata(file, int(hdrnum), can_raise=False)
    if extension is None:
        print(f"HDU {hdrnum} was not found in file {file}. Ignoring request.", file=errstream)
        return header

    # Astropy does not allow append mode since it does not convert lists
    # to multiple cards. Overwrite for now.
    return merge_headers([header, extension], mode="overwrite")

185 

186 

def read_file_info(
    file: str,
    hdrnum: int,
    print_trace: bool | None = None,
    content_mode: str = "translated",
    content_type: str = "simple",
    outstream: IO = sys.stdout,
    errstream: IO = sys.stderr,
) -> str | MutableMapping[str, Any] | ObservationInfo | None:
    """Read information from a file, optionally translating it.

    Parameters
    ----------
    file : `str`
        The file from which the header is to be read.
    hdrnum : `int`
        The HDU number to read, passed on to
        `read_basic_metadata_from_file`.
    print_trace : `bool` or `None`
        Controls error handling. If `None` any exception is allowed to
        propagate. If `True` a full traceback is written to ``outstream``
        and `None` is returned. If `False` a one line summary (the
        ``repr`` of the exception) is written instead.
    content_mode : `str`
        Content returned. This can be: ``metadata`` to return the unfixed
        metadata headers; ``translated`` to return the output from metadata
        translation.
    content_type : `str`, optional
        Form of content to be returned. Can be either ``json`` to return a
        JSON string, ``simple`` to return a `dict`, or ``native`` to
        return either a `dict` (for ``metadata``) or `ObservationInfo` for
        ``translated``.
    outstream : `io.StringIO`, optional
        Stream that receives the error report when ``print_trace`` is not
        `None`. Defaults to `sys.stdout`.
    errstream : `io.StringIO`, optional
        Stream handed to `read_basic_metadata_from_file` for its messages.
        Defaults to `sys.stderr`.

    Returns
    -------
    simple : `str`, `dict` or `ObservationInfo`
        The content in the requested form. `None` if the header could not
        be read, or if there was a problem and ``print_trace`` is not
        `None`.

    Raises
    ------
    ValueError
        Raised if ``content_mode`` or ``content_type`` is not one of the
        supported values. This is raised regardless of ``print_trace``.
    """
    if content_mode not in ("metadata", "translated"):
        raise ValueError(f"Unrecognized content mode request: {content_mode}")

    if content_type not in ("native", "simple", "json"):
        raise ValueError(f"Unrecognized content type request {content_type}")

    # With no trace preference the caller wants exceptions to propagate.
    propagate = print_trace is None

    try:
        header = read_basic_metadata_from_file(file, hdrnum, errstream=errstream, can_raise=propagate)
        if header is None:
            return None

        if content_mode == "metadata":
            # The header is returned as read, without translation.
            if content_type != "json":
                return header
            # Add a key to tell the reader whether this is md or translated
            header["__CONTENT__"] = content_mode
            try:
                return json.dumps(header)
            except TypeError:
                # Cast to dict and try again -- PropertyList is a problem
                return json.dumps(dict(header))

        info = ObservationInfo(header, pedantic=True, filename=file)
        if content_type == "native":
            return info

        simplified = info.to_simple()
        if content_type == "simple":
            return simplified
        if content_type == "json":
            # Add a key to tell the reader if this is metadata or translated
            simplified["__CONTENT__"] = content_mode
            return json.dumps(simplified)
        raise RuntimeError(f"Logic error. Unrecognized mode for reading file: {content_mode}/{content_type}")
    except Exception as err:
        if propagate:
            raise err
        if print_trace:
            traceback.print_exc(file=outstream)
        else:
            print(repr(err), file=outstream)
    return None