Coverage for python/astro_metadata_translator/file_helpers.py: 13%

110 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-21 10:02 +0000

1# This file is part of astro_metadata_translator. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the LICENSE file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12"""Support functions for script implementations.""" 

13 

14from __future__ import annotations 

15 

16__all__ = ("find_files", "read_basic_metadata_from_file", "read_file_info") 

17 

18import json 

19import os 

20import re 

21import sys 

22import traceback 

23from collections.abc import Iterable, MutableMapping 

24from typing import IO, Any 

25 

26from .headers import merge_headers 

27from .observationInfo import ObservationInfo 

28from .tests import read_test_file 

29 

30# Prefer afw over Astropy 

try:
    import lsst.daf.base  # noqa: F401 need PropertyBase for readMetadata
    from lsst.afw.fits import FitsError, readMetadata

    def _read_fits_metadata(file: str, hdu: int, can_raise: bool = False) -> MutableMapping[str, Any] | None:
        """Read a FITS header using afw.

        Parameters
        ----------
        file : `str`
            The file to read.
        hdu : `int`
            The header number to read.
        can_raise : `bool`, optional
            If `True`, a ``FitsError`` from the read is passed on to the
            caller. If `False` (the default) `None` is returned instead.
            Exceptions other than ``FitsError`` are never intercepted here.

        Returns
        -------
        md : `dict`
            The requested header. `None` if it could not be read and
            ``can_raise`` is `False`.

        Notes
        -----
        When ``can_raise`` is `True`, a ``FitsError`` whose message contains
        ``(104)`` is re-raised as `FileNotFoundError`, chained to the
        original error.
        """
        try:
            metadata = readMetadata(file, hdu=hdu)
        except FitsError as err:
            if not can_raise:
                return None
            # Try to convert a basic fits error code
            if "(104)" in str(err):
                raise FileNotFoundError(f"No such file or directory: {file}") from err
            raise err
        return metadata

except ImportError:
    from astropy.io import fits

    def _read_fits_metadata(file: str, hdu: int, can_raise: bool = False) -> MutableMapping[str, Any] | None:
        """Read a FITS header using astropy.

        Parameters
        ----------
        file : `str`
            The file to read.
        hdu : `int`
            The header number to read.
        can_raise : `bool`, optional
            If `True`, any exception from opening the file or indexing the
            HDU is passed on to the caller. If `False` (the default) the
            exception is suppressed.

        Returns
        -------
        md : `dict`
            The requested header. `None` if it could not be read and
            ``can_raise`` is `False`.
        """
        header = None
        try:
            with fits.open(file) as hdu_list:
                # A missing HDU surfaces as an exception from this lookup and
                # is handled by the single handler below.
                header = hdu_list[hdu].header
        except Exception as err:
            if can_raise:
                raise err
        return header

87 

88 

def find_files(files: Iterable[str], regex: str) -> list[str]:
    """Collect the files to be processed.

    Parameters
    ----------
    files : iterable of `str`
        The files or directories from which the headers are to be read.
    regex : `str`
        Regular expression string used to filter files when a directory is
        scanned. It is searched for in the file name only, not the full
        path.

    Returns
    -------
    found_files : `list` of `str`
        The files that were found. Entries of ``files`` that are not
        directories are included as given, without applying ``regex``.
    """
    name_filter = re.compile(regex)
    matched = []

    for entry in files:
        if not os.path.isdir(entry):
            # Not a directory: pass it through untouched.
            matched.append(entry)
            continue

        # Directory: walk the whole tree beneath it.
        for dirpath, _subdirs, filenames in os.walk(entry):
            for fname in filenames:
                full_path = os.path.join(dirpath, fname)
                if os.path.isfile(full_path) and name_filter.search(fname):
                    matched.append(full_path)

    return matched

120 

121 

def read_basic_metadata_from_file(
    file: str, hdrnum: int, errstream: IO = sys.stderr, can_raise: bool = True
) -> MutableMapping[str, Any] | None:
    """Read a raw header from a file, merging if necessary.

    Parameters
    ----------
    file : `str`
        Name of file to read. Can be FITS or YAML. A name ending in
        ``.yaml`` is read as YAML; anything else is read as FITS. YAML must
        be a simple top-level dict.
    hdrnum : `int`
        Header number to read. Only relevant for FITS. If greater than 0
        it will be merged with the primary header. If a negative number is
        given the second header, if present, will be merged with the primary
        header. If there is only a primary header a negative number behaves
        identically to specifying 0 for the HDU number.
    errstream : `io.StringIO`, optional
        Stream to send messages that would normally be sent to standard
        error. Defaults to `sys.stderr`.
    can_raise : `bool`, optional
        Indicate whether the function can raise an exception (default)
        or should return `None` on error. Can still raise if an unexpected
        error is encountered.

    Returns
    -------
    header : `dict`
        The header as a dict. Can be `None` if there was a problem reading
        the file.
    """
    if file.endswith(".yaml"):
        try:
            header = read_test_file(file)
        except Exception as err:
            if can_raise:
                raise err
            header = None
        # YAML can't have HDUs so skip merging below
        hdrnum = 0
    else:
        header = _read_fits_metadata(file, 0, can_raise=can_raise)

    if header is None:
        print(f"Unable to open file {file}", file=errstream)
        return None

    # A negative request means "merge the first extension if the primary
    # header advertises one".
    if hdrnum < 0 and "EXTEND" in header and header["EXTEND"]:
        hdrnum = 1

    if hdrnum <= 0:
        return header

    # Allow this to fail
    extension = _read_fits_metadata(file, int(hdrnum), can_raise=False)
    if extension is None:
        print(f"HDU {hdrnum} was not found in file {file}. Ignoring request.", file=errstream)
        return header

    # Astropy does not allow append mode since it does not
    # convert lists to multiple cards. Overwrite for now
    return merge_headers([header, extension], mode="overwrite")

184 

185 

def read_file_info(
    file: str,
    hdrnum: int,
    print_trace: bool | None = None,
    content_mode: str = "translated",
    content_type: str = "simple",
    outstream: IO = sys.stdout,
    errstream: IO = sys.stderr,
) -> str | MutableMapping[str, Any] | ObservationInfo | None:
    """Read information from file.

    Parameters
    ----------
    file : `str`
        The file from which the header is to be read.
    hdrnum : `int`
        The HDU number to read. The primary header is always read and
        merged with the header from this HDU.
    print_trace : `bool` or `None`
        If there is an error reading the file and this parameter is `True`,
        a full traceback of the exception will be reported. If `False` prints
        a one line summary of the error condition. If `None` the exception
        will be allowed to propagate.
    content_mode : `str`
        Content returned. This can be: ``metadata`` to return the unfixed
        metadata headers; ``translated`` to return the output from metadata
        translation.
    content_type : `str`, optional
        Form of content to be returned. Can be either ``json`` to return a
        JSON string, ``simple`` to always return a `dict`, or ``native`` to
        return either a `dict` (for ``metadata``) or `.ObservationInfo` for
        ``translated``.
    outstream : `io.StringIO`, optional
        Output stream to use for standard messages. Defaults to `sys.stdout`.
        The traceback or one line error summary requested through
        ``print_trace`` is written to this stream.
    errstream : `io.StringIO`, optional
        Stream to send messages that would normally be sent to standard
        error. Defaults to `sys.stderr`. Handed on to the header reader.

    Returns
    -------
    simple : `dict` of `str` or `.ObservationInfo`
        The return value of `.ObservationInfo.to_simple`. Returns `None`
        if there was a problem and ``print_trace`` is not `None`.

    Raises
    ------
    ValueError
        Raised if ``content_mode`` or ``content_type`` is not one of the
        recognized values.
    """
    if content_mode not in ("metadata", "translated"):
        raise ValueError(f"Unrecognized content mode request: {content_mode}")

    if content_type not in ("native", "simple", "json"):
        raise ValueError(f"Unrecognized content type request {content_type}")

    # With no trace preference the caller wants exceptions, not messages.
    propagate = print_trace is None

    try:
        header = read_basic_metadata_from_file(file, hdrnum, errstream=errstream, can_raise=propagate)
        if header is None:
            return None

        if content_mode == "metadata":
            # Do not fix the header
            if content_type != "json":
                return header
            # Add a key to tell the reader whether this is md or translated
            header["__CONTENT__"] = content_mode
            try:
                return json.dumps(header)
            except TypeError:
                # Cast to dict and try again -- PropertyList is a problem
                return json.dumps(dict(header))

        info = ObservationInfo(header, pedantic=True, filename=file)
        if content_type == "native":
            return info

        payload = info.to_simple()
        if content_type == "simple":
            return payload
        if content_type == "json":
            # Add a key to tell the reader if this is metadata or translated
            payload["__CONTENT__"] = content_mode
            return json.dumps(payload)
        raise RuntimeError(f"Logic error. Unrecognized mode for reading file: {content_mode}/{content_type}")
    except Exception as err:
        if propagate:
            raise err
        # Error reports go to outstream, not errstream.
        if print_trace:
            traceback.print_exc(file=outstream)
        else:
            print(repr(err), file=outstream)
    return None

273 return None