Coverage for python/astro_metadata_translator/file_helpers.py: 13%

109 statements  

« prev     ^ index     » next       coverage.py v6.4.1, created at 2022-06-18 02:21 -0700

1# This file is part of astro_metadata_translator. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the LICENSE file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12"""Support functions for script implementations.""" 

13 

14from __future__ import annotations 

15 

16__all__ = ("find_files", "read_basic_metadata_from_file", "read_file_info") 

17 

18import json 

19import os 

20import re 

21import sys 

22import traceback 

23from typing import IO, Any, Iterable, List, MutableMapping, Optional, Union 

24 

25from .headers import merge_headers 

26from .observationInfo import ObservationInfo 

27from .tests import read_test_file 

28 

29# Prefer afw over Astropy 

try:
    import lsst.daf.base  # noqa: F401 need PropertyBase for readMetadata
    from lsst.afw.fits import FitsError, readMetadata

    def _read_fits_metadata(
        file: str, hdu: int, can_raise: bool = False
    ) -> Optional[MutableMapping[str, Any]]:
        """Read a FITS header using afw.

        Parameters
        ----------
        file : `str`
            The file to read.
        hdu : `int`
            The header number to read.
        can_raise : `bool`, optional
            If `True` a `FitsError` from the read is propagated to the
            caller. If `False` (the default) a `FitsError` results in `None`
            being returned instead. Exceptions other than `FitsError` are
            never intercepted.

        Returns
        -------
        md : mapping or `None`
            The requested header as returned by ``readMetadata``. `None` if
            it could not be read and ``can_raise`` is `False`.

        Raises
        ------
        FileNotFoundError
            Raised if ``can_raise`` is `True` and the text of the
            `FitsError` contains ``(104)``.
        FitsError
            Raised if ``can_raise`` is `True` for any other FITS failure.
        """
        try:
            return readMetadata(file, hdu=hdu)
        except FitsError as err:
            if not can_raise:
                return None
            # Try to convert a basic fits error code into the exception a
            # caller would normally expect for a missing file.
            if "(104)" in str(err):
                raise FileNotFoundError(f"No such file or directory: {file}") from err
            raise

except ImportError:
    from astropy.io import fits

    def _read_fits_metadata(
        file: str, hdu: int, can_raise: bool = False
    ) -> Optional[MutableMapping[str, Any]]:
        """Read a FITS header using astropy.

        Takes the same parameters as the afw implementation: ``file`` is
        the path to read, ``hdu`` the header number, and ``can_raise``
        selects between propagating a failure (`True`) and returning `None`
        (`False`, the default).
        """
        try:
            with fits.open(file) as hdu_list:
                # A missing HDU (IndexError) is handled by the same policy
                # as any other failure to open or read the file.
                return hdu_list[hdu].header
        except Exception:
            if can_raise:
                raise
        return None

91 

92 

def find_files(files: Iterable[str], regex: str) -> List[str]:
    """Find files for processing.

    Parameters
    ----------
    files : iterable of `str`
        The files or directories from which the headers are to be read.
    regex : `str`
        Regular expression string used to filter files when a directory is
        scanned. It is matched with `re.search` against the file name only,
        not the full path.

    Returns
    -------
    found_files : `list` of `str`
        The files that were found. Entries of ``files`` that are not
        directories are returned unchanged, without checking that they
        exist or match ``regex``. Directories are replaced by the matching
        regular files found beneath them, searched recursively.
    """
    file_regex = re.compile(regex)
    found_files: List[str] = []

    for file in files:
        if not os.path.isdir(file):
            # Explicitly named paths are passed straight through; any
            # problem with them is reported later when they are read.
            found_files.append(file)
            continue

        # Use distinct names for the walk results so that the ``files``
        # argument being iterated over is not rebound inside its own loop.
        for root, _, names in os.walk(file):
            for name in names:
                path = os.path.join(root, name)
                if os.path.isfile(path) and file_regex.search(name):
                    found_files.append(path)

    return found_files

124 

125 

def read_basic_metadata_from_file(
    file: str, hdrnum: int, errstream: IO = sys.stderr, can_raise: bool = True
) -> Optional[MutableMapping[str, Any]]:
    """Read a raw header from a file, merging if necessary.

    Parameters
    ----------
    file : `str`
        Name of file to read. A name ending in ``.yaml`` is read with
        ``read_test_file``; anything else is read as FITS.
    hdrnum : `int`
        Header number to read. Only relevant for FITS; it is ignored for
        YAML files. If greater than 0 that header is merged with the
        primary header. If negative, header 1 is merged with the primary
        header when the primary header has a true ``EXTEND`` value;
        otherwise only the primary header is returned, as for 0.
    errstream : `io.StringIO`, optional
        Stream that receives the diagnostic messages written when the file
        could not be read or the requested HDU was not found. Defaults to
        `sys.stderr`.
    can_raise : `bool`, optional
        If `True` (the default) a failure to read the primary header is
        allowed to raise. If `False` the failure is reported on
        ``errstream`` and `None` is returned.

    Returns
    -------
    header : `dict`
        The header as a dict. Can be `None` if there was a problem reading
        the file.
    """
    if file.endswith(".yaml"):
        try:
            md = read_test_file(file)
        except Exception:
            if can_raise:
                raise
            md = None
        # YAML can't have HDUs so make sure the merging below is skipped.
        hdrnum = 0
    else:
        md = _read_fits_metadata(file, 0, can_raise=can_raise)

    if md is None:
        print(f"Unable to open file {file}", file=errstream)
        return None

    # A negative request means "merge the first extension if there is one".
    if hdrnum < 0 and "EXTEND" in md and md["EXTEND"]:
        hdrnum = 1

    if hdrnum <= 0:
        return md

    # Reading the extension is allowed to fail without raising.
    extension = _read_fits_metadata(file, int(hdrnum), can_raise=False)
    if extension is None:
        print(f"HDU {hdrnum} was not found in file {file}. Ignoring request.", file=errstream)
        return md

    # Astropy does not allow append mode since it does not
    # convert lists to multiple cards. Overwrite for now.
    return merge_headers([md, extension], mode="overwrite")

188 

189 

def read_file_info(
    file: str,
    hdrnum: int,
    print_trace: Optional[bool] = None,
    content_mode: str = "translated",
    content_type: str = "simple",
    outstream: IO = sys.stdout,
    errstream: IO = sys.stderr,
) -> Optional[Union[str, MutableMapping[str, Any], ObservationInfo]]:
    """Read information from file.

    Parameters
    ----------
    file : `str`
        The file from which the header is to be read.
    hdrnum : `int`
        The HDU number to read. The primary header is always read and
        merged with the header from this HDU.
    print_trace : `bool` or `None`
        Controls error handling once the arguments have been validated.
        If `None` any exception is allowed to propagate. If `True` a full
        traceback is written to ``outstream`` and `None` is returned. If
        `False` a one line summary of the error is written to
        ``outstream`` and `None` is returned.
    content_mode : `str`
        Content returned. This can be: ``metadata`` to return the unfixed
        metadata headers; ``translated`` to return the output from metadata
        translation.
    content_type : `str`, optional
        Form of content to be returned. Can be either ``json`` to return a
        JSON string, ``simple`` to always return a `dict`, or ``native`` to
        return either a `dict` (for ``metadata``) or `ObservationInfo` for
        ``translated``.
    outstream : `io.StringIO`, optional
        Output stream to use for standard messages, including the error
        reports described for ``print_trace``. Defaults to `sys.stdout`.
    errstream : `io.StringIO`, optional
        Stream handed to the header reader for messages that would
        normally be sent to standard error. Defaults to `sys.stderr`.

    Returns
    -------
    simple : `str`, `dict` or `ObservationInfo`
        The content in the requested form. When ``content_type`` is
        ``json`` the string encodes an extra ``__CONTENT__`` key holding
        ``content_mode``. Returns `None` if the header could not be read,
        or if there was a problem and ``print_trace`` is not `None`.

    Raises
    ------
    ValueError
        Raised if ``content_mode`` or ``content_type`` is not one of the
        supported values. This is raised regardless of ``print_trace``.
    """
    if content_mode not in ("metadata", "translated"):
        raise ValueError(f"Unrecognized content mode request: {content_mode}")

    if content_type not in ("native", "simple", "json"):
        raise ValueError(f"Unrecognized content type request {content_type}")

    # With no trace preference the caller wants to see exceptions directly.
    propagate = print_trace is None

    try:
        md = read_basic_metadata_from_file(file, hdrnum, errstream=errstream, can_raise=propagate)
        if md is None:
            return None

        if content_mode == "metadata":
            # Do not fix the header.
            if content_type != "json":
                return md
            # Add a key to tell the reader whether this is md or translated.
            md["__CONTENT__"] = content_mode
            try:
                return json.dumps(md)
            except TypeError:
                # Cast to dict and try again -- PropertyList is a problem.
                return json.dumps(dict(md))

        obs_info = ObservationInfo(md, pedantic=True, filename=file)
        if content_type == "native":
            return obs_info

        simple = obs_info.to_simple()
        if content_type == "simple":
            return simple
        if content_type == "json":
            # Add a key to tell the reader if this is metadata or translated.
            simple["__CONTENT__"] = content_mode
            return json.dumps(simple)

        # Defensive guard; the validation above should make this unreachable.
        raise RuntimeError(f"Logic error. Unrecognized mode for reading file: {content_mode}/{content_type}")
    except Exception as err:
        if propagate:
            raise
        if print_trace:
            traceback.print_exc(file=outstream)
        else:
            print(repr(err), file=outstream)
        return None