Coverage for python/lsst/obs/base/_read_curated_calibs.py: 13%

93 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-10 10:50 +0000

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["CuratedCalibration", "read_all"] 

25 

26import glob 

27import os 

28from collections.abc import Mapping 

29from typing import TYPE_CHECKING, Any, Protocol 

30 

31import dateutil.parser 

32 

33if TYPE_CHECKING: 33 ↛ 34line 33 didn't jump to line 34, because the condition on line 33 was never true

34 import datetime 

35 

36 import lsst.afw.cameraGeom 

37 

38 

class CuratedCalibration(Protocol):
    """Structural interface expected of curated calibration classes.

    Any class handed to the readers in this module as ``calib_class`` must
    provide these two methods.
    """

    @classmethod
    def readText(cls, path: str) -> CuratedCalibration:
        """Construct a calibration object from the text file at ``path``."""

    def getMetadata(self) -> Mapping:
        """Return the metadata mapping attached to this calibration."""

48 

49 

def read_one_calib(
    path: tuple[str, ...],
    chip_id: int | None,
    filter_name: str | None,
    calib_class: type[CuratedCalibration],
) -> tuple[dict[datetime.datetime, CuratedCalibration], str]:
    """Read data for a particular path from the standard format at a
    particular root.

    Parameters
    ----------
    path : `tuple` [`str`]
        This tuple contains the top level of the data tree at index=0,
        and then further optional subdirectories in subsequent
        indices. See Notes below for more details.
    chip_id : `int` or None
        The identifier for the sensor in question. To be used in
        validation.
    filter_name : `str` or None
        The identifier for the filter in question. To be used in
        validation.
    calib_class : `type`
        The class to use to read the curated calibration text file. Must
        support the ``readText()`` and ``getMetadata()`` methods.

    Returns
    -------
    data_dict : `dict`
        A dictionary of calibration objects constructed from the
        appropriate factory class. The key is the validity start time
        as a `datetime.datetime` object, parsed from the file name
        (without its extension).
    calib_type : `str`
        The type of calibrations that have been read and are included
        in the ``data_dict``. This is the last component of ``path[0]``.

    Raises
    ------
    ValueError
        Raised by `check_metadata` if the metadata stored in a file does
        not agree with the metadata inferred from its path.

    Notes
    -----
    Curated calibrations are read from the appropriate ``obs_*_data``
    package, and are required to have a common directory structure to
    be identified and ingested properly. The top-level directories
    are organized by the instrument's ``policyName``. These names are
    generally all lower-case, but this is not universally true.

    Below the top-level instrument directory, subdirectories are named
    after the curated calibration type contained within, with the
    dataset_type_name forced to lowercase. For calibrations that
    depend on the detector (i.e., the defects), the next level of
    subdirectories should contain directories named with the detector
    name, again forced to lowercase.

    For filter dependent calibrations that do not depend on the
    detector (i.e., transmission_filter), the calibrations should be
    grouped into directories named with the physical filter name
    (again, all lowercase) below the dataset_type_name directory.
    Filter dependent calibrations that do depend on the detector
    (i.e., transmission_system), have physical filter named
    directories below the detector level directories.

    Only files ending in ``.ecsv``, ``.yaml`` or ``.json`` directly inside
    the directory named by ``path`` are read.
    """
    search_dir = os.path.join(*path)
    files: list[str] = []
    for ext in (".ecsv", ".yaml", ".json"):
        # glob returns matches in arbitrary order; sort within each extension
        # so that the result is reproducible while the precedence between
        # extensions stays as it always was.
        files.extend(sorted(glob.glob(os.path.join(search_dir, f"*{ext}"))))

    # Convention is that these reside at <instrument>/<calib_type>.
    # Normalize first so a trailing separator on the root does not produce an
    # empty calib_type and a wrong instrument name.
    instrument_dir, calib_type = os.path.split(os.path.normpath(path[0]))
    instrument = os.path.basename(instrument_dir)

    data_dict: dict[datetime.datetime, Any] = {}
    for filename in files:
        # The file stem encodes the start of the validity range.
        date_str = os.path.splitext(os.path.basename(filename))[0]
        valid_start = dateutil.parser.parse(date_str)
        calib = calib_class.readText(filename)
        data_dict[valid_start] = calib
        check_metadata(calib, valid_start, instrument, chip_id, filter_name, filename, calib_type)
    return data_dict, calib_type

123 

124 

def check_metadata(
    obj: Any,
    valid_start: datetime.datetime,
    instrument: str,
    chip_id: int | None,
    filter_name: str | None,
    filepath: str,
    calib_type: str,
) -> None:
    """Check that the metadata is complete and self consistent.

    Parameters
    ----------
    obj : object of same type as the factory
        Object to retrieve metadata from in order to compare with
        metadata inferred from the path. Must support ``getMetadata()``.
    valid_start : `datetime`
        Start of the validity range for data. Accepted for interface
        compatibility; it does not take part in the comparison.
    instrument : `str`
        Name of the instrument in question.
    chip_id : `int` or None
        Identifier of the sensor in question.
    filter_name : `str` or None
        Identifier of the filter in question.
    filepath : `str`
        Path of the file read to construct the data. Only used in the
        error message.
    calib_type : `str`
        Name of the type of data being read.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If the file metadata lacks the mandatory ``INSTRUME`` or
        ``OBSTYPE`` entries.
    ValueError
        If the metadata stored in the file and the metadata encoded
        in the path do not match for any reason, including a detector or
        filter that the path requires but the file does not declare.
    """
    md = obj.getMetadata()
    # It is an error if these two do not exist.
    finst = md["INSTRUME"]
    fcalib_type = md["OBSTYPE"]
    # These may optionally not exist.
    fchip_id = md.get("DETECTOR", None)
    ffilter_name = md.get("FILTER", None)

    if chip_id is not None and fchip_id is not None:
        try:
            fchip_id = int(fchip_id)
        except (TypeError, ValueError):
            # Leave the raw value in place: the comparison below then fails
            # and reports it in the full diagnostic message.
            pass
    if filter_name is not None:
        filter_name = filter_name.lower()
        # A missing FILTER entry stays None so that it is reported as a
        # mismatch rather than failing on ``None.lower()``.
        if ffilter_name is not None:
            ffilter_name = ffilter_name.lower()

    from_file = (finst.lower(), fchip_id, ffilter_name, fcalib_type.lower())
    from_path = (instrument.lower(), chip_id, filter_name, calib_type.lower())
    if from_file != from_path:
        raise ValueError(
            "Path and file metadata do not agree:\n"
            f"Path metadata: {instrument} {chip_id} {filter_name} {calib_type}\n"
            f"File metadata: {finst} {fchip_id} {ffilter_name} {fcalib_type}\n"
            f"File read from : {filepath}\n"
        )

188 

189 

def read_all(
    root: str,
    camera: lsst.afw.cameraGeom.Camera,
    calib_class: type[CuratedCalibration],
    required_dimensions: list[str],
    filters: set[str],
) -> tuple[dict[tuple[str, ...], dict[datetime.datetime, CuratedCalibration]], str]:
    """Read all data from the standard format at a particular root.

    Parameters
    ----------
    root : `str`
        Path to the top level of the data tree. Depending on
        ``required_dimensions`` this is expected to hold directories named
        after the detectors, directories named after the physical filters,
        or the calibration files themselves. Directory names are expected
        to be lower case.
    camera : `lsst.afw.cameraGeom.Camera`
        The camera that goes with the data being read.
    calib_class : `type`
        The class to use to read the curated calibration text file. Must
        support the ``readText()`` and ``getMetadata()`` methods.
    required_dimensions : `list` [`str`]
        Dimensions required for the calibration. Only ``"detector"`` and
        ``"physical_filter"`` influence the directory layout.
    filters : `set` [`str`]
        The known filters for this camera. Used to identify
        filter-dependent calibrations. Directory names are matched against
        the lower-cased filter names with spaces replaced by underscores.

    Returns
    -------
    calibration_data : `dict`
        A dictionary of dictionaries of calibration objects
        constructed with the appropriate factory class. The first key
        is the tuple of path components that was searched (``root``
        followed by the detector and/or filter directory names, as
        applicable), and the second is the validity start time as a
        `datetime.datetime` object.
    calib_type : `str`
        The type of calibrations that have been read and are included
        in ``calibration_data``.

    Raises
    ------
    RuntimeError
        If a directory does not correspond to a detector of ``camera`` or
        to one of ``filters`` where one is required, or if no calibration
        data were found at all.
    ValueError
        If the searched paths yield more than one calibration type.

    Notes
    -----
    Each leaf object in the constructed dictionary has metadata associated with
    it. The detector ID may be retrieved from the DETECTOR entry of that
    metadata.
    """
    calibration_data = {}

    root = os.path.normpath(root)
    # Assumes all directories contain data. os.listdir returns entries in
    # arbitrary order, so sort to make traversal and error reports
    # reproducible.
    dirs = sorted(d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d)))
    if not dirs:
        dirs = [root]

    calib_types = set()
    # We assume the directories have been lowered.
    detector_map = {det.getName().lower(): det.getName() for det in camera}
    filter_map = {filterName.lower().replace(" ", "_"): filterName for filterName in filters}

    needs_detector = "detector" in required_dimensions
    needs_filter = "physical_filter" in required_dimensions

    paths_to_search: list[tuple[str, ...]] = []
    for d in dirs:
        dir_name = os.path.basename(d)
        # Catch possible mistakes:
        if needs_detector:
            if dir_name not in detector_map:
                # Top level directories must be detectors if they're
                # required.
                detectors = list(detector_map)
                max_detectors = 10
                note_str = "knows"
                if len(detectors) > max_detectors:
                    # report example subset
                    note_str = "examples"
                    detectors = detectors[:max_detectors]
                raise RuntimeError(
                    f"Detector {dir_name} not known to supplied camera "
                    f"{camera.getName()} ({note_str}: {','.join(detectors)})"
                )
            if needs_filter:
                # If the calibration depends on both detector and
                # physical_filter, the subdirs here should contain the
                # filter name.
                detector_dir = os.path.join(root, dir_name)
                subdirs = sorted(
                    sd for sd in os.listdir(detector_dir) if os.path.isdir(os.path.join(detector_dir, sd))
                )
                for subdir_name in subdirs:
                    if subdir_name not in filter_map:
                        raise RuntimeError(f"Filter {subdir_name} not known to supplied camera.")
                    paths_to_search.append((root, dir_name, subdir_name))
            else:
                paths_to_search.append((root, dir_name))
        elif needs_filter:
            # If detector is not required, but physical_filter is,
            # then the top level should contain the filter
            # directories.
            if dir_name not in filter_map:
                raise RuntimeError(f"Filter {dir_name} not known to supplied camera.")
            paths_to_search.append((root, dir_name))
        else:
            # Neither detector nor physical_filter are required, so
            # the calibration is global, and will not be found in
            # subdirectories. The root only needs to be searched once,
            # however many subdirectories it happens to contain.
            paths_to_search.append((root,))
            break

    for path in paths_to_search:
        chip_id = None
        filter_name = None
        if needs_detector:
            chip_id = camera[detector_map[path[1]]].getId()
        if needs_filter:
            filter_name = filter_map[path[-1]]

        calibration_data[path], calib_type = read_one_calib(path, chip_id, filter_name, calib_class)

        calib_types.add(calib_type)
        if len(calib_types) != 1:  # set.add(None) has length 1 so None is OK here.
            raise ValueError(f"Error mixing calib types: {calib_types}")

    # Also true when nothing was searched at all, so calib_type is always
    # bound by the time it is returned.
    if all(not found for found in calibration_data.values()):
        raise RuntimeError("No data to ingest")

    return calibration_data, calib_type