Coverage for python/lsst/obs/base/_read_curated_calibs.py: 12%

95 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-27 11:09 +0000

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["CuratedCalibration", "read_all"] 

25 

26import glob 

27import os 

28from collections.abc import Mapping 

29from typing import TYPE_CHECKING, Any, Protocol 

30 

31import dateutil.parser 

32 

33if TYPE_CHECKING: 33 ↛ 34line 33 didn't jump to line 34, because the condition on line 33 was never true

34 import datetime 

35 

36 import lsst.afw.cameraGeom 

37 

38 

class CuratedCalibration(Protocol):
    """Structural interface expected of curated calibration classes.

    Any class handed to the readers in this module must provide these
    two methods; no inheritance from this protocol is required.
    """

    @classmethod
    def readText(cls, path: str) -> CuratedCalibration:
        """Build a calibration object from the text file at ``path``."""

    def getMetadata(self) -> Mapping:
        """Return the metadata mapping attached to this calibration."""

50 

51 

def read_one_calib(
    path: tuple[str, ...],
    chip_id: int | None,
    filter_name: str | None,
    calib_class: type[CuratedCalibration],
) -> tuple[dict[datetime.datetime, CuratedCalibration], str]:
    """Read data for a particular path from the standard format at a
    particular root.

    Parameters
    ----------
    path : `tuple` [`str`]
        This tuple contains the top level of the data tree at index=0,
        and then further optional subdirectories in subsequent
        indices.  See Notes below for more details.
    chip_id : `int` or None
        The identifier for the sensor in question.  To be used in
        validation.
    filter_name : `str` or None
        The identifier for the filter in question.  To be used in
        validation.
    calib_class : `type` [`CuratedCalibration`]
        The class to use to read the curated calibration text file. Must
        support the ``readText()`` method.

    Returns
    -------
    data_dict : `dict`
        A dictionary of calibration objects constructed from the
        appropriate factory class.  The key is the validity start time
        as a `datetime.datetime` object.
    calib_type : `str`
        The type of calibrations that have been read and are included
        in the ``data_dict``.

    Raises
    ------
    ValueError
        Raised by ``check_metadata`` if the metadata stored in a file
        disagrees with the metadata inferred from its path.

    Notes
    -----
    Curated calibrations are read from the appropriate
    ``obs_<instrument>_data`` package, and are required to have a common
    directory structure to be identified and ingested properly.  The
    top-level directories are organized by the instrument's
    ``policyName``.  These names are generally all lower-case, but this
    is not universally true.

    Below the top-level instrument directory, subdirectories named
    after the curated calibration type contained within, with the
    dataset_type_name forced to lowercase.  For calibrations that
    depend on the detector (i.e., the defects), the next level of
    subdirectories should contain directories named with the detector
    name, again forced to lowercase.

    For filter dependent calibrations that do not depend on the
    detector (i.e., transmission_filter), the calibrations should be
    grouped into directories named with the physical filter name
    (again, all lowercase) below the dataset_type_name directory.
    Filter dependent calibrations that do depend on the detector
    (i.e., transmission_system), have physical filter named
    directories below the detector level directories.

    The validity start time of each file is parsed from its base name
    (without extension).  If two files parse to the same start time the
    one read last replaces the earlier one in ``data_dict``.
    """
    # Gather every file with a recognised extension that sits directly in
    # the directory formed by joining all path components.
    files: list[str] = []
    for ext in (".ecsv", ".yaml", ".json"):
        files.extend(glob.glob(os.path.join(*path, f"*{ext}")))

    # Convention is that calibrations reside at <instrument>/<calib_type>.
    # Normalise first: without it a trailing separator on path[0] makes
    # os.path.split() return an empty calib_type and shifts the calib
    # directory name into the instrument slot.
    parent, calib_type = os.path.split(os.path.normpath(path[0]))
    instrument = os.path.basename(parent)

    data_dict: dict[datetime.datetime, Any] = {}
    for f in files:
        # The file's base name (minus extension) encodes the start of validity.
        date_str = os.path.splitext(os.path.basename(f))[0]
        valid_start = dateutil.parser.parse(date_str)
        calib = calib_class.readText(f)
        check_metadata(calib, valid_start, instrument, chip_id, filter_name, f, calib_type)
        data_dict[valid_start] = calib
    return data_dict, calib_type

125 

126 

def check_metadata(
    obj: Any,
    valid_start: datetime.datetime,
    instrument: str,
    chip_id: int | None,
    filter_name: str | None,
    filepath: str,
    calib_type: str,
) -> None:
    """Check that the metadata is complete and self consistent.

    Parameters
    ----------
    obj : object of same type as the factory
        Object to retrieve metadata from in order to compare with
        metadata inferred from the path.  Must support
        ``getMetadata()``.
    valid_start : `datetime`
        Start of the validity range for data.  Not used in the
        comparison.
    instrument : `str`
        Name of the instrument in question.
    chip_id : `int` or None
        Identifier of the sensor in question.
    filter_name : `str` or None
        Identifier of the filter in question.
    filepath : `str`
        Path of the file read to construct the data.  Only used in the
        error message.
    calib_type : `str`
        Name of the type of data being read.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the metadata stored in the file and the metadata encoded
        in the path do not match for any reason, including a detector
        or filter that the path implies but the file does not record.
    KeyError
        If the file metadata lacks the mandatory ``INSTRUME`` or
        ``OBSTYPE`` entries.
    """
    md = obj.getMetadata()
    # It is an error if these two do not exist.
    finst = md["INSTRUME"]
    fcalib_type = md["OBSTYPE"]
    # These may optionally not exist.
    fchip_id = md.get("DETECTOR", None)
    ffilter_name = md.get("FILTER", None)

    if chip_id is not None and fchip_id is not None:
        try:
            fchip_id = int(fchip_id)
        except (TypeError, ValueError):
            # Keep the raw value: it cannot equal the integer chip_id, so
            # the mismatch report below is raised with full context.
            pass
    if filter_name is not None:
        filter_name = filter_name.lower()
        # A missing FILTER entry stays None and is reported as a mismatch
        # rather than failing on ``None.lower()``.
        if ffilter_name is not None:
            ffilter_name = ffilter_name.lower()

    # Instrument and calibration type are compared case-insensitively.
    file_md = (finst.lower(), fchip_id, ffilter_name, fcalib_type.lower())
    path_md = (instrument.lower(), chip_id, filter_name, calib_type.lower())
    if file_md != path_md:
        raise ValueError(
            "Path and file metadata do not agree:\n"
            f"Path metadata: {instrument} {chip_id} {filter_name} {calib_type}\n"
            f"File metadata: {finst} {fchip_id} {ffilter_name} {fcalib_type}\n"
            f"File read from : {filepath}\n"
        )

190 

191 

def read_all(
    root: str,
    camera: lsst.afw.cameraGeom.Camera,
    calib_class: type[CuratedCalibration],
    required_dimensions: list[str],
    filters: set[str],
) -> tuple[dict[tuple[str, ...], dict[datetime.datetime, CuratedCalibration]], str]:
    """Read all data from the standard format at a particular root.

    Parameters
    ----------
    root : `str`
        Path to the top level of the data tree.  This is expected to hold
        directories named after the sensor names.  They are expected to be
        lower case.
    camera : `lsst.afw.cameraGeom.Camera`
        The camera that goes with the data being read.
    calib_class : `type` [`CuratedCalibration`]
        The class to use to read the curated calibration text file. Must
        support the ``readText()`` and ``getMetadata()`` methods.
    required_dimensions : `list` [`str`]
        Dimensions required for the calibration.  Only ``"detector"``
        and ``"physical_filter"`` influence how the tree is walked.
    filters : `set` [`str`]
        The known filters for this camera.  Used to identify
        filter-dependent calibrations.  Directory names are matched
        against the lower-cased filter name with spaces replaced by
        underscores.

    Returns
    -------
    calibration_data : `dict`
        A dictionary of dictionaries of calibration objects
        constructed with the appropriate factory class.  The first key
        is the tuple of path components that was searched (``root``
        followed by any detector and/or filter directory names), and
        the second is the validity start time as a `datetime.datetime`
        object.
    calib_type : `str`
        The type of calibrations that have been read and are included
        in ``calibration_data``.

    Raises
    ------
    RuntimeError
        If a directory name does not correspond to a known detector or
        filter when that dimension is required, or if no calibration
        data was found at all.
    ValueError
        If more than one calibration type is encountered.

    Notes
    -----
    Each leaf object in the constructed dictionary has metadata associated with
    it.  The detector ID may be retrieved from the DETECTOR entry of that
    metadata.
    """
    calibration_data: dict[tuple[str, ...], dict[datetime.datetime, CuratedCalibration]] = {}

    root = os.path.normpath(root)
    # Assumes all directories contain data.
    dirs = [entry for entry in os.listdir(root) if os.path.isdir(os.path.join(root, entry))]
    if not dirs:
        # No subdirectories: treat the root itself as the only candidate.
        dirs = [root]

    calib_types = set()
    # We assume the directories have been lowered.
    detector_map = {det.getName().lower(): det.getName() for det in camera}
    filter_map = {filterName.lower().replace(" ", "_"): filterName for filterName in filters}

    needs_detector = "detector" in required_dimensions
    needs_filter = "physical_filter" in required_dimensions

    paths_to_search: list[tuple[str, ...]] = []
    for d in dirs:
        dir_name = os.path.basename(d)
        # Catch possible mistakes:
        if needs_detector:
            if dir_name not in detector_map:
                # Top level directories must be detectors if they're
                # required.
                detectors = list(detector_map)
                max_detectors = 10
                note_str = "knows"
                if len(detectors) > max_detectors:
                    # report example subset
                    note_str = "examples"
                    detectors = detectors[:max_detectors]
                raise RuntimeError(
                    f"Detector {dir_name} not known to supplied camera "
                    f"{camera.getName()} ({note_str}: {','.join(detectors)})"
                )
            elif needs_filter:
                # If the calibration depends on both detector and
                # physical_filter, the subdirs here should contain the
                # filter name.
                detector_dir = os.path.join(root, dir_name)
                subdirs = [sd for sd in os.listdir(detector_dir) if os.path.isdir(os.path.join(detector_dir, sd))]
                for sd in subdirs:
                    subdir_name = os.path.basename(sd)
                    if subdir_name not in filter_map:
                        raise RuntimeError(f"Filter {subdir_name} not known to supplied camera.")
                    paths_to_search.append((root, dir_name, subdir_name))
            else:
                paths_to_search.append((root, dir_name))
        elif needs_filter:
            # If detector is not required, but physical_filter is,
            # then the top level should contain the filter
            # directories.
            if dir_name not in filter_map:
                raise RuntimeError(f"Filter {dir_name} not known to supplied camera.")
            paths_to_search.append((root, dir_name))
        else:
            # Neither detector nor physical_filter are required, so
            # the calibration is global, and will not be found in
            # subdirectories.  Queue the root only once, however many
            # subdirectories exist, so it is not re-read needlessly.
            global_path = (root,)
            if global_path not in paths_to_search:
                paths_to_search.append(global_path)

    for path in paths_to_search:
        chip_id = None
        filter_name = None
        if needs_detector:
            # path[1] is the lower-cased detector directory name.
            chip_id = camera[detector_map[path[1]]].getId()
        if needs_filter:
            # The filter directory is always the last path component.
            filter_name = filter_map[path[-1]]

        calibration_data[path], calib_type = read_one_calib(path, chip_id, filter_name, calib_class)

        calib_types.add(calib_type)
        if len(calib_types) != 1:  # set.add(None) has length 1 so None is OK here.
            raise ValueError(f"Error mixing calib types: {calib_types}")

    # Every searched path yielded an empty dict (or nothing was searched).
    if not any(calibration_data.values()):
        raise RuntimeError("No data to ingest")

    return calibration_data, calib_type