Coverage for python/lsst/obs/base/_read_curated_calibs.py: 12%

95 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-25 09:47 +0000

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["CuratedCalibration", "read_all"] 

25 

26import glob 

27import os 

28from collections.abc import Mapping 

29from typing import TYPE_CHECKING, Any, Protocol 

30 

31import dateutil.parser 

32 

if TYPE_CHECKING:

34 import datetime 

35 

36 import lsst.afw.cameraGeom 

37 

38 

class CuratedCalibration(Protocol):
    """Interface required of curated calibration classes.

    Any class handled by this module must be constructible from a text
    file and must expose its metadata as a mapping.
    """

    @classmethod
    def readText(cls, path: str) -> CuratedCalibration:
        """Construct a calibration by reading the text file at ``path``."""
        ...

    def getMetadata(self) -> Mapping:
        """Return the metadata mapping associated with this calibration."""
        ...

50 

51 

def read_one_calib(
    path: tuple[str, ...],
    chip_id: int | None,
    filter_name: str | None,
    calib_class: type[CuratedCalibration],
) -> tuple[dict[datetime.datetime, CuratedCalibration], str]:
    """Read data for a particular path from the standard format at a
    particular root.

    Parameters
    ----------
    path : `tuple` [`str`]
        This tuple contains the top level of the data tree at index=0,
        and then further optional subdirectories in subsequent
        indices. See Notes below for more details.
    chip_id : `int` or None
        The identifier for the sensor in question. To be used in
        validation.
    filter_name : `str` or None
        The identifier for the filter in question. To be used in
        validation.
    calib_class : `Any`
        The class to use to read the curated calibration text file. Must
        support the ``readText()`` method.

    Returns
    -------
    data_dict : `dict`
        A dictionary of objects constructed from the appropriate factory
        class. The key is the validity start time as a `datetime` object.
    data_name : `str`
        The name of the calibration data type, taken from the final
        component of the top-level path.

    Notes
    -----
    Curated calibrations are read from the instrument's curated
    calibration data package (an ``obs_*_data`` package), and are
    required to have a common directory structure to be identified and
    ingested properly. The top-level directories are organized by the
    instrument's ``policyName``. These names are generally all
    lower-case, but this is not universally true.

    Below the top-level instrument directory, subdirectories named
    after the curated calibration type contained within, with the
    dataset_type_name forced to lowercase. For calibrations that
    depend on the detector (i.e., the defects), the next level of
    subdirectories should contain directories named with the detector
    name, again forced to lowercase.

    For filter dependent calibrations that do not depend on the
    detector (i.e., transmission_filter), the calibrations should be
    grouped into directories named with the physical filter name
    (again, all lowercase) below the dataset_type_name directory.
    Filter dependent calibrations that do depend on the detector
    (i.e., transmission_system), have physical filter named
    directories below the detector level directories.
    """
    # Gather every candidate calibration file under the leaf directory.
    candidate_files = [
        fname
        for ext in (".ecsv", ".yaml", ".json")
        for fname in glob.glob(os.path.join(*path, f"*{ext}"))
    ]

    # Convention is that the tree resides at <instrument>/<data_name>.
    top_dir, data_name = os.path.split(path[0])
    instrument = os.path.basename(top_dir)

    calibs: dict[datetime.datetime, Any] = {}
    for fname in candidate_files:
        # The file stem encodes the validity-range start time.
        stem = os.path.splitext(os.path.basename(fname))[0]
        valid_start = dateutil.parser.parse(stem)
        calib = calib_class.readText(fname)
        calibs[valid_start] = calib
        # Ensure file metadata agrees with what the path implies.
        check_metadata(calib, valid_start, instrument, chip_id, filter_name, fname, data_name)
    return calibs, data_name

121 

122 

def check_metadata(
    obj: Any,
    valid_start: datetime.datetime,
    instrument: str,
    chip_id: int | None,
    filter_name: str | None,
    filepath: str,
    data_name: str,
) -> None:
    """Check that the metadata is complete and self consistent.

    Parameters
    ----------
    obj : object of same type as the factory
        Object to retrieve metadata from in order to compare with
        metadata inferred from the path.
    valid_start : `datetime`
        Start of the validity range for data.
    instrument : `str`
        Name of the instrument in question.
    chip_id : `int`
        Identifier of the sensor in question.
    filter_name : `str`
        Identifier of the filter in question.
    filepath : `str`
        Path of the file read to construct the data.
    data_name : `str`
        Name of the type of data being read.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the metadata read from the file and the metadata inferred
        from the path do not match for any reason.
    """
    md = obj.getMetadata()
    # INSTRUME and OBSTYPE are mandatory; a KeyError here indicates a
    # malformed calibration file.
    finst = md["INSTRUME"]
    fdata_name = md["OBSTYPE"]
    # DETECTOR and FILTER only appear for dimension-dependent calibrations.
    fchip_id = md.get("DETECTOR", None)
    ffilter_name = md.get("FILTER", None)

    # Normalize the optional entries so they compare cleanly with the
    # path-derived values.
    if chip_id is not None:
        fchip_id = int(fchip_id)
    if filter_name is not None:
        ffilter_name = ffilter_name.lower()
        filter_name = filter_name.lower()

    from_file = (finst.lower(), fchip_id, ffilter_name, fdata_name.lower())
    from_path = (instrument.lower(), chip_id, filter_name, data_name.lower())
    if from_file != from_path:
        raise ValueError(
            "Path and file metadata do not agree:\n"
            f"Path metadata: {instrument} {chip_id} {filter_name} {data_name}\n"
            f"File metadata: {finst} {fchip_id} {ffilter_name} {fdata_name}\n"
            f"File read from : {filepath}\n"
        )

186 

187 

def read_all(
    root: str,
    camera: lsst.afw.cameraGeom.Camera,
    calib_class: type[CuratedCalibration],
    required_dimensions: list[str],
    filters: set[str],
) -> tuple[dict[tuple[str, ...], dict[datetime.datetime, CuratedCalibration]], str]:
    """Read all data from the standard format at a particular root.

    Parameters
    ----------
    root : `str`
        Path to the top level of the data tree. This is expected to hold
        directories named after the sensor names. They are expected to be
        lower case.
    camera : `lsst.afw.cameraGeom.Camera`
        The camera that goes with the data being read.
    calib_class : `Any`
        The class to use to read the curated calibration text file. Must
        support the ``readText()`` and ``getMetadata()`` methods.
    required_dimensions : `list` [`str`]
        Dimensions required for the calibration.
    filters : `set` [`str`]
        The known filters for this camera. Used to identify
        filter-dependent calibrations.

    Returns
    -------
    calibration_data : `dict`
        A dictionary of dictionaries of objects constructed with the
        appropriate factory class. The first key is the sensor name lowered,
        and the second is the validity start time as a `datetime` object.
    calib_type : `str`
        The name of the calibration data type that was read.

    Raises
    ------
    RuntimeError
        Raised if a directory name matches neither a known detector nor a
        known filter, or if no data was found to ingest.
    ValueError
        Raised if more than one calibration type is found under ``root``.

    Notes
    -----
    Each leaf object in the constructed dictionary has metadata associated with
    it. The detector ID may be retrieved from the DETECTOR entry of that
    metadata.
    """
    calibration_data: dict[tuple[str, ...], dict[datetime.datetime, CuratedCalibration]] = {}

    root = os.path.normpath(root)
    dirs = os.listdir(root)  # assumes all directories contain data
    dirs = [d for d in dirs if os.path.isdir(os.path.join(root, d))]
    if not dirs:
        # No subdirectories: the calibration files live directly at root.
        dirs = [root]

    calib_types: set[str] = set()
    # We assume the directories have been lowered; map lowered names back
    # to the camera's canonical detector and filter names.
    detector_map = {det.getName().lower(): det.getName() for det in camera}
    filter_map = {filterName.lower().replace(" ", "_"): filterName for filterName in filters}

    # Each entry is the tuple of path components naming one leaf directory
    # that read_one_calib will scan.
    paths_to_search: list[tuple[str, ...]] = []
    for d in dirs:
        dir_name = os.path.basename(d)
        # Catch possible mistakes:
        if "detector" in required_dimensions:
            if dir_name not in detector_map:
                # Top level directories must be detectors if they're
                # required.
                detectors = list(detector_map)
                max_detectors = 10
                note_str = "knows"
                if len(detectors) > max_detectors:
                    # report example subset
                    note_str = "examples"
                    detectors = detectors[:max_detectors]
                raise RuntimeError(
                    f"Detector {dir_name} not known to supplied camera "
                    f"{camera.getName()} ({note_str}: {','.join(detectors)})"
                )
            elif "physical_filter" in required_dimensions:
                # If the calibration depends on both detector and
                # physical_filter, the subdirs here should contain the
                # filter name.
                subdirs = os.listdir(os.path.join(root, dir_name))
                subdirs = [d for d in subdirs if os.path.isdir(os.path.join(root, dir_name, d))]
                for sd in subdirs:
                    subdir_name = os.path.basename(sd)
                    if subdir_name not in filter_map:
                        raise RuntimeError(f"Filter {subdir_name} not known to supplied camera.")
                    else:
                        paths_to_search.append((root, dir_name, subdir_name))
            else:
                paths_to_search.append((root, dir_name))
        elif "physical_filter" in required_dimensions:
            # If detector is not required, but physical_filter is,
            # then the top level should contain the filter
            # directories.
            if dir_name not in filter_map:
                raise RuntimeError(f"Filter {dir_name} not known to supplied camera.")
            paths_to_search.append((root, dir_name))
        else:
            # Neither detector nor physical_filter are required, so
            # the calibration is global, and will not be found in
            # subdirectories.
            paths_to_search.append((root,))

    for path in paths_to_search:
        chip_id = None
        filter_name = None
        if "detector" in required_dimensions:
            # path[1] is the detector directory name (lowered).
            chip_id = camera[detector_map[path[1]]].getId()
        if "physical_filter" in required_dimensions:
            # The filter directory is always the last path component.
            filter_name = filter_map[path[-1]]

        calibration_data[path], calib_type = read_one_calib(path, chip_id, filter_name, calib_class)

        calib_types.add(calib_type)
        if len(calib_types) != 1:  # set.add(None) has length 1 so None is OK here.
            raise ValueError(f"Error mixing calib types: {calib_types}")

    no_data = all([v == {} for v in calibration_data.values()])
    if no_data:
        raise RuntimeError("No data to ingest")

    return calibration_data, calib_type