Coverage for python/lsst/obs/base/_read_curated_calibs.py: 11% of 95 statements (coverage.py v7.2.5, created at 2023-05-06 02:49 -0700)

# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["CuratedCalibration", "read_all"]

import glob
import os
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, Protocol, Type, Union

import dateutil.parser

if TYPE_CHECKING:
    import datetime

    import lsst.afw.cameraGeom

class CuratedCalibration(Protocol):
    """Protocol that describes the methods needed by this module when
    dealing with curated calibration datasets."""

    @classmethod
    def readText(cls, path: str) -> CuratedCalibration:
        ...

    def getMetadata(self) -> Mapping:
        ...

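# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): a minimal class that
# satisfies the ``CuratedCalibration`` protocol above.  The class name and its
# behaviour are invented for illustration; real curated calibration classes
# (defects, transmission curves, etc.) are defined elsewhere in the LSST
# codebase.
# ---------------------------------------------------------------------------
class _ToyCalibration:
    """Hypothetical calibration used only to illustrate the protocol."""

    def __init__(self, metadata: Mapping):
        self._metadata = metadata

    @classmethod
    def readText(cls, path: str) -> "_ToyCalibration":
        # A real implementation would parse the .ecsv/.yaml/.json file; this
        # sketch only records where the data came from.
        return cls({"SOURCE": path})

    def getMetadata(self) -> Mapping:
        return self._metadata
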

def read_one_calib(
    path: tuple[str, ...],
    chip_id: Union[int, None],
    filter_name: Union[str, None],
    calib_class: Type[CuratedCalibration],
) -> tuple[dict[datetime.datetime, CuratedCalibration], str]:
57 """Read data for a particular path from the standard format at a 

58 particular root. 

59 

60 Parameters 

61 ---------- 

62 path : `tuple` [`str`] 

63 This tuple contains the top level of the data tree at index=0, 

64 and then further optional subdirectories in subsequent 

65 indices. See Notes below for more details. 

66 chip_id : `int` or None 

67 The identifier for the sensor in question. To be used in 

68 validation. 

69 filter_name : `str` or None 

70 The identifier for the filter in question. To be used in 

71 validation. 

72 calib_class : `Any` 

73 The class to use to read the curated calibration text file. Must 

74 support the ``readText()`` method. 

75 

76 Returns 

77 ------- 

78 `dict` 

79 A dictionary of objects constructed from the appropriate factory class. 

80 The key is the validity start time as a `datetime` object. 

81 

82 Notes 

83 ----- 

84 Curated calibrations are read from the appropriate ``obs_ _data`` 

85 package, and are required to have a common directory structure to 

86 be identified and ingested properly. The top-level directories 

87 are organized by the instrument's ``policyName``. These names are 

88 generally all lower-case, but this is not universally true. 

89 

90 Below the top-level instrument directory, subdirectories named 

91 after the curated calibration type contained within, with the 

92 dataset_type_name forced to lowercase. For calibrations that 

93 depend on the detector (i.e., the defects), the next level of 

94 subdirectories should contain directories named with the detector 

95 name, again forced to lowercase. 

96 

97 For filter dependent calibrations that do not depend on the 

98 detector (i.e., transmission_filter), the calibrations should be 

99 grouped into directories named with the physical filter name 

100 (again, all lowercase) below the dataset_type_name directory. 

101 Filter dependent calibrations that do depend on the detector 

102 (i.e., transmission_system), have physical filter named 

103 directories below the detector level directories. 

104 """ 

    files = []
    extensions = (".ecsv", ".yaml", ".json")
    for ext in extensions:
        files.extend(glob.glob(os.path.join(*path, f"*{ext}")))

    parts = os.path.split(path[0])
    instrument = os.path.split(parts[0])[1]  # convention is that these reside at <instrument>/<data_name>
    data_name = parts[1]
    data_dict: dict[datetime.datetime, Any] = {}
    for f in files:
        date_str = os.path.splitext(os.path.basename(f))[0]
        valid_start = dateutil.parser.parse(date_str)
        data_dict[valid_start] = calib_class.readText(f)
        check_metadata(data_dict[valid_start], valid_start, instrument, chip_id, filter_name, f, data_name)
    return data_dict, data_name

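# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): the directory layout
# that ``read_one_calib`` expects, and a call against it.  Every concrete name
# below (paths, instrument, detector, calibration class) is an assumption made
# up for the example.  Each file's basename is the validity-start date, parsed
# by ``dateutil.parser.parse``:
#
#     /path/to/data/myinstrument/defects/det00/2020-01-01T00:00:00.ecsv
#     /path/to/data/myinstrument/defects/det00/2021-06-15T00:00:00.ecsv
#
#     data_dict, data_name = read_one_calib(
#         ("/path/to/data/myinstrument/defects", "det00"),
#         chip_id=0,
#         filter_name=None,
#         calib_class=SomeCalibClass,
#     )
#
# Here ``data_dict`` maps each validity-start ``datetime`` to a calibration
# object, and ``data_name`` is "defects", inferred from the directory name.
# ---------------------------------------------------------------------------
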

def check_metadata(
    obj: Any,
    valid_start: datetime.datetime,
    instrument: str,
    chip_id: Union[int, None],
    filter_name: Union[str, None],
    filepath: str,
    data_name: str,
) -> None:
131 """Check that the metadata is complete and self consistent 

132 

133 Parameters 

134 ---------- 

135 obj : object of same type as the factory 

136 Object to retrieve metadata from in order to compare with 

137 metadata inferred from the path. 

138 valid_start : `datetime` 

139 Start of the validity range for data. 

140 instrument : `str` 

141 Name of the instrument in question. 

142 chip_id : `int` 

143 Identifier of the sensor in question. 

144 filter_name : `str` 

145 Identifier of the filter in question. 

146 filepath : `str` 

147 Path of the file read to construct the data. 

148 data_name : `str` 

149 Name of the type of data being read. 

150 

151 Returns 

152 ------- 

153 None 

154 

155 Raises 

156 ------ 

157 ValueError 

158 If the metadata from the path and the metadata encoded 

159 in the path do not match for any reason. 

160 """ 

    md = obj.getMetadata()
    # It is an error if these two do not exist.
    finst = md["INSTRUME"]
    fdata_name = md["OBSTYPE"]
    # These may optionally not exist.
    fchip_id = md.get("DETECTOR", None)
    ffilter_name = md.get("FILTER", None)

    if chip_id is not None:
        fchip_id = int(fchip_id)
    if filter_name is not None:
        ffilter_name = ffilter_name.lower()
        filter_name = filter_name.lower()

    if not (
        (finst.lower(), fchip_id, ffilter_name, fdata_name.lower())
        == (instrument.lower(), chip_id, filter_name, data_name.lower())
    ):
        raise ValueError(
            "Path and file metadata do not agree:\n"
            f"Path metadata: {instrument} {chip_id} {filter_name} {data_name}\n"
            f"File metadata: {finst} {fchip_id} {ffilter_name} {fdata_name}\n"
            f"File read from : {filepath}\n"
        )

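# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): for the hypothetical
# detector-level defects file shown earlier, ``check_metadata`` would require
# the calibration's own metadata to contain at least
#
#     {"INSTRUME": "MyInstrument", "OBSTYPE": "defects", "DETECTOR": 0}
#
# INSTRUME and OBSTYPE are compared case-insensitively against the values
# inferred from the path, DETECTOR is converted to `int` only when a
# ``chip_id`` is supplied, and the FILTER entry (if any) must match
# ``filter_name``, with both lower-cased when a filter name is supplied.
# ---------------------------------------------------------------------------
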

def read_all(
    root: str,
    camera: lsst.afw.cameraGeom.Camera,
    calib_class: Type[CuratedCalibration],
    required_dimensions: list[str],
    filters: set[str],
) -> tuple[dict[tuple[str, ...], dict[datetime.datetime, CuratedCalibration]], str]:
194 """Read all data from the standard format at a particular root. 

195 

196 Parameters 

197 ---------- 

198 root : `str` 

199 Path to the top level of the data tree. This is expected to hold 

200 directories named after the sensor names. They are expected to be 

201 lower case. 

202 camera : `lsst.afw.cameraGeom.Camera` 

203 The camera that goes with the data being read. 

204 calib_class : `Any` 

205 The class to use to read the curated calibration text file. Must 

206 support the ``readText()`` and ``getMetadata()`` methods. 

207 required_dimensions : `list` [`str`] 

208 Dimensions required for the calibration. 

209 filters : `list` [`str`] 

210 List of the known filters for this camera. Used to identify 

211 filter-dependent calibrations. 

212 

213 Returns 

214 ------- 

215 dict 

216 A dictionary of dictionaries of objects constructed with the 

217 appropriate factory class. The first key is the sensor name lowered, 

218 and the second is the validity start time as a `datetime` object. 

219 

220 Notes 

221 ----- 

222 Each leaf object in the constructed dictionary has metadata associated with 

223 it. The detector ID may be retrieved from the DETECTOR entry of that 

224 metadata. 

225 """ 

    calibration_data = {}

    root = os.path.normpath(root)
    dirs = os.listdir(root)  # assumes all directories contain data
    dirs = [d for d in dirs if os.path.isdir(os.path.join(root, d))]
    if not dirs:
        dirs = [root]

    calib_types = set()
    # We assume the directories have been lowered.
    detector_map = {det.getName().lower(): det.getName() for det in camera}
    filter_map = {filterName.lower().replace(" ", "_"): filterName for filterName in filters}

    paths_to_search: list[tuple[str, ...]] = []
    for d in dirs:
        dir_name = os.path.basename(d)
        # Catch possible mistakes:
        if "detector" in required_dimensions:
            if dir_name not in detector_map:
                # Top level directories must be detectors if they're
                # required.
                detectors = [det for det in detector_map.keys()]
                max_detectors = 10
                note_str = "knows"
                if len(detectors) > max_detectors:
                    # Report an example subset.
                    note_str = "examples"
                    detectors = detectors[:max_detectors]
                raise RuntimeError(
                    f"Detector {dir_name} not known to supplied camera "
                    f"{camera.getName()} ({note_str}: {','.join(detectors)})"
                )
            elif "physical_filter" in required_dimensions:
                # If the calibration depends on both detector and
                # physical_filter, the subdirs here should contain the
                # filter name.
                subdirs = os.listdir(os.path.join(root, dir_name))
                subdirs = [d for d in subdirs if os.path.isdir(os.path.join(root, dir_name, d))]
                for sd in subdirs:
                    subdir_name = os.path.basename(sd)
                    if subdir_name not in filter_map:
                        raise RuntimeError(f"Filter {subdir_name} not known to supplied camera.")
                    else:
                        paths_to_search.append((root, dir_name, subdir_name))
            else:
                paths_to_search.append((root, dir_name))
        elif "physical_filter" in required_dimensions:
            # If detector is not required, but physical_filter is,
            # then the top level should contain the filter
            # directories.
            if dir_name not in filter_map:
                raise RuntimeError(f"Filter {dir_name} not known to supplied camera.")
            paths_to_search.append((root, dir_name))
        else:
            # Neither detector nor physical_filter is required, so
            # the calibration is global and will not be found in
            # subdirectories.
            paths_to_search.append((root,))

    for path in paths_to_search:
        chip_id = None
        filter_name = None
        if "detector" in required_dimensions:
            chip_id = camera[detector_map[path[1]]].getId()
        if "physical_filter" in required_dimensions:
            filter_name = filter_map[path[-1]]

        calibration_data[path], calib_type = read_one_calib(path, chip_id, filter_name, calib_class)

        calib_types.add(calib_type)
        if len(calib_types) != 1:  # set.add(None) has length 1, so None is OK here.
            raise ValueError(f"Error mixing calib types: {calib_types}")

    no_data = all([v == {} for v in calibration_data.values()])
    if no_data:
        raise RuntimeError("No data to ingest")

    return calibration_data, calib_type
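

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module).  Every concrete
# name below is an assumption: ``camera`` would be an
# ``lsst.afw.cameraGeom.Camera`` whose detector names match the directory
# names under ``root``, and ``SomeCalibClass`` any class satisfying the
# ``CuratedCalibration`` protocol.
#
#     calibs, calib_type = read_all(
#         "/path/to/data/myinstrument/defects",
#         camera,
#         calib_class=SomeCalibClass,
#         required_dimensions=["instrument", "detector"],
#         filters=set(),
#     )
#
# ``calibs`` is keyed by the path tuple that was searched, e.g.
# ``("/path/to/data/myinstrument/defects", "det00")``; each value maps a
# validity-start ``datetime`` to a calibration object.  ``calib_type`` would
# be "defects", the dataset type inferred from the directory name.
# ---------------------------------------------------------------------------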