Coverage for python/lsst/obs/base/_read_curated_calibs.py: 11%
95 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-14 02:19 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-14 02:19 -0700
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["CuratedCalibration", "read_all"]
26import glob
27import os
28from collections.abc import Mapping
29from typing import TYPE_CHECKING, Any, Protocol, Type, Union
31import dateutil.parser
33if TYPE_CHECKING: 33 ↛ 34line 33 didn't jump to line 34, because the condition on line 33 was never true
34 import datetime
36 import lsst.afw.cameraGeom
class CuratedCalibration(Protocol):
    """Structural interface for curated calibration dataset classes.

    Any class used with the readers in this module must provide these
    two methods.
    """

    @classmethod
    def readText(cls, path: str) -> CuratedCalibration:
        """Construct a calibration instance from the text file at ``path``."""
        ...

    def getMetadata(self) -> Mapping:
        """Return the metadata mapping associated with this calibration."""
        ...
def read_one_calib(
    path: tuple[str, ...],
    chip_id: Union[int, None],
    filter_name: Union[str, None],
    calib_class: Type[CuratedCalibration],
) -> tuple[dict[datetime.datetime, CuratedCalibration], str]:
    """Read the calibration data found at one path in the standard layout.

    Parameters
    ----------
    path : `tuple` [`str`]
        Tuple whose first element is the top level of the data tree,
        followed by any optional subdirectories.  See Notes for the
        expected layout.
    chip_id : `int` or None
        Identifier of the sensor in question, used for validation.
    filter_name : `str` or None
        Identifier of the filter in question, used for validation.
    calib_class : `Any`
        Class used to read each curated calibration text file.  Must
        support the ``readText()`` method.

    Returns
    -------
    `dict`
        Dictionary of objects constructed via ``calib_class.readText``,
        keyed by the validity start time as a `datetime` object.

    Notes
    -----
    Curated calibrations are read from the appropriate ``obs_*_data``
    package, and must follow a common directory structure to be
    identified and ingested properly.  The top-level directories are
    named after the instrument's ``policyName`` (generally, but not
    universally, lower-case).

    Below the instrument directory sit subdirectories named for the
    curated calibration type they contain, with the dataset_type_name
    forced to lowercase.  Calibrations that depend on the detector
    (e.g. defects) add a further level of directories named with the
    detector name, again lowercased.

    Filter-dependent calibrations that do not depend on the detector
    (e.g. transmission_filter) are grouped into directories named with
    the lowercased physical filter name directly below the
    dataset_type_name directory.  Filter-dependent calibrations that
    also depend on the detector (e.g. transmission_system) place the
    physical-filter directories below the detector-level directories.
    """
    # Collect every calibration file with a recognized extension.
    files = [
        filename
        for extension in (".ecsv", ".yaml", ".json")
        for filename in glob.glob(os.path.join(*path, f"*{extension}"))
    ]

    # By convention the data reside at <instrument>/<data_name>.
    parent, data_name = os.path.split(path[0])
    instrument = os.path.split(parent)[1]

    data_dict: dict[datetime.datetime, Any] = {}
    for filename in files:
        # The file stem encodes the start of the validity range.
        stem = os.path.splitext(os.path.basename(filename))[0]
        valid_start = dateutil.parser.parse(stem)
        calib = calib_class.readText(filename)
        data_dict[valid_start] = calib
        check_metadata(calib, valid_start, instrument, chip_id, filter_name, filename, data_name)
    return data_dict, data_name
def check_metadata(
    obj: Any,
    valid_start: datetime.datetime,
    instrument: str,
    chip_id: Union[int, None],
    filter_name: Union[str, None],
    filepath: str,
    data_name: str,
) -> None:
    """Check that the metadata is complete and self consistent

    Parameters
    ----------
    obj : object of same type as the factory
        Object to retrieve metadata from in order to compare with
        metadata inferred from the path.
    valid_start : `datetime`
        Start of the validity range for data.
    instrument : `str`
        Name of the instrument in question.
    chip_id : `int`
        Identifier of the sensor in question.
    filter_name : `str`
        Identifier of the filter in question.
    filepath : `str`
        Path of the file read to construct the data.
    data_name : `str`
        Name of the type of data being read.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the metadata from the file and the metadata encoded
        in the path do not match for any reason.
    """
    md = obj.getMetadata()
    # It is an error if these two do not exist.
    finst = md["INSTRUME"]
    fdata_name = md["OBSTYPE"]
    # These may optionally not exist.
    fchip_id = md.get("DETECTOR", None)
    ffilter_name = md.get("FILTER", None)

    # Normalize for comparison, but leave missing (None) values untouched
    # so that an absent DETECTOR/FILTER key is reported as the mismatch
    # ValueError below rather than crashing with a TypeError from
    # int(None) or an AttributeError from None.lower().
    if chip_id is not None and fchip_id is not None:
        fchip_id = int(fchip_id)
    if filter_name is not None:
        filter_name = filter_name.lower()
        if ffilter_name is not None:
            ffilter_name = ffilter_name.lower()

    if not (
        (finst.lower(), fchip_id, ffilter_name, fdata_name.lower())
        == (instrument.lower(), chip_id, filter_name, data_name.lower())
    ):
        raise ValueError(
            "Path and file metadata do not agree:\n"
            f"Path metadata: {instrument} {chip_id} {filter_name} {data_name}\n"
            f"File metadata: {finst} {fchip_id} {ffilter_name} {fdata_name}\n"
            f"File read from : {filepath}\n"
        )
def read_all(
    root: str,
    camera: lsst.afw.cameraGeom.Camera,
    calib_class: Type[CuratedCalibration],
    required_dimensions: list[str],
    filters: set[str],
) -> tuple[dict[tuple[str, ...], dict[datetime.datetime, CuratedCalibration]], str]:
    """Read all data from the standard format at a particular root.

    Parameters
    ----------
    root : `str`
        Path to the top level of the data tree.  This is expected to hold
        directories named after the sensor names.  They are expected to be
        lower case.
    camera : `lsst.afw.cameraGeom.Camera`
        The camera that goes with the data being read.
    calib_class : `Any`
        The class to use to read the curated calibration text file.  Must
        support the ``readText()`` and ``getMetadata()`` methods.
    required_dimensions : `list` [`str`]
        Dimensions required for the calibration.
    filters : `set` [`str`]
        The known filters for this camera.  Used to identify
        filter-dependent calibrations.

    Returns
    -------
    calibration_data : `dict`
        A dictionary of dictionaries of objects constructed with the
        appropriate factory class.  The first key is the search path tuple,
        and the second is the validity start time as a `datetime` object.
    calib_type : `str`
        The common calibration type name found for all of the data read.

    Raises
    ------
    RuntimeError
        Raised if a directory name does not match a known detector or
        filter, or if no data were found to ingest.
    ValueError
        Raised if more than one calibration type is found under ``root``.

    Notes
    -----
    Each leaf object in the constructed dictionary has metadata associated
    with it.  The detector ID may be retrieved from the DETECTOR entry of
    that metadata.
    """
    calibration_data = {}

    root = os.path.normpath(root)
    # Assume all subdirectories contain data; if there are none, the
    # calibration files live directly at the root.
    dirs = [d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d))]
    if not dirs:
        dirs = [root]

    calib_types = set()
    # We assume the directories have been lowered.
    detector_map = {det.getName().lower(): det.getName() for det in camera}
    filter_map = {filterName.lower().replace(" ", "_"): filterName for filterName in filters}

    paths_to_search: list[tuple[str, ...]] = []
    for d in dirs:
        dir_name = os.path.basename(d)
        # Catch possible mistakes:
        if "detector" in required_dimensions:
            if dir_name not in detector_map:
                # Top level directories must be detectors if they're
                # required.
                detectors = list(detector_map)
                max_detectors = 10
                note_str = "knows"
                if len(detectors) > max_detectors:
                    # report example subset
                    note_str = "examples"
                    detectors = detectors[:max_detectors]
                raise RuntimeError(
                    f"Detector {dir_name} not known to supplied camera "
                    f"{camera.getName()} ({note_str}: {','.join(detectors)})"
                )
            elif "physical_filter" in required_dimensions:
                # If the calibration depends on both detector and
                # physical_filter, the subdirs here should contain the
                # filter name.
                subdirs = os.listdir(os.path.join(root, dir_name))
                subdirs = [sd for sd in subdirs if os.path.isdir(os.path.join(root, dir_name, sd))]
                for sd in subdirs:
                    subdir_name = os.path.basename(sd)
                    if subdir_name not in filter_map:
                        raise RuntimeError(f"Filter {subdir_name} not known to supplied camera.")
                    paths_to_search.append((root, dir_name, subdir_name))
            else:
                paths_to_search.append((root, dir_name))
        elif "physical_filter" in required_dimensions:
            # If detector is not required, but physical_filter is,
            # then the top level should contain the filter
            # directories.
            if dir_name not in filter_map:
                raise RuntimeError(f"Filter {dir_name} not known to supplied camera.")
            paths_to_search.append((root, dir_name))
        else:
            # Neither detector nor physical_filter are required, so
            # the calibration is global, and will not be found in
            # subdirectories.
            paths_to_search.append((root,))

    for path in paths_to_search:
        chip_id = None
        filter_name = None
        if "detector" in required_dimensions:
            # The detector directory name immediately follows the root.
            chip_id = camera[detector_map[path[1]]].getId()
        if "physical_filter" in required_dimensions:
            # The filter directory name is always the last path element.
            filter_name = filter_map[path[-1]]

        calibration_data[path], calib_type = read_one_calib(path, chip_id, filter_name, calib_class)

        calib_types.add(calib_type)
        if len(calib_types) != 1:  # set.add(None) has length 1 so None is OK here.
            raise ValueError(f"Error mixing calib types: {calib_types}")

    # Empty per-path dicts are falsy, so this detects "nothing was read".
    no_data = not any(calibration_data.values())
    if no_data:
        raise RuntimeError("No data to ingest")

    return calibration_data, calib_type