Coverage for python/lsst/obs/base/_read_curated_calibs.py: 12%
95 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-17 07:56 +0000
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["CuratedCalibration", "read_all"]
26import glob
27import os
28from collections.abc import Mapping
29from typing import TYPE_CHECKING, Any, Protocol
31import dateutil.parser
33if TYPE_CHECKING:
34 import datetime
36 import lsst.afw.cameraGeom
class CuratedCalibration(Protocol):
    """Structural interface required of curated-calibration classes.

    Any class handled by this module must be constructible from a text
    file and must expose its header metadata.
    """

    @classmethod
    def readText(cls, path: str) -> CuratedCalibration:
        """Construct an instance from the text file at ``path``."""
        ...

    def getMetadata(self) -> Mapping:
        """Return the metadata associated with this calibration."""
        ...
def read_one_calib(
    path: tuple[str, ...],
    chip_id: int | None,
    filter_name: str | None,
    calib_class: type[CuratedCalibration],
) -> tuple[dict[datetime.datetime, CuratedCalibration], str]:
    """Read data for a particular path from the standard format at a
    particular root.

    Parameters
    ----------
    path : `tuple` [`str`]
        This tuple contains the top level of the data tree at index=0,
        and then further optional subdirectories in subsequent
        indices.  See Notes below for more details.
    chip_id : `int` or None
        The identifier for the sensor in question.  To be used in
        validation.
    filter_name : `str` or None
        The identifier for the filter in question.  To be used in
        validation.
    calib_class : `Any`
        The class to use to read the curated calibration text file.  Must
        support the ``readText()`` method.

    Returns
    -------
    data_dict : `dict` [`datetime.datetime`, `CuratedCalibration`]
        A dictionary of objects constructed from the appropriate factory
        class.  The key is the validity start time as a `datetime` object.
    data_name : `str`
        The name of the type of data read, taken from the final path
        component of the tree's top level.

    Notes
    -----
    Curated calibrations are read from the appropriate ``obs*_data``
    package, and are required to have a common directory structure to
    be identified and ingested properly.  The top-level directories
    are organized by the instrument's ``policyName``.  These names are
    generally all lower-case, but this is not universally true.

    Below the top-level instrument directory, subdirectories named
    after the curated calibration type contained within, with the
    dataset_type_name forced to lowercase.  For calibrations that
    depend on the detector (i.e., the defects), the next level of
    subdirectories should contain directories named with the detector
    name, again forced to lowercase.

    For filter dependent calibrations that do not depend on the
    detector (i.e., transmission_filter), the calibrations should be
    grouped into directories named with the physical filter name
    (again, all lowercase) below the dataset_type_name directory.
    Filter dependent calibrations that do depend on the detector
    (i.e., transmission_system), have physical filter named
    directories below the detector level directories.
    """
    # Gather every calibration file in any supported text format.
    files = []
    extensions = (".ecsv", ".yaml", ".json")
    for ext in extensions:
        files.extend(glob.glob(os.path.join(*path, f"*{ext}")))

    parts = os.path.split(path[0])
    instrument = os.path.split(parts[0])[1]  # convention is that these reside at <instrument>/<data_name>
    data_name = parts[1]
    data_dict: dict[datetime.datetime, Any] = {}
    for f in files:
        # The file stem encodes the start of the validity range.
        date_str = os.path.splitext(os.path.basename(f))[0]
        valid_start = dateutil.parser.parse(date_str)
        data_dict[valid_start] = calib_class.readText(f)
        # Cross-check the file's own metadata against what the path implies.
        check_metadata(data_dict[valid_start], valid_start, instrument, chip_id, filter_name, f, data_name)
    return data_dict, data_name
def check_metadata(
    obj: Any,
    valid_start: datetime.datetime,
    instrument: str,
    chip_id: int | None,
    filter_name: str | None,
    filepath: str,
    data_name: str,
) -> None:
    """Check that the metadata is complete and self consistent.

    Parameters
    ----------
    obj : object of same type as the factory
        Object to retrieve metadata from in order to compare with
        metadata inferred from the path.
    valid_start : `datetime`
        Start of the validity range for data.
    instrument : `str`
        Name of the instrument in question.
    chip_id : `int`
        Identifier of the sensor in question.
    filter_name : `str`
        Identifier of the filter in question.
    filepath : `str`
        Path of the file read to construct the data.
    data_name : `str`
        Name of the type of data being read.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the metadata from the file and the metadata encoded
        in the path do not match for any reason, including the file
        metadata lacking an entry that the path requires.
    """
    md = obj.getMetadata()
    # It is an error if these two do not exist.
    finst = md["INSTRUME"]
    fdata_name = md["OBSTYPE"]
    # These may optionally not exist.
    fchip_id = md.get("DETECTOR", None)
    ffilter_name = md.get("FILTER", None)

    # Normalize the file-side values only when present; a missing value
    # falls through to the comparison below and raises ValueError rather
    # than TypeError/AttributeError.
    if chip_id is not None and fchip_id is not None:
        fchip_id = int(fchip_id)
    if filter_name is not None:
        if ffilter_name is not None:
            ffilter_name = ffilter_name.lower()
        filter_name = filter_name.lower()

    if not (
        (finst.lower(), fchip_id, ffilter_name, fdata_name.lower())
        == (instrument.lower(), chip_id, filter_name, data_name.lower())
    ):
        raise ValueError(
            "Path and file metadata do not agree:\n"
            f"Path metadata: {instrument} {chip_id} {filter_name} {data_name}\n"
            f"File metadata: {finst} {fchip_id} {ffilter_name} {fdata_name}\n"
            f"File read from : {filepath}\n"
        )
def read_all(
    root: str,
    camera: lsst.afw.cameraGeom.Camera,
    calib_class: type[CuratedCalibration],
    required_dimensions: list[str],
    filters: set[str],
) -> tuple[dict[tuple[str, ...], dict[datetime.datetime, CuratedCalibration]], str]:
    """Read all data from the standard format at a particular root.

    Parameters
    ----------
    root : `str`
        Path to the top level of the data tree.  This is expected to hold
        directories named after the sensor names.  They are expected to be
        lower case.
    camera : `lsst.afw.cameraGeom.Camera`
        The camera that goes with the data being read.
    calib_class : `Any`
        The class to use to read the curated calibration text file.  Must
        support the ``readText()`` and ``getMetadata()`` methods.
    required_dimensions : `list` [`str`]
        Dimensions required for the calibration.
    filters : `set` [`str`]
        Set of the known filters for this camera.  Used to identify
        filter-dependent calibrations.

    Returns
    -------
    dict
        A dictionary of dictionaries of objects constructed with the
        appropriate factory class.  The first key is the sensor name lowered,
        and the second is the validity start time as a `datetime` object.

    Raises
    ------
    RuntimeError
        If a directory name does not match a known detector or filter,
        or if no data was found to ingest.
    ValueError
        If more than one calibration type is found under ``root``.

    Notes
    -----
    Each leaf object in the constructed dictionary has metadata associated
    with it.  The detector ID may be retrieved from the DETECTOR entry of
    that metadata.
    """
    calibration_data = {}

    root = os.path.normpath(root)
    dirs = os.listdir(root)  # assumes all directories contain data
    dirs = [d for d in dirs if os.path.isdir(os.path.join(root, d))]
    if not dirs:
        # No subdirectories: the calibration files live directly at root.
        dirs = [root]

    calib_types = set()
    # We assume the directories have been lowered.
    detector_map = {det.getName().lower(): det.getName() for det in camera}
    # Directory names use "_" where the physical filter name has spaces.
    filter_map = {filterName.lower().replace(" ", "_"): filterName for filterName in filters}

    paths_to_search: list[tuple[str, ...]] = []
    for d in dirs:
        dir_name = os.path.basename(d)
        # Catch possible mistakes:
        if "detector" in required_dimensions:
            if dir_name not in detector_map:
                # Top level directories must be detectors if they're
                # required.
                detectors = list(detector_map)
                max_detectors = 10
                note_str = "knows"
                if len(detectors) > max_detectors:
                    # report example subset
                    note_str = "examples"
                    detectors = detectors[:max_detectors]
                raise RuntimeError(
                    f"Detector {dir_name} not known to supplied camera "
                    f"{camera.getName()} ({note_str}: {','.join(detectors)})"
                )
            elif "physical_filter" in required_dimensions:
                # If the calibration depends on both detector and
                # physical_filter, the subdirs here should contain the
                # filter name.
                subdirs = os.listdir(os.path.join(root, dir_name))
                subdirs = [d for d in subdirs if os.path.isdir(os.path.join(root, dir_name, d))]
                for sd in subdirs:
                    subdir_name = os.path.basename(sd)
                    if subdir_name not in filter_map:
                        raise RuntimeError(f"Filter {subdir_name} not known to supplied camera.")
                    else:
                        paths_to_search.append((root, dir_name, subdir_name))
            else:
                paths_to_search.append((root, dir_name))
        elif "physical_filter" in required_dimensions:
            # If detector is not required, but physical_filter is,
            # then the top level should contain the filter
            # directories.
            if dir_name not in filter_map:
                raise RuntimeError(f"Filter {dir_name} not known to supplied camera.")
            paths_to_search.append((root, dir_name))
        else:
            # Neither detector nor physical_filter are required, so
            # the calibration is global, and will not be found in
            # subdirectories.
            paths_to_search.append((root,))

    for path in paths_to_search:
        chip_id = None
        filter_name = None
        if "detector" in required_dimensions:
            # path[1] is the (lowered) detector directory name.
            chip_id = camera[detector_map[path[1]]].getId()
        if "physical_filter" in required_dimensions:
            # The filter directory is always the last path component.
            filter_name = filter_map[path[-1]]

        calibration_data[path], calib_type = read_one_calib(path, chip_id, filter_name, calib_class)

        calib_types.add(calib_type)
        if len(calib_types) != 1:  # set.add(None) has length 1 so None is OK here.
            raise ValueError(f"Error mixing calib types: {calib_types}")

    no_data = all([v == {} for v in calibration_data.values()])
    if no_data:
        raise RuntimeError("No data to ingest")

    return calibration_data, calib_type