Coverage for python/lsst/obs/base/_read_curated_calibs.py: 12%
95 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-29 17:03 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-29 17:03 +0000
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["CuratedCalibration", "read_all"]
26import glob
27import os
28from collections.abc import Mapping
29from typing import TYPE_CHECKING, Any, Protocol
31import dateutil.parser
33if TYPE_CHECKING: 33 ↛ 34line 33 didn't jump to line 34, because the condition on line 33 was never true
34 import datetime
36 import lsst.afw.cameraGeom
class CuratedCalibration(Protocol):
    """Structural type for curated calibration datasets.

    Any class offering these two methods can be handed to the reading
    functions in this file; no inheritance from this class is required.
    """

    @classmethod
    def readText(cls, path: str) -> CuratedCalibration:
        """Build a calibration object from the text file at ``path``."""
        ...

    def getMetadata(self) -> Mapping:
        """Return the metadata mapping attached to this calibration."""
        ...
def read_one_calib(
    path: tuple[str, ...],
    chip_id: int | None,
    filter_name: str | None,
    calib_class: type[CuratedCalibration],
) -> tuple[dict[datetime.datetime, CuratedCalibration], str]:
    """Read data for a particular path from the standard format at a
    particular root.

    Parameters
    ----------
    path : `tuple` [`str`]
        This tuple contains the top level of the data tree at index=0,
        and then further optional subdirectories in subsequent
        indices. See Notes below for more details.
    chip_id : `int` or None
        The identifier for the sensor in question. To be used in
        validation.
    filter_name : `str` or None
        The identifier for the filter in question. To be used in
        validation.
    calib_class : `Any`
        The class to use to read the curated calibration text file. Must
        support the ``readText()`` and ``getMetadata()`` methods.

    Returns
    -------
    data_dict : `dict`
        A dictionary of calibration objects constructed from the
        appropriate factory class. The key is the validity start time
        as a `datetime.datetime` object, parsed from the file name
        (without its extension).
    calib_type : `str`
        The type of calibrations that have been read and are included
        in the ``data_dict``. This is the final component of ``path[0]``.

    Raises
    ------
    ValueError
        Raised by `check_metadata` if the metadata stored in a file
        disagrees with the metadata inferred from its path.

    Notes
    -----
    Curated calibrations are read from the appropriate
    ``obs_<instrument>_data`` package, and are required to have a common
    directory structure to be identified and ingested properly. The
    top-level directories are organized by the instrument's
    ``policyName``. These names are generally all lower-case, but this
    is not universally true.

    Below the top-level instrument directory, subdirectories named
    after the curated calibration type contained within, with the
    dataset_type_name forced to lowercase. For calibrations that
    depend on the detector (i.e., the defects), the next level of
    subdirectories should contain directories named with the detector
    name, again forced to lowercase.

    For filter dependent calibrations that do not depend on the
    detector (i.e., transmission_filter), the calibrations should be
    grouped into directories named with the physical filter name
    (again, all lowercase) below the dataset_type_name directory.
    Filter dependent calibrations that do depend on the detector
    (i.e., transmission_system), have physical filter named
    directories below the detector level directories.

    Only files ending in ``.ecsv``, ``.yaml`` or ``.json`` are read. If
    several files resolve to the same validity start, the one read last
    replaces the earlier ones.
    """
    # Escape the directory part so that glob metacharacters appearing in
    # directory names are matched literally; only the file name is a pattern.
    search_dir = glob.escape(os.path.join(*path))
    files: list[str] = []
    for ext in (".ecsv", ".yaml", ".json"):
        # Sort within each extension so the read order is reproducible while
        # the extension precedence (ecsv, then yaml, then json) is kept.
        files.extend(sorted(glob.glob(os.path.join(search_dir, f"*{ext}"))))

    # Convention is that these reside at <instrument>/<calib_type>.
    # Normalize first: a trailing separator would otherwise make the split
    # return an empty calib_type and shift the instrument name by one level.
    instrument_dir, calib_type = os.path.split(os.path.normpath(path[0]))
    instrument = os.path.basename(instrument_dir)

    data_dict: dict[datetime.datetime, Any] = {}
    for filename in files:
        # The file name stem encodes the start of the validity range.
        date_str = os.path.splitext(os.path.basename(filename))[0]
        valid_start = dateutil.parser.parse(date_str)
        calib = calib_class.readText(filename)
        data_dict[valid_start] = calib
        check_metadata(calib, valid_start, instrument, chip_id, filter_name, filename, calib_type)
    return data_dict, calib_type
def check_metadata(
    obj: Any,
    valid_start: datetime.datetime,
    instrument: str,
    chip_id: int | None,
    filter_name: str | None,
    filepath: str,
    calib_type: str,
) -> None:
    """Check that the metadata is complete and self consistent.

    Parameters
    ----------
    obj : object of same type as the factory
        Object to retrieve metadata from (via ``getMetadata()``) in order
        to compare with metadata inferred from the path.
    valid_start : `datetime`
        Start of the validity range for data. Not used in the comparison.
    instrument : `str`
        Name of the instrument in question.
    chip_id : `int` or None
        Identifier of the sensor in question.
    filter_name : `str` or None
        Identifier of the filter in question.
    filepath : `str`
        Path of the file read to construct the data. Only used in the
        error message.
    calib_type : `str`
        Name of the type of data being read.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the metadata stored in the file and the metadata encoded
        in the path do not match for any reason, including a detector
        or filter that is expected from the path but absent from the
        file metadata.
    KeyError
        If the file metadata lacks the mandatory ``INSTRUME`` or
        ``OBSTYPE`` entries (as raised by the metadata mapping).
    """
    md = obj.getMetadata()
    # It is an error if these two do not exist.
    finst = md["INSTRUME"]
    fcalib_type = md["OBSTYPE"]
    # These may optionally not exist.
    fchip_id = md.get("DETECTOR", None)
    ffilter_name = md.get("FILTER", None)

    if chip_id is not None and fchip_id is not None:
        try:
            fchip_id = int(fchip_id)
        except (TypeError, ValueError):
            # Keep the raw value: it cannot equal the integer chip_id, so the
            # comparison below reports the mismatch with full context.
            pass
    if filter_name is not None:
        # Filter names are compared case-insensitively. A missing FILTER
        # entry stays None and is reported as a mismatch below.
        if ffilter_name is not None:
            ffilter_name = ffilter_name.lower()
        filter_name = filter_name.lower()

    from_file = (finst.lower(), fchip_id, ffilter_name, fcalib_type.lower())
    from_path = (instrument.lower(), chip_id, filter_name, calib_type.lower())
    if from_file != from_path:
        raise ValueError(
            "Path and file metadata do not agree:\n"
            f"Path metadata: {instrument} {chip_id} {filter_name} {calib_type}\n"
            f"File metadata: {finst} {fchip_id} {ffilter_name} {fcalib_type}\n"
            f"File read from : {filepath}\n"
        )
def read_all(
    root: str,
    camera: lsst.afw.cameraGeom.Camera,
    calib_class: type[CuratedCalibration],
    required_dimensions: list[str],
    filters: set[str],
) -> tuple[dict[tuple[str, ...], dict[datetime.datetime, CuratedCalibration]], str]:
    """Read all data from the standard format at a particular root.

    Parameters
    ----------
    root : `str`
        Path to the top level of the data tree. Depending on
        ``required_dimensions`` this is expected to hold directories
        named after the sensor names or the physical filter names. They
        are expected to be lower case.
    camera : `lsst.afw.cameraGeom.Camera`
        The camera that goes with the data being read.
    calib_class : `Any`
        The class to use to read the curated calibration text file. Must
        support the ``readText()`` and ``getMetadata()`` methods.
    required_dimensions : `list` [`str`]
        Dimensions required for the calibration. Only ``"detector"`` and
        ``"physical_filter"`` influence the directory layout searched.
    filters : `set` [`str`]
        The known filters for this camera. Used to identify
        filter-dependent calibrations. Directory names are matched
        against the lower-cased names with spaces replaced by ``_``.

    Returns
    -------
    calibration_data : `dict`
        A dictionary of dictionaries of calibration objects
        constructed with the appropriate factory class. The first key
        is the tuple of path components that was searched (``root``
        followed by any detector and/or filter directory names), and the
        second is the validity start time as a `datetime.datetime`
        object.
    calib_type : `str`
        The type of calibrations that have been read and are included
        in ``calibration_data``.

    Raises
    ------
    RuntimeError
        If a directory name does not correspond to a known detector or
        filter, or if no calibration data were found at all.
    ValueError
        If the searched paths do not yield exactly one calibration type.

    Notes
    -----
    Each leaf object in the constructed dictionary has metadata associated with
    it. The detector ID may be retrieved from the DETECTOR entry of that
    metadata.
    """
    root = os.path.normpath(root)
    # Assumes all directories contain data. Sorted so that traversal (and
    # therefore which problem gets reported first) is reproducible.
    dirs = sorted(d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d)))
    if not dirs:
        dirs = [root]

    # We assume the directories have been lowered.
    detector_map = {det.getName().lower(): det.getName() for det in camera}
    filter_map = {filterName.lower().replace(" ", "_"): filterName for filterName in filters}
    needs_detector = "detector" in required_dimensions
    needs_filter = "physical_filter" in required_dimensions

    paths_to_search: list[tuple[str, ...]] = []
    for d in dirs:
        dir_name = os.path.basename(d)
        # Catch possible mistakes:
        if needs_detector:
            if dir_name not in detector_map:
                # Top level directories must be detectors if they're
                # required.
                detectors = list(detector_map)
                max_detectors = 10
                note_str = "knows"
                if len(detectors) > max_detectors:
                    # Report an example subset only.
                    note_str = "examples"
                    detectors = detectors[:max_detectors]
                raise RuntimeError(
                    f"Detector {dir_name} not known to supplied camera "
                    f"{camera.getName()} ({note_str}: {','.join(detectors)})"
                )
            if needs_filter:
                # If the calibration depends on both detector and
                # physical_filter, the subdirs here should contain the
                # filter name.
                detector_dir = os.path.join(root, dir_name)
                subdirs = sorted(
                    sd for sd in os.listdir(detector_dir) if os.path.isdir(os.path.join(detector_dir, sd))
                )
                for subdir_name in subdirs:
                    if subdir_name not in filter_map:
                        raise RuntimeError(f"Filter {subdir_name} not known to supplied camera.")
                    paths_to_search.append((root, dir_name, subdir_name))
            else:
                paths_to_search.append((root, dir_name))
        elif needs_filter:
            # If detector is not required, but physical_filter is,
            # then the top level should contain the filter
            # directories.
            if dir_name not in filter_map:
                raise RuntimeError(f"Filter {dir_name} not known to supplied camera.")
            paths_to_search.append((root, dir_name))
        else:
            # Neither detector nor physical_filter are required, so
            # the calibration is global, and will not be found in
            # subdirectories. Register the root only once, however many
            # subdirectories exist, so the same files are not re-read.
            global_path = (root,)
            if global_path not in paths_to_search:
                paths_to_search.append(global_path)

    calibration_data: dict[tuple[str, ...], dict[datetime.datetime, CuratedCalibration]] = {}
    calib_types: set[str] = set()
    for path in paths_to_search:
        # path[1] is the detector directory; path[-1] is the filter directory.
        chip_id = camera[detector_map[path[1]]].getId() if needs_detector else None
        filter_name = filter_map[path[-1]] if needs_filter else None

        calibration_data[path], calib_type = read_one_calib(path, chip_id, filter_name, calib_class)
        calib_types.add(calib_type)

    # A set holding a single entry (even None) has length 1, so that is fine.
    if len(calib_types) != 1:
        raise ValueError(f"Error mixing calib types: {calib_types}")
    (calib_type,) = calib_types

    if not any(calibration_data.values()):
        raise RuntimeError("No data to ingest")

    return calibration_data, calib_type