# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = (
    "DataCoordinateReader",
    "DatasetRefReader",
    "DimensionRecordReader",
)

from abc import ABC, abstractmethod
from collections.abc import Callable, Mapping, Set
from typing import TYPE_CHECKING, Any

from lsst.utils.classes import cached_getter

from ..._column_tags import DatasetColumnTag, DimensionKeyColumnTag
from ..._dataset_ref import DatasetRef
from ..._dataset_type import DatasetType
from ...dimensions import (
    DataCoordinate,
    DimensionElement,
    DimensionGroup,
    DimensionRecord,
    DimensionRecordSet,
)

if TYPE_CHECKING:
    from lsst.daf.relation import ColumnTag


class DataCoordinateReader(ABC):
    """Base class and factory for reader objects that extract `DataCoordinate`
    instances from query result rows.
    """

    @staticmethod
    def make(
        dimensions: DimensionGroup,
        full: bool = True,
        records: bool = False,
        record_caches: Mapping[str, DimensionRecordSet] | None = None,
    ) -> DataCoordinateReader:
        """Construct a concrete reader for a set of dimensions.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions of the `DataCoordinate` instances the new reader will
            read.
        full : `bool`, optional
            Whether to expect and extract implied dimensions as well as
            required dimensions.
        records : `bool`, optional
            Whether to attach dimension records.
        record_caches : `~collections.abc.Mapping`, optional
            Mapping of cached dimension records.  Ignored unless
            ``records=True``.

        Returns
        -------
        reader : `DataCoordinateReader`
            Concrete reader instance.
        """
        if full:
            full_reader = _FullDataCoordinateReader(dimensions)
            if records:
                if record_caches is None:
                    record_caches = {}
                else:
                    record_caches = {
                        e: cache for e, cache in record_caches.items() if e in dimensions.elements
                    }
                record_readers = {}
                for element_name in dimensions.elements:
                    element = dimensions.universe[element_name]
                    if element_name not in record_caches:
                        record_readers[element] = DimensionRecordReader(element)
                return _ExpandedDataCoordinateReader(full_reader, record_caches, record_readers)
            return full_reader
        else:
            assert not records, "Cannot add records unless full=True."
            return _BasicDataCoordinateReader(dimensions)

    __slots__ = ()

    @abstractmethod
    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        """Read a `DataCoordinate` from a query result row.

        Parameters
        ----------
        row : `~collections.abc.Mapping`
            Mapping with `ColumnTag` keys representing a query result row.

        Returns
        -------
        data_coordinate : `DataCoordinate`
            New data ID.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def columns_required(self) -> Set[ColumnTag]:
        raise NotImplementedError()
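

# Example (illustrative sketch, not executed here): readers are built once via
# the factory above and then applied to many rows.  ``dimensions`` is a
# hypothetical ``DimensionGroup`` and ``row`` a query result row keyed by
# column tags::
#
#     reader = DataCoordinateReader.make(dimensions, full=True, records=False)
#     data_id = reader.read(row)
#
# With ``records=True`` (which requires ``full=True``) the returned data IDs
# also carry dimension records, served from ``record_caches`` where available.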


class _BasicDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=False`` case.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        Dimensions of the `DataCoordinate` instances read.
    """

    def __init__(self, dimensions: DimensionGroup):
        self._dimensions = dimensions
        self._tags = tuple(DimensionKeyColumnTag(name) for name in self._dimensions.required.names)

    __slots__ = ("_dimensions", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        return DataCoordinate.from_required_values(
            self._dimensions,
            tuple(row[tag] for tag in self._tags),
        )

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return frozenset(self._tags)


class _FullDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=True`` case.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        Dimensions of the `DataCoordinate` instances read.
    """

    def __init__(self, dimensions: DimensionGroup):
        self._dimensions = dimensions
        self._tags = tuple(
            DimensionKeyColumnTag(name) for name in self._dimensions.as_group().data_coordinate_keys
        )

    __slots__ = ("_dimensions", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        return DataCoordinate.from_full_values(
            self._dimensions,
            tuple(row[tag] for tag in self._tags),
        )

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return frozenset(self._tags)
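

# Example (illustrative sketch, not executed here): the two private readers
# above differ only in which dimension-key columns they expect in a row.
# With a hypothetical ``DimensionGroup`` named ``dimensions``::
#
#     basic = DataCoordinateReader.make(dimensions, full=False)
#     full = DataCoordinateReader.make(dimensions, full=True)
#
# ``basic.columns_required`` covers only the required dimension keys, read via
# `DataCoordinate.from_required_values`; ``full.columns_required`` also covers
# implied keys, read via `DataCoordinate.from_full_values`.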


class _ExpandedDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``records=True``
    case.

    Parameters
    ----------
    full_reader : `_FullDataCoordinateReader`
        Reader for full data IDs that don't have records.
    record_caches : `~collections.abc.Mapping`
        Mapping of cached dimension records, keyed by dimension element name.
    record_readers : `~collections.abc.Mapping`
        Mapping from `DimensionElement` to `DimensionRecordReader`.  Should
        include all elements in the data coordinate's dimensions that are not
        in ``record_caches``.
    """

    def __init__(
        self,
        full_reader: _FullDataCoordinateReader,
        record_caches: Mapping[str, DimensionRecordSet],
        record_readers: Mapping[DimensionElement, DimensionRecordReader],
    ):
        self._full_reader = full_reader
        self._record_readers = record_readers
        self._record_caches = record_caches

    __slots__ = ("_full_reader", "_record_readers", "_record_caches", "_cached_columns_required")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        full = self._full_reader.read(row)
        records = {}
        for element_name, cache in self._record_caches.items():
            records[element_name] = cache.find(full.subset(cache.element.minimal_group))
        for element, reader in self._record_readers.items():
            records[element.name] = reader.read(row)
        return full.expanded(records)

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        result = set(self._full_reader.columns_required)
        for reader in self._record_readers.values():
            result.update(reader.columns_required)
        return result


class DatasetRefReader:
    """Reader class that extracts `DatasetRef` objects from query result rows.

    Parameters
    ----------
    dataset_type : `DatasetType`
        Dataset type for extracted references.
    full : `bool`, optional
        Whether to expect and extract implied dimensions as well as required
        dimensions.
    translate_collection : `~collections.abc.Callable`, optional
        Callable that returns `str` collection names given collection primary
        key values.  Optional only for registries that use names as primary
        keys, or if ``run`` is always passed to `read`.
    records : `bool`, optional
        Whether to attach dimension records to data IDs.
    record_caches : `~collections.abc.Mapping`, optional
        Mapping of cached dimension records, keyed by dimension element name.
        Ignored unless ``records=True``.
    """

    def __init__(
        self,
        dataset_type: DatasetType,
        *,
        full: bool = True,
        translate_collection: Callable[[Any], str] | None = None,
        records: bool = False,
        record_caches: Mapping[str, DimensionRecordSet] | None = None,
    ):
        self._data_coordinate_reader = DataCoordinateReader.make(
            dataset_type.dimensions.as_group(), full=full, records=records, record_caches=record_caches
        )
        self._dataset_type = dataset_type
        self._translate_collection = translate_collection
        self._id_tag = DatasetColumnTag(dataset_type.name, "dataset_id")
        self._run_tag = DatasetColumnTag(dataset_type.name, "run")

    __slots__ = (
        "_data_coordinate_reader",
        "_dataset_type",
        "_translate_collection",
        "_id_tag",
        "_run_tag",
        "_cached_columns_required",
    )

    def read(
        self,
        row: Mapping[ColumnTag, Any],
        *,
        run: str | None = None,
        data_id: DataCoordinate | None = None,
    ) -> DatasetRef:
        """Read a `DatasetRef` from a query result row.

        Parameters
        ----------
        row : `~collections.abc.Mapping`
            Mapping with `ColumnTag` keys representing a query result row.
        run : `str`, optional
            Name of the `~CollectionType.RUN` collection; when provided the
            run key does not need to be present in the result row, and
            ``translate_collection`` does not need to be provided at
            construction.
        data_id : `DataCoordinate`, optional
            Data ID; when provided the dimensions do not need to be present
            in the result row.

        Returns
        -------
        ref : `DatasetRef`
            New dataset reference.
        """
        if data_id is None:
            data_id = self._data_coordinate_reader.read(row)
        if run is None:
            run_key = row[self._run_tag]
            if self._translate_collection is not None:
                run = self._translate_collection(run_key)
            else:
                run = run_key
        return DatasetRef(
            self._dataset_type,
            data_id,
            run=run,
            id=row[self._id_tag],
        )

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        result = set(self._data_coordinate_reader.columns_required)
        result.add(self._id_tag)
        result.add(self._run_tag)
        return result
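

# Example (illustrative sketch, not executed here): a ``DatasetRefReader`` is
# typically constructed once per dataset type and applied to each result row.
# ``my_dataset_type`` and ``query_rows`` below are hypothetical stand-ins for
# objects supplied by the query system::
#
#     reader = DatasetRefReader(my_dataset_type, full=True)
#     refs = [reader.read(row, run="my_run") for row in query_rows]
#
# Passing ``run`` explicitly means the row does not need a run column and no
# ``translate_collection`` callable is required at construction.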


class DimensionRecordReader:
    """Read dimension records.

    Parameters
    ----------
    element : `DimensionElement`
        The element to read.
    """

    def __init__(self, element: DimensionElement):
        self._cls = element.RecordClass
        self._tags = element.RecordClass.fields.columns

    __slots__ = ("_cls", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DimensionRecord:
        """Read a `DimensionRecord` from a query result row.

        Parameters
        ----------
        row : `~collections.abc.Mapping`
            Mapping with `ColumnTag` keys representing a query result row.

        Returns
        -------
        record : `DimensionRecord`
            New dimension record.
        """
        return self._cls(**{name: row[tag] for tag, name in self._tags.items()})

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return self._tags.keys()
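

# Example (illustrative sketch, not executed here): given a dimension element
# from a butler's dimension universe, a ``DimensionRecordReader`` pulls that
# element's record columns out of a result row.  ``butler`` is a hypothetical
# ``Butler`` instance and ``row`` a query result row::
#
#     element = butler.dimensions["detector"]
#     reader = DimensionRecordReader(element)
#     record = reader.read(row)  # row must supply reader.columns_required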