# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "DataCoordinateReader",
    "DatasetRefReader",
    "DimensionRecordReader",
)

from abc import ABC, abstractmethod
from collections.abc import Callable, Mapping, Set
from typing import TYPE_CHECKING, Any

from lsst.utils.classes import cached_getter

from ..._column_tags import DatasetColumnTag, DimensionKeyColumnTag
from ..._dataset_ref import DatasetRef
from ..._dataset_type import DatasetType
from ...dimensions import DataCoordinate, DimensionElement, DimensionGroup, DimensionRecord

if TYPE_CHECKING:
    from lsst.daf.relation import ColumnTag


class DataCoordinateReader(ABC):
    """Base class and factory for reader objects that extract `DataCoordinate`
    instances from query result rows.
    """

    @staticmethod
    def make(
        dimensions: DimensionGroup,
        full: bool = True,
        records: bool = False,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] | None = None,
    ) -> DataCoordinateReader:
        """Construct a concrete reader for a set of dimensions.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions of the `DataCoordinate` instances the new reader will
            read.
        full : `bool`, optional
            Whether to expect and extract implied dimensions as well as
            required dimensions.
        records : `bool`, optional
            Whether to attach dimension records.
        record_caches : `~collections.abc.Mapping`, optional
            Nested mapping (outer keys are dimension elements, inner keys are
            data IDs for that element) of cached dimension records.  Ignored
            unless ``records=True``.

        Returns
        -------
        reader : `DataCoordinateReader`
            Concrete reader instance.
        """
        if full:
            full_reader = _FullDataCoordinateReader(dimensions)
            if records:
                if record_caches is None:
                    record_caches = {}
                else:
                    # Drop cache entries for elements outside these
                    # dimensions; ``dimensions.elements`` holds element
                    # names, while the cache is keyed by element.
                    record_caches = {
                        e: cache for e, cache in record_caches.items() if e.name in dimensions.elements
                    }
                # Build per-row readers for any element whose records are not
                # already cached.
                record_readers = {}
                for element_name in dimensions.elements:
                    element = dimensions.universe[element_name]
                    if element not in record_caches:
                        record_readers[element] = DimensionRecordReader(element)
                return _ExpandedDataCoordinateReader(full_reader, record_caches, record_readers)
            return full_reader
        else:
            assert not records, "Cannot add records unless full=True."
            return _BasicDataCoordinateReader(dimensions)
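
        # Usage sketch for `make` (illustrative, not normative: ``universe``
        # stands in for a configured `DimensionUniverse` providing `conform`,
        # and ``row`` for a query result row):
        #
        #     dimensions = universe.conform(["detector"])
        #     reader = DataCoordinateReader.make(dimensions, full=False)
        #     data_id = reader.read(row)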

    __slots__ = ()

    @abstractmethod
    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        """Read a `DataCoordinate` from a query result row.

        Parameters
        ----------
        row : `~collections.abc.Mapping`
            Mapping with `ColumnTag` keys representing a query result row.

        Returns
        -------
        data_coordinate : `DataCoordinate`
            New data ID.
        """
        raise NotImplementedError()
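
    # For illustration, a result row is just a mapping keyed by column tags
    # (the values here are hypothetical):
    #
    #     row = {
    #         DimensionKeyColumnTag("instrument"): "HSC",
    #         DimensionKeyColumnTag("detector"): 42,
    #     }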

    @property
    @abstractmethod
    def columns_required(self) -> Set[ColumnTag]:
        raise NotImplementedError()


class _BasicDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=False`` case.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        Dimensions of the `DataCoordinate` instances read.
    """

    def __init__(self, dimensions: DimensionGroup):
        self._dimensions = dimensions
        self._tags = tuple(DimensionKeyColumnTag(name) for name in self._dimensions.required.names)

    __slots__ = ("_dimensions", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        return DataCoordinate.from_required_values(
            self._dimensions,
            tuple(row[tag] for tag in self._tags),
        )

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return frozenset(self._tags)
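
# For example (values illustrative): with required dimensions
# ("instrument", "detector"), a row mapping
# DimensionKeyColumnTag("instrument") -> "HSC" and
# DimensionKeyColumnTag("detector") -> 42 reads as the data ID
# {instrument: 'HSC', detector: 42}.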


class _FullDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=True`` case.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        Dimensions of the `DataCoordinate` instances read.
    """

    def __init__(self, dimensions: DimensionGroup):
        self._dimensions = dimensions
        self._tags = tuple(
            DimensionKeyColumnTag(name) for name in self._dimensions.data_coordinate_keys
        )

    __slots__ = ("_dimensions", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        return DataCoordinate.from_full_values(
            self._dimensions,
            tuple(row[tag] for tag in self._tags),
        )

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return frozenset(self._tags)
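
# Note that a "full" reader also expects implied dimensions in the row; e.g.
# in the default dimension configuration ``physical_filter`` implies ``band``,
# so both key columns must be present (the exact set depends on the
# configured universe).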


class _ExpandedDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``records=True``
    case.

    Parameters
    ----------
    full_reader : `_FullDataCoordinateReader`
        Reader for full data IDs that don't have records.
    record_caches : `~collections.abc.Mapping`
        Nested mapping (outer keys are dimension elements, inner keys are data
        IDs for that element) of cached dimension records.
    record_readers : `~collections.abc.Mapping`
        Mapping from `DimensionElement` to `DimensionRecordReader`.  Should
        include all elements in the data coordinate's dimensions that are not
        in ``record_caches``.
    """

    def __init__(
        self,
        full_reader: _FullDataCoordinateReader,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]],
        record_readers: Mapping[DimensionElement, DimensionRecordReader],
    ):
        self._full_reader = full_reader
        self._record_readers = record_readers
        self._record_caches = record_caches

    __slots__ = ("_full_reader", "_record_readers", "_record_caches", "_cached_columns_required")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        full = self._full_reader.read(row)
        records = {}
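        # Look up cached records by each element's own data ID; read
        # everything else directly from the row.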
        for element, cache in self._record_caches.items():
            records[element.name] = cache[full.subset(element.graph)]
        for element, reader in self._record_readers.items():
            records[element.name] = reader.read(row)
        return full.expanded(records)

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        result = set(self._full_reader.columns_required)
        for reader in self._record_readers.values():
            result.update(reader.columns_required)
        return result


class DatasetRefReader:
    """Reader class that extracts `DatasetRef` objects from query result rows.

    Parameters
    ----------
    dataset_type : `DatasetType`
        Dataset type for extracted references.
    full : `bool`, optional
        Whether to expect and extract implied dimensions as well as required
        dimensions.
    translate_collection : `~collections.abc.Callable`, optional
        Callable that returns `str` collection names given collection primary
        key values.  Optional only for registries that use names as primary
        keys, or if ``run`` is always passed to `read`.
    records : `bool`, optional
        Whether to attach dimension records to data IDs.
    record_caches : `~collections.abc.Mapping`, optional
        Nested mapping (outer keys are dimension elements, inner keys are
        data IDs for that element) of cached dimension records.  Ignored
        unless ``records=True``.
    """

    def __init__(
        self,
        dataset_type: DatasetType,
        *,
        full: bool = True,
        translate_collection: Callable[[Any], str] | None = None,
        records: bool = False,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] | None = None,
    ):
        self._data_coordinate_reader = DataCoordinateReader.make(
            dataset_type.dimensions.as_group(), full=full, records=records, record_caches=record_caches
        )
        self._dataset_type = dataset_type
        self._translate_collection = translate_collection
        self._id_tag = DatasetColumnTag(dataset_type.name, "dataset_id")
        self._run_tag = DatasetColumnTag(dataset_type.name, "run")

    __slots__ = (
        "_data_coordinate_reader",
        "_dataset_type",
        "_translate_collection",
        "_id_tag",
        "_run_tag",
        "_cached_columns_required",
    )

    def read(
        self,
        row: Mapping[ColumnTag, Any],
        *,
        run: str | None = None,
        data_id: DataCoordinate | None = None,
    ) -> DatasetRef:
        """Read a `DatasetRef` from a query result row.

        Parameters
        ----------
        row : `~collections.abc.Mapping`
            Mapping with `ColumnTag` keys representing a query result row.
        run : `str`, optional
            Name of the `~CollectionType.RUN` collection; when provided the
            run key does not need to be present in the result row, and
            ``translate_collection`` does not need to be provided at
            construction.
        data_id : `DataCoordinate`, optional
            Data ID; when provided the dimensions do not need to be present
            in the result row.

        Returns
        -------
        ref : `DatasetRef`
            New dataset reference.
        """
        if data_id is None:
            data_id = self._data_coordinate_reader.read(row)
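        # Prefer a caller-provided run name; otherwise take the run key from
        # the row, translating it to a name when the registry does not use
        # names as primary keys.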
        if run is None:
            run_key = row[self._run_tag]
            if self._translate_collection is not None:
                run = self._translate_collection(run_key)
            else:
                run = run_key
        return DatasetRef(
            self._dataset_type,
            data_id,
            run=run,
            id=row[self._id_tag],
        )
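
        # A usage sketch (illustrative: ``rows`` stands in for mappings
        # produced by executing a query that includes
        # ``reader.columns_required``):
        #
        #     reader = DatasetRefReader(dataset_type)
        #     refs = [reader.read(row) for row in rows]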

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        result = set(self._data_coordinate_reader.columns_required)
        result.add(self._id_tag)
        result.add(self._run_tag)
        return result


class DimensionRecordReader:
    """Reader class that extracts `DimensionRecord` objects from query result
    rows.

    Parameters
    ----------
    element : `DimensionElement`
        Dimension element whose records will be read.
    """

    def __init__(self, element: DimensionElement):
        self._cls = element.RecordClass
        self._tags = element.RecordClass.fields.columns

    __slots__ = ("_cls", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DimensionRecord:
        return self._cls(**{name: row[tag] for tag, name in self._tags.items()})

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return self._tags.keys()
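
# A usage sketch (illustrative: assumes the configured universe defines a
# ``detector`` element, and ``row`` holds every column in
# ``reader.columns_required``):
#
#     reader = DimensionRecordReader(universe["detector"])
#     record = reader.read(row)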