Coverage for python/lsst/daf/butler/registry/queries/_readers.py: 46%
105 statements
coverage.py v7.3.1, created at 2023-10-02 08:00 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "DataCoordinateReader",
    "DatasetRefReader",
    "DimensionRecordReader",
)

from abc import ABC, abstractmethod
from collections.abc import Callable, Mapping, Set
from typing import TYPE_CHECKING, Any

from lsst.utils.classes import cached_getter

from ...core import (
    DataCoordinate,
    DatasetColumnTag,
    DatasetRef,
    DatasetType,
    DimensionElement,
    DimensionGraph,
    DimensionKeyColumnTag,
    DimensionRecord,
)

if TYPE_CHECKING:
    from lsst.daf.relation import ColumnTag

class DataCoordinateReader(ABC):
    """Base class and factory for reader objects that extract `DataCoordinate`
    instances from query result rows.
    """

    @staticmethod
    def make(
        dimensions: DimensionGraph,
        full: bool = True,
        records: bool = False,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] | None = None,
    ) -> DataCoordinateReader:
        """Construct a concrete reader for a set of dimensions.

        Parameters
        ----------
        dimensions : `DimensionGraph`
            Dimensions of the `DataCoordinate` instances the new reader will
            read.
        full : `bool`, optional
            Whether to expect and extract implied dimensions as well as
            required dimensions.
        records : `bool`, optional
            Whether to attach dimension records.
        record_caches : `~collections.abc.Mapping`, optional
            Nested mapping (outer keys are dimension elements, inner keys are
            data IDs for that element) of cached dimension records. Ignored
            unless ``records=True``.

        Returns
        -------
        reader : `DataCoordinateReader`
            Concrete reader instance.
        """
        if full:
            full_reader = _FullDataCoordinateReader(dimensions)
            if records:
                if record_caches is None:
                    record_caches = {}
                else:
                    record_caches = {
                        e: cache for e, cache in record_caches.items() if e in dimensions.elements
                    }
                record_readers = {}
                for element in dimensions.elements:
                    if element not in record_caches:
                        record_readers[element] = DimensionRecordReader(element)
                return _ExpandedDataCoordinateReader(full_reader, record_caches, record_readers)
            return full_reader
        else:
            assert not records, "Cannot add records unless full=True."
            return _BasicDataCoordinateReader(dimensions)

    __slots__ = ()

    @abstractmethod
    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        """Read a `DataCoordinate` from a query result row.

        Parameters
        ----------
        row : `~collections.abc.Mapping`
            Mapping with `ColumnTag` keys representing a query result row.

        Returns
        -------
        data_coordinate : `DataCoordinate`
            New data ID.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def columns_required(self) -> Set[ColumnTag]:
        raise NotImplementedError()
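

# Usage sketch (illustrative, not executed here; the ``universe.extract``
# call and the literal dimension names and values below are assumptions for
# this example): a reader built by `DataCoordinateReader.make` turns a
# column-tag-keyed result row into a data ID.
#
#     dimensions = universe.extract(["detector"])
#     reader = DataCoordinateReader.make(dimensions, full=False)
#     row = {
#         DimensionKeyColumnTag("instrument"): "HSC",
#         DimensionKeyColumnTag("detector"): 42,
#     }
#     data_id = reader.read(row)  # DataCoordinate holding the required values
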
class _BasicDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=False`` case.

    Parameters
    ----------
    dimensions : `DimensionGraph`
        Dimensions of the `DataCoordinate` instances read.
    """

    def __init__(self, dimensions: DimensionGraph):
        self._dimensions = dimensions
        self._tags = tuple(DimensionKeyColumnTag(name) for name in self._dimensions.required.names)

    __slots__ = ("_dimensions", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        return DataCoordinate.fromRequiredValues(
            self._dimensions,
            tuple(row[tag] for tag in self._tags),
        )

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return frozenset(self._tags)

class _FullDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=True`` case.

    Parameters
    ----------
    dimensions : `DimensionGraph`
        Dimensions of the `DataCoordinate` instances read.
    """

    def __init__(self, dimensions: DimensionGraph):
        self._dimensions = dimensions
        self._tags = tuple(DimensionKeyColumnTag(name) for name in self._dimensions._dataCoordinateIndices)

    __slots__ = ("_dimensions", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        return DataCoordinate.fromFullValues(
            self._dimensions,
            tuple(row[tag] for tag in self._tags),
        )

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return frozenset(self._tags)
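

# Note (a sketch of the contract, not normative): the full reader expects a
# key column for every dimension in ``_dataCoordinateIndices`` (required
# dimensions followed by implied ones), while _BasicDataCoordinateReader
# above needs only the required subset. For example, assuming a universe in
# which ``physical_filter`` implies ``band``, a full row for the dimensions
# ``{instrument, physical_filter}`` carries key columns for all three.
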
class _ExpandedDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=True``,
    ``records=True`` case.

    Parameters
    ----------
    full_reader : `_FullDataCoordinateReader`
        Reader for full data IDs that don't have records.
    record_caches : `~collections.abc.Mapping`
        Nested mapping (outer keys are dimension elements, inner keys are data
        IDs for that element) of cached dimension records.
    record_readers : `~collections.abc.Mapping`
        Mapping from `DimensionElement` to `DimensionRecordReader`. Should
        include all elements in the data coordinate's dimensions that are not
        in ``record_caches``.
    """

    def __init__(
        self,
        full_reader: _FullDataCoordinateReader,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]],
        record_readers: Mapping[DimensionElement, DimensionRecordReader],
    ):
        self._full_reader = full_reader
        self._record_readers = record_readers
        self._record_caches = record_caches

    __slots__ = ("_full_reader", "_record_readers", "_record_caches", "_cached_columns_required")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        full = self._full_reader.read(row)
        records = {}
        for element, cache in self._record_caches.items():
            records[element.name] = cache[full.subset(element.graph)]
        for element, reader in self._record_readers.items():
            records[element.name] = reader.read(row)
        return full.expanded(records)

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        result = set(self._full_reader.columns_required)
        for reader in self._record_readers.values():
            result.update(reader.columns_required)
        return result
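

# Shape of the inputs, as a sketch (the element names and the variables
# ``universe``, ``instrument_data_id``, and ``instrument_record`` are
# assumptions for illustration):
#
#     record_caches = {
#         universe["instrument"]: {instrument_data_id: instrument_record},
#     }
#     record_readers = {
#         universe["detector"]: DimensionRecordReader(universe["detector"]),
#     }
#
# Elements present in the cache are looked up by the data ID subset for that
# element; all other elements must have their record columns in the row.
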
class DatasetRefReader:
    """Reader class that extracts `DatasetRef` objects from query result rows.

    Parameters
    ----------
    dataset_type : `DatasetType`
        Dataset type for extracted references.
    full : `bool`, optional
        Whether to expect and extract implied dimensions as well as required
        dimensions.
    translate_collection : `~collections.abc.Callable`, optional
        Callable that returns `str` collection names given collection primary
        key values. Optional only for registries that use names as primary
        keys, or if ``run`` is always passed to `read`.
    records : `bool`, optional
        Whether to attach dimension records to data IDs.
    record_caches : `~collections.abc.Mapping`, optional
        Nested mapping (outer keys are dimension elements, inner keys are
        data IDs for that element) of cached dimension records. Ignored
        unless ``records=True``.
    """

    def __init__(
        self,
        dataset_type: DatasetType,
        *,
        full: bool = True,
        translate_collection: Callable[[Any], str] | None = None,
        records: bool = False,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] | None = None,
    ):
        self._data_coordinate_reader = DataCoordinateReader.make(
            dataset_type.dimensions, full=full, records=records, record_caches=record_caches
        )
        self._dataset_type = dataset_type
        self._translate_collection = translate_collection
        self._id_tag = DatasetColumnTag(dataset_type.name, "dataset_id")
        self._run_tag = DatasetColumnTag(dataset_type.name, "run")

    __slots__ = (
        "_data_coordinate_reader",
        "_dataset_type",
        "_translate_collection",
        "_id_tag",
        "_run_tag",
        "_cached_columns_required",
    )

    def read(
        self,
        row: Mapping[ColumnTag, Any],
        *,
        run: str | None = None,
        data_id: DataCoordinate | None = None,
    ) -> DatasetRef:
        """Read a `DatasetRef` from a query result row.

        Parameters
        ----------
        row : `~collections.abc.Mapping`
            Mapping with `ColumnTag` keys representing a query result row.
        run : `str`, optional
            Name of the `~CollectionType.RUN` collection; when provided the
            run key does not need to be present in the result row, and
            ``translate_collection`` does not need to be provided at
            construction.
        data_id : `DataCoordinate`, optional
            Data ID; when provided the dimensions do not need to be present
            in the result row.

        Returns
        -------
        ref : `DatasetRef`
            New dataset reference.
        """
        if data_id is None:
            data_id = self._data_coordinate_reader.read(row)
        if run is None:
            run_key = row[self._run_tag]
            if self._translate_collection is not None:
                run = self._translate_collection(run_key)
            else:
                run = run_key
        return DatasetRef(
            self._dataset_type,
            data_id,
            run=run,
            id=row[self._id_tag],
        )

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        result = set(self._data_coordinate_reader.columns_required)
        result.add(self._id_tag)
        result.add(self._run_tag)
        return result
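

# Usage sketch (illustrative only; the dataset type, dimension names, and
# values are assumptions): a row for a hypothetical "raw" dataset must carry
# the data-ID key columns plus the dataset_id and run columns declared above.
#
#     reader = DatasetRefReader(raw_dataset_type, full=False)
#     row = {
#         DimensionKeyColumnTag("instrument"): "HSC",
#         DimensionKeyColumnTag("detector"): 10,
#         DimensionKeyColumnTag("exposure"): 903342,
#         DatasetColumnTag("raw", "dataset_id"): dataset_uuid,
#         DatasetColumnTag("raw", "run"): "HSC/raw/all",
#     }
#     ref = reader.read(row)  # run key used as the name directly, since no
#                             # translate_collection callable was given
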
class DimensionRecordReader:
    """Reader class that extracts `DimensionRecord` objects from query result
    rows.

    Parameters
    ----------
    element : `DimensionElement`
        Element whose records this reader extracts.
    """

    def __init__(self, element: DimensionElement):
        self._cls = element.RecordClass
        self._tags = element.RecordClass.fields.columns

    __slots__ = ("_cls", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DimensionRecord:
        return self._cls(**{name: row[tag] for tag, name in self._tags.items()})

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return self._tags.keys()
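

# Usage sketch (illustrative; the element name and the ``full_name`` field
# are assumptions): the row must contain every column tag in
# ``element.RecordClass.fields.columns``.
#
#     reader = DimensionRecordReader(universe["detector"])
#     record = reader.read(row)
#     record.full_name  # field values become attributes of the record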