Coverage for python/lsst/daf/butler/registry/queries/_readers.py: 35%
105 statements
coverage.py v7.2.7, created at 2023-06-14 09:11 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "DataCoordinateReader",
    "DatasetRefReader",
    "DimensionRecordReader",
)

from abc import ABC, abstractmethod
from collections.abc import Callable, Mapping, Set
from typing import TYPE_CHECKING, Any

from lsst.utils.classes import cached_getter

from ...core import (
    DataCoordinate,
    DatasetColumnTag,
    DatasetRef,
    DatasetType,
    DimensionElement,
    DimensionGraph,
    DimensionKeyColumnTag,
    DimensionRecord,
)

if TYPE_CHECKING:
    from lsst.daf.relation import ColumnTag


class DataCoordinateReader(ABC):
    """Base class and factory for reader objects that extract `DataCoordinate`
    instances from query result rows.
    """

    @staticmethod
    def make(
        dimensions: DimensionGraph,
        full: bool = True,
        records: bool = False,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] | None = None,
    ) -> DataCoordinateReader:
        """Construct a concrete reader for a set of dimensions.

        Parameters
        ----------
        dimensions : `DimensionGraph`
            Dimensions of the `DataCoordinate` instances the new reader will
            read.
        full : `bool`, optional
            Whether to expect and extract implied dimensions as well as
            required dimensions.
        records : `bool`, optional
            Whether to attach dimension records.
        record_caches : `~collections.abc.Mapping`, optional
            Nested mapping (outer keys are dimension elements, inner keys are
            data IDs for that element) of cached dimension records.  Ignored
            unless ``records=True``.

        Returns
        -------
        reader : `DataCoordinateReader`
            Concrete reader instance.
        """
        if full:
            full_reader = _FullDataCoordinateReader(dimensions)
            if records:
                if record_caches is None:
                    record_caches = {}
                else:
                    record_caches = {
                        e: cache for e, cache in record_caches.items() if e in dimensions.elements
                    }
                record_readers = {}
                for element in dimensions.elements:
                    if element not in record_caches:
                        record_readers[element] = DimensionRecordReader(element)
                return _ExpandedDataCoordinateReader(full_reader, record_caches, record_readers)
            return full_reader
        else:
            assert not records, "Cannot add records unless full=True."
            return _BasicDataCoordinateReader(dimensions)
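
    # Illustrative usage sketch of this factory (not part of the original
    # module; ``graph`` and ``caches`` are placeholder names):
    #
    #     DataCoordinateReader.make(graph, full=False)   # required dimension values only
    #     DataCoordinateReader.make(graph)                # required and implied values
    #     DataCoordinateReader.make(graph, records=True, record_caches=caches)
    #     # The last form also attaches dimension records, taking them from
    #     # ``caches`` where available and reading the rest from each result row.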

    __slots__ = ()

    @abstractmethod
    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        """Read a `DataCoordinate` from a query result row.

        Parameters
        ----------
        row : `~collections.abc.Mapping`
            Mapping with `ColumnTag` keys representing a query result row.

        Returns
        -------
        data_coordinate : `DataCoordinate`
            New data ID.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def columns_required(self) -> Set[ColumnTag]:
        raise NotImplementedError()
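
# Illustrative sketch of the row format ``read`` expects (not part of the
# original module; the dimension names and ``graph`` are placeholders):
#
#     row = {
#         DimensionKeyColumnTag("instrument"): "HSC",
#         DimensionKeyColumnTag("detector"): 42,
#     }
#     reader = DataCoordinateReader.make(graph, full=False)
#     assert reader.columns_required <= row.keys()
#     data_id = reader.read(row)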


class _BasicDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=False`` case.

    Parameters
    ----------
    dimensions : `DimensionGraph`
        Dimensions of the `DataCoordinate` instances read.
    """

    def __init__(self, dimensions: DimensionGraph):
        self._dimensions = dimensions
        self._tags = tuple(DimensionKeyColumnTag(name) for name in self._dimensions.required.names)

    __slots__ = ("_dimensions", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        return DataCoordinate.fromRequiredValues(
            self._dimensions,
            tuple(row[tag] for tag in self._tags),
        )

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return frozenset(self._tags)


class _FullDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=True`` case.

    Parameters
    ----------
    dimensions : `DimensionGraph`
        Dimensions of the `DataCoordinate` instances read.
    """

    def __init__(self, dimensions: DimensionGraph):
        self._dimensions = dimensions
        self._tags = tuple(DimensionKeyColumnTag(name) for name in self._dimensions._dataCoordinateIndices)

    __slots__ = ("_dimensions", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        return DataCoordinate.fromFullValues(
            self._dimensions,
            tuple(row[tag] for tag in self._tags),
        )

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return frozenset(self._tags)


class _ExpandedDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``records=True``
    case.

    Parameters
    ----------
    full_reader : `_FullDataCoordinateReader`
        Reader for full data IDs that don't have records.
    record_caches : `~collections.abc.Mapping`
        Nested mapping (outer keys are dimension elements, inner keys are data
        IDs for that element) of cached dimension records.
    record_readers : `~collections.abc.Mapping`
        Mapping from `DimensionElement` to `DimensionRecordReader`.  Should
        include all elements in the data coordinate's dimensions that are not
        in ``record_caches``.
    """

    def __init__(
        self,
        full_reader: _FullDataCoordinateReader,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]],
        record_readers: Mapping[DimensionElement, DimensionRecordReader],
    ):
        self._full_reader = full_reader
        self._record_readers = record_readers
        self._record_caches = record_caches

    __slots__ = ("_full_reader", "_record_readers", "_record_caches", "_cached_columns_required")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        full = self._full_reader.read(row)
        records = {}
        # Records for cached elements are looked up by that element's data ID.
        for element, cache in self._record_caches.items():
            records[element.name] = cache[full.subset(element.graph)]
        # Records for the remaining elements are read from the row itself.
        for element, reader in self._record_readers.items():
            records[element.name] = reader.read(row)
        return full.expanded(records)

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        result = set(self._full_reader.columns_required)
        for reader in self._record_readers.values():
            result.update(reader.columns_required)
        return result
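
# Sketch of how `DataCoordinateReader.make` splits elements between the two
# mappings consumed above (hypothetical element objects for illustration):
#
#     record_caches = {exposure_element: cached_records}
#     record_readers = {detector_element: DimensionRecordReader(detector_element)}
#     # read() looks up cached elements by data ID, reads the rest from the
#     # row, and returns full.expanded(records).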


class DatasetRefReader:
    """Reader class that extracts `DatasetRef` objects from query result rows.

    Parameters
    ----------
    dataset_type : `DatasetType`
        Dataset type for extracted references.
    full : `bool`, optional
        Whether to expect and extract implied dimensions as well as required
        dimensions.
    translate_collection : `~collections.abc.Callable`, optional
        Callable that returns `str` collection names given collection primary
        key values.  Optional only for registries that use names as primary
        keys, or if ``run`` is always passed to `read`.
    records : `bool`, optional
        Whether to attach dimension records to data IDs.
    record_caches : `~collections.abc.Mapping`, optional
        Nested mapping (outer keys are dimension elements, inner keys are
        data IDs for that element) of cached dimension records.  Ignored
        unless ``records=True``.
    """

    def __init__(
        self,
        dataset_type: DatasetType,
        *,
        full: bool = True,
        translate_collection: Callable[[Any], str] | None = None,
        records: bool = False,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] | None = None,
    ):
        self._data_coordinate_reader = DataCoordinateReader.make(
            dataset_type.dimensions, full=full, records=records, record_caches=record_caches
        )
        self._dataset_type = dataset_type
        self._translate_collection = translate_collection
        self._id_tag = DatasetColumnTag(dataset_type.name, "dataset_id")
        self._run_tag = DatasetColumnTag(dataset_type.name, "run")

    __slots__ = (
        "_data_coordinate_reader",
        "_dataset_type",
        "_translate_collection",
        "_id_tag",
        "_run_tag",
        "_cached_columns_required",
    )

    def read(
        self,
        row: Mapping[ColumnTag, Any],
        *,
        run: str | None = None,
        data_id: DataCoordinate | None = None,
    ) -> DatasetRef:
        """Read a `DatasetRef` from a query result row.

        Parameters
        ----------
        row : `~collections.abc.Mapping`
            Mapping with `ColumnTag` keys representing a query result row.
        run : `str`, optional
            Name of the `~CollectionType.RUN` collection; when provided the
            run key does not need to be present in the result row, and
            ``translate_collection`` does not need to be provided at
            construction.
        data_id : `DataCoordinate`, optional
            Data ID; when provided the dimensions do not need to be present
            in the result row.

        Returns
        -------
        ref : `DatasetRef`
            New dataset reference.
        """
        if data_id is None:
            data_id = self._data_coordinate_reader.read(row)
        if run is None:
            run_key = row[self._run_tag]
            if self._translate_collection is not None:
                run = self._translate_collection(run_key)
            else:
                run = run_key
        return DatasetRef(
            self._dataset_type,
            data_id,
            run=run,
            id=row[self._id_tag],
        )
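
    # Illustrative usage sketch (not part of the original module; ``dataset_type``,
    # ``row``, ``lookup_name``, and the collection name are placeholders):
    #
    #     ref_reader = DatasetRefReader(dataset_type, translate_collection=lookup_name)
    #     ref = ref_reader.read(row, run="HSC/runs/example")
    #     # When ``run`` is given, the row needs no "run" column and no
    #     # collection-name translation is performed.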

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        result = set(self._data_coordinate_reader.columns_required)
        result.add(self._id_tag)
        result.add(self._run_tag)
        return result


class DimensionRecordReader:
    """Reader class that extracts `DimensionRecord` objects from query result
    rows.

    Parameters
    ----------
    element : `DimensionElement`
        Dimension element whose records the reader extracts.
    """

    def __init__(self, element: DimensionElement):
        self._cls = element.RecordClass
        self._tags = element.RecordClass.fields.columns

    __slots__ = ("_cls", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DimensionRecord:
        return self._cls(**{name: row[tag] for tag, name in self._tags.items()})

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return self._tags.keys()
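
# Illustrative usage sketch (not part of the original module; ``universe`` and
# ``row`` are placeholders for a `DimensionUniverse` and a query result row):
#
#     reader = DimensionRecordReader(universe["detector"])
#     # ``row`` must provide every column in ``reader.columns_required``.
#     record = reader.read(row)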