Coverage for python/lsst/daf/butler/registry/_collection_summary.py: 27%
56 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-15 09:13 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("CollectionSummary",)
25import dataclasses
26from collections.abc import Generator, Iterable, Mapping, Set
27from typing import cast
29from ..core import DataCoordinate, DatasetRef, DatasetType
30from ..core.named import NamedValueSet
@dataclasses.dataclass
class CollectionSummary:
    """A summary of the datasets that can be found in a collection."""

    def copy(self) -> CollectionSummary:
        """Return a deep copy of this object.

        Returns
        -------
        copy : `CollectionSummary`
            A copy of ``self`` that can be modified without modifying ``self``
            at all.
        """
        # Each governor value-set is copied individually so mutating the copy
        # never leaks back into this summary.
        governors_copy = {dimension: values.copy() for dimension, values in self.governors.items()}
        return CollectionSummary(
            dataset_types=self.dataset_types.copy(),
            governors=governors_copy,
        )

    def add_datasets_generator(self, refs: Iterable[DatasetRef]) -> Generator[DatasetRef, None, None]:
        """Include the given datasets in the summary, yielding them back as a
        generator.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            Datasets to include.

        Yields
        ------
        ref : `DatasetRef`
            The same dataset references originally passed in.

        Notes
        -----
        As a generator, this method does nothing if its return iterator is not
        used.  Call `add_datasets` instead to avoid this; this method is
        intended for the case where the given iterable may be single-pass and a
        copy is not desired, but other processing needs to be done on its
        elements.
        """
        for ref in refs:
            self.dataset_types.add(ref.datasetType)
            data_id = ref.dataId
            for governor in data_id.graph.governors.names:
                self.governors.setdefault(governor, set()).add(cast(str, data_id[governor]))
            yield ref

    def add_datasets(self, refs: Iterable[DatasetRef]) -> None:
        """Include the given datasets in the summary.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            Datasets to include.
        """
        # Drain the generator purely for its side effects on the summary.
        for _ref in self.add_datasets_generator(refs):
            pass

    def add_data_ids_generator(
        self, dataset_type: DatasetType, data_ids: Iterable[DataCoordinate]
    ) -> Generator[DataCoordinate, None, None]:
        """Include the given dataset type and data IDs in the summary, yielding
        them back as a generator.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type to include.
        data_ids : `~collections.abc.Iterable` [ `DataCoordinate` ]
            Data IDs to include.

        Yields
        ------
        data_id : `DataCoordinate`
            The same data IDs originally passed in.

        Notes
        -----
        As a generator, this method does nothing if its return iterator is not
        used.  Call `add_data_ids` instead to avoid this; this method is
        intended for the case where the given iterable may be single-pass and a
        copy is not desired, but other processing needs to be done on its
        elements.
        """
        # The dataset type is recorded up front, even if no data IDs follow.
        self.dataset_types.add(dataset_type)
        for data_id in data_ids:
            for governor in data_id.graph.governors.names:
                self.governors.setdefault(governor, set()).add(cast(str, data_id[governor]))
            yield data_id

    def add_data_ids(self, dataset_type: DatasetType, data_ids: Iterable[DataCoordinate]) -> None:
        """Include the given dataset type and data IDs in the summary.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type to include.
        data_ids : `~collections.abc.Iterable` [ `DataCoordinate` ]
            Data IDs to include.
        """
        # Drain the generator purely for its side effects on the summary.
        for _data_id in self.add_data_ids_generator(dataset_type, data_ids):
            pass

    def update(self, *args: CollectionSummary) -> None:
        """Update this summary with dataset types and governor dimension values
        from other summaries.

        Parameters
        ----------
        *args : `CollectionSummary`
            Summaries to include in ``self``.
        """
        for other in args:
            self.dataset_types.update(other.dataset_types)
            for governor, other_values in other.governors.items():
                if governor in self.governors:
                    self.governors[governor].update(other_values)
                else:
                    # Copy so we never share a set with the other summary.
                    self.governors[governor] = set(other_values)

    def union(*args: CollectionSummary) -> CollectionSummary:
        """Construct a summary that contains all dataset types and governor
        dimension values in any of the inputs.

        Parameters
        ----------
        *args : `CollectionSummary`
            Summaries to combine.

        Returns
        -------
        unioned : `CollectionSummary`
            New summary object that represents the union of the given ones.
        """
        # Note: deliberately written without ``self`` so it can be invoked
        # either as ``a.union(b)`` or ``CollectionSummary.union(a, b)``.
        combined = CollectionSummary()
        combined.update(*args)
        return combined

    def is_compatible_with(
        self,
        dataset_type: DatasetType,
        dimensions: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
        name: str | None = None,
    ) -> bool:
        """Test whether the collection summarized by this object should be
        queried for a given dataset type and governor dimension values.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type being queried.  If this collection has no instances of
            this dataset type (or its parent dataset type, if it is a
            component), `False` will always be returned.
        dimensions : `~collections.abc.Mapping`
            Bounds on the values governor dimensions can take in the query,
            usually from a WHERE expression, as a mapping from dimension name
            to a set of `str` governor dimension values.
        rejections : `list` [ `str` ], optional
            If provided, a list that will be populated with a log- or
            exception-friendly message explaining why this dataset is
            incompatible with this collection when `False` is returned.
        name : `str`, optional
            Name of the collection this object summarizes, for use in messages
            appended to ``rejections``.  Ignored if ``rejections`` is `None`.

        Returns
        -------
        compatible : `bool`
            `True` if the dataset query described by this summary and the given
            arguments might yield non-empty results; `False` if the result from
            such a query is definitely empty.
        """
        # Components are summarized under their composite's name.
        if dataset_type.isComponent():
            parent = dataset_type.makeCompositeDatasetType()
        else:
            parent = dataset_type
        if parent.name not in self.dataset_types.names:
            if rejections is not None:
                rejections.append(f"No datasets of type {parent.name} in collection {name!r}.")
            return False
        # Only governors that are known to the summary, relevant to the
        # dataset type, and constrained by the caller can rule anything out.
        relevant = self.governors.keys() & dataset_type.dimensions.names & dimensions.keys()
        for gov_name in relevant:
            values_given = dimensions[gov_name]
            if self.governors[gov_name].isdisjoint(values_given):
                if rejections is not None:
                    rejections.append(
                        f"No datasets with {gov_name} in {values_given} in collection {name!r}."
                    )
                return False
        return True

    dataset_types: NamedValueSet[DatasetType] = dataclasses.field(default_factory=NamedValueSet)
    """Dataset types that may be present in the collection
    (`NamedValueSet` [ `DatasetType` ]).

    A dataset type not in this set is definitely not in the collection, but
    the converse is not necessarily true.
    """

    governors: dict[str, set[str]] = dataclasses.field(default_factory=dict)
    """Governor data ID values that are present in the collection's dataset
    data IDs (`dict` [ `str`, `set` [ `str` ] ]).

    A data ID value not in this restriction is not necessarily inconsistent
    with a query in the collection; such a search may only involve dataset
    types that do not include one or more governor dimensions in their data
    IDs, and hence the values of those data IDs are unconstrained by this
    collection in the query.
    """