Coverage for python/lsst/daf/butler/registry/_collection_summary.py: 26%
55 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-14 09:22 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("CollectionSummary",)
25import dataclasses
26from typing import AbstractSet, Generator, Iterable, List, Mapping, Optional, cast
28from ..core import DataCoordinate, DatasetRef, DatasetType
29from ..core.named import NamedValueSet
@dataclasses.dataclass
class CollectionSummary:
    """A summary of the datasets that can be found in a collection.

    Notes
    -----
    This is an approximate, conservative summary: a dataset type or governor
    value absent from the summary is definitely absent from the collection,
    but presence in the summary does not guarantee presence in the
    collection.
    """

    def copy(self) -> CollectionSummary:
        """Return a deep copy of this object.

        Returns
        -------
        copy : `CollectionSummary`
            A copy of ``self`` that can be modified without modifying ``self``
            at all.
        """
        return CollectionSummary(
            dataset_types=self.dataset_types.copy(),
            # Copy each governor value set as well; a shallow dict copy would
            # share the sets, letting mutations of the copy leak into self.
            governors={k: v.copy() for k, v in self.governors.items()},
        )

    def add_datasets_generator(self, refs: Iterable[DatasetRef]) -> Generator[DatasetRef, None, None]:
        """Include the given datasets in the summary, yielding them back as a
        generator.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            Datasets to include.

        Yields
        ------
        ref : `DatasetRef`
            The same dataset references originally passed in.

        Notes
        -----
        As a generator, this method does nothing if its return iterator is not
        used.  Call `add_datasets` instead to avoid this; this method is
        intended for the case where the given iterable may be single-pass and a
        copy is not desired, but other processing needs to be done on its
        elements.
        """
        for ref in refs:
            self.dataset_types.add(ref.datasetType)
            # Record the value of every governor dimension in this dataset's
            # data ID so queries can later prune collections by governor.
            for gov in ref.dataId.graph.governors.names:
                self.governors.setdefault(gov, set()).add(cast(str, ref.dataId[gov]))
            yield ref

    def add_datasets(self, refs: Iterable[DatasetRef]) -> None:
        """Include the given datasets in the summary.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            Datasets to include.
        """
        # Drain the generator for its side effects only.
        for _ in self.add_datasets_generator(refs):
            pass

    def add_data_ids_generator(
        self, dataset_type: DatasetType, data_ids: Iterable[DataCoordinate]
    ) -> Generator[DataCoordinate, None, None]:
        """Include the given dataset type and data IDs in the summary, yielding
        them back as a generator.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type to include.
        data_ids : `Iterable` [ `DataCoordinate` ]
            Data IDs to include.

        Yields
        ------
        data_id : `DataCoordinate`
            The same data IDs originally passed in.

        Notes
        -----
        As a generator, this method does nothing if its return iterator is not
        used.  Call `add_data_ids` instead to avoid this; this method is
        intended for the case where the given iterable may be single-pass and a
        copy is not desired, but other processing needs to be done on its
        elements.
        """
        # The dataset type is added eagerly even if ``data_ids`` is empty or
        # never consumed past this point.
        self.dataset_types.add(dataset_type)
        for data_id in data_ids:
            for gov in data_id.graph.governors.names:
                self.governors.setdefault(gov, set()).add(cast(str, data_id[gov]))
            yield data_id

    def add_data_ids(self, dataset_type: DatasetType, data_ids: Iterable[DataCoordinate]) -> None:
        """Include the given dataset type and data IDs in the summary.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type to include.
        data_ids : `Iterable` [ `DataCoordinate` ]
            Data IDs to include.
        """
        # Drain the generator for its side effects only.
        for _ in self.add_data_ids_generator(dataset_type, data_ids):
            pass

    def update(self, *args: CollectionSummary) -> None:
        """Update this summary with dataset types and governor dimension values
        from other summaries.

        Parameters
        ----------
        *args : `CollectionSummary`
            Summaries to include in ``self``.
        """
        for arg in args:
            self.dataset_types.update(arg.dataset_types)
            for gov, values in arg.governors.items():
                self.governors.setdefault(gov, set()).update(values)

    def union(*args: CollectionSummary) -> CollectionSummary:
        """Construct a summary that contains all dataset types and governor
        dimension values in any of the inputs.

        Parameters
        ----------
        *args : `CollectionSummary`
            Summaries to combine.

        Returns
        -------
        unioned : `CollectionSummary`
            New summary object that represents the union of the given ones.

        Notes
        -----
        There is deliberately no ``self`` parameter: this lets the method be
        called either as ``CollectionSummary.union(a, b)`` or ``a.union(b)``,
        with every argument (including a bound instance) treated uniformly.
        """
        result = CollectionSummary()
        result.update(*args)
        return result

    def is_compatible_with(
        self,
        dataset_type: DatasetType,
        dimensions: Mapping[str, AbstractSet[str]],
        rejections: list[str] | None = None,
        name: str | None = None,
    ) -> bool:
        """Test whether the collection summarized by this object should be
        queried for a given dataset type and governor dimension values.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type being queried.  If this collection has no instances of
            this dataset type (or its parent dataset type, if it is a
            component), `False` will always be returned.
        dimensions : `Mapping`
            Bounds on the values governor dimensions can take in the query,
            usually from a WHERE expression, as a mapping from dimension name
            to a set of `str` governor dimension values.
        rejections : `list` [ `str` ], optional
            If provided, a list that will be populated with a log- or
            exception-friendly message explaining why this dataset is
            incompatible with this collection when `False` is returned.
        name : `str`, optional
            Name of the collection this object summarizes, for use in messages
            appended to ``rejections``.  Ignored if ``rejections`` is `None`.

        Returns
        -------
        compatible : `bool`
            `True` if the dataset query described by this summary and the given
            arguments might yield non-empty results; `False` if the result from
            such a query is definitely empty.
        """
        # Components are summarized under their parent (composite) dataset
        # type, so resolve to the parent before the membership check.
        parent = dataset_type if not dataset_type.isComponent() else dataset_type.makeCompositeDatasetType()
        if parent.name not in self.dataset_types.names:
            if rejections is not None:
                rejections.append(f"No datasets of type {parent.name} in collection {name!r}.")
            return False
        # Only governors that are both constrained by the query and relevant
        # to this dataset type's dimensions can rule the collection out.
        for gov_name in self.governors.keys() & dataset_type.dimensions.names & dimensions.keys():
            values_in_collection = self.governors[gov_name]
            values_given = dimensions[gov_name]
            if values_in_collection.isdisjoint(values_given):
                if rejections is not None:
                    rejections.append(
                        f"No datasets with {gov_name} in {values_given} in collection {name!r}."
                    )
                return False
        return True

    dataset_types: NamedValueSet[DatasetType] = dataclasses.field(default_factory=NamedValueSet)
    """Dataset types that may be present in the collection
    (`NamedValueSet` [ `DatasetType` ]).

    A dataset type not in this set is definitely not in the collection, but
    the converse is not necessarily true.
    """

    governors: dict[str, set[str]] = dataclasses.field(default_factory=dict)
    """Governor data ID values that are present in the collection's dataset
    data IDs (`dict` [ `str`, `set` [ `str` ] ]).

    A data ID value not in this restriction is not necessarily inconsistent
    with a query in the collection; such a search may only involve dataset
    types that do not include one or more governor dimensions in their data
    IDs, and hence the values of those data IDs are unconstrained by this
    collection in the query.
    """