Coverage for python/lsst/daf/butler/registry/summaries.py: 30%
102 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-28 07:52 +0000
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-28 07:52 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = (
24 "CollectionSummary",
25 "GovernorDimensionRestriction",
26)
28import itertools
29from dataclasses import dataclass
30from typing import (
31 AbstractSet,
32 Any,
33 ItemsView,
34 Iterable,
35 Iterator,
36 List,
37 Mapping,
38 Optional,
39 Set,
40 Union,
41 ValuesView,
42)
44from lsst.utils.iteration import ensure_iterable
46from ..core import (
47 DataCoordinate,
48 DatasetType,
49 DimensionUniverse,
50 GovernorDimension,
51 NamedKeyDict,
52 NamedKeyMapping,
53 NamedValueAbstractSet,
54 NamedValueSet,
55)
class GovernorDimensionRestriction(NamedKeyMapping[GovernorDimension, AbstractSet[str]]):
    """Mapping-like constraint on the values governor dimensions may take.

    Each key is a governor dimension and each value is the set of strings
    that dimension is allowed to take.  A dimension that is absent from the
    mapping is not constrained at all, while a dimension mapped to an empty
    set permits no values.

    Parameters
    ----------
    mapping : `NamedKeyDict` [ `GovernorDimension`, `Set` [ `str` ]]
        Allowed values keyed by governor dimension.  The object is stored
        as-is (it is not copied), so in-place modifications made through
        this restriction are applied to it directly.
    """

    def __init__(self, mapping: NamedKeyDict[GovernorDimension, Set[str]]):
        self._mapping = mapping

    @classmethod
    def makeEmpty(cls, universe: DimensionUniverse) -> GovernorDimensionRestriction:
        """Build a restriction that permits no values for any governor
        dimension known to ``universe``.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Object that manages all dimensions.

        Returns
        -------
        restriction : `GovernorDimensionRestriction`
            Restriction in which every governor dimension of ``universe`` is
            mapped to its own empty set.
        """
        empty_sets = ((governor, set()) for governor in universe.getGovernorDimensions())
        return cls(NamedKeyDict(empty_sets))

    @classmethod
    def makeFull(cls) -> GovernorDimensionRestriction:
        """Build a restriction that permits any value for any governor
        dimension.

        Returns
        -------
        restriction : `GovernorDimensionRestriction`
            Restriction with no keys at all, which therefore allows any
            value for any governor dimension.
        """
        return cls(NamedKeyDict())

    def __eq__(self, other: Any) -> bool:
        # Only another restriction can compare equal; equality is then
        # delegated to the wrapped mappings.
        if isinstance(other, GovernorDimensionRestriction):
            return self._mapping == other._mapping
        return False

    def __str__(self) -> str:
        entries = [f"{dimension.name}: {values}" for dimension, values in self._mapping.items()]
        return "({})".format(", ".join(entries))

    def __repr__(self) -> str:
        entries = [f"{dimension.name}={values}" for dimension, values in self._mapping.items()]
        return "GovernorDimensionRestriction({})".format(", ".join(entries))

    def __iter__(self) -> Iterator[GovernorDimension]:
        return iter(self._mapping)

    def __len__(self) -> int:
        return len(self._mapping)

    @property
    def names(self) -> AbstractSet[str]:
        # Docstring inherited.
        return self._mapping.names

    def keys(self) -> NamedValueAbstractSet[GovernorDimension]:  # type: ignore
        return self._mapping.keys()

    def values(self) -> ValuesView[AbstractSet[str]]:
        return self._mapping.values()

    def items(self) -> ItemsView[GovernorDimension, AbstractSet[str]]:
        return self._mapping.items()

    def __getitem__(self, key: Union[str, GovernorDimension]) -> AbstractSet[str]:
        return self._mapping[key]

    def copy(self) -> GovernorDimensionRestriction:
        """Return a deep copy of this object.

        Returns
        -------
        copy : `GovernorDimensionRestriction`
            Independent restriction: both the outer mapping and every inner
            value set are new objects, so modifying the copy never modifies
            ``self``.
        """
        duplicate = NamedKeyDict((dimension, set(values)) for dimension, values in self.items())
        return GovernorDimensionRestriction(duplicate)

    def add(self, dimension: GovernorDimension, value: str) -> None:
        """Allow one more value for a single dimension.

        Parameters
        ----------
        dimension : `GovernorDimension`
            Dimension to update.
        value : `str`
            Value to allow for this dimension.
        """
        allowed = self._mapping.get(dimension)
        if allowed is None:
            # A dimension without an entry is unconstrained, so every value
            # (including this one) is already allowed.
            return
        allowed.add(value)

    def update(self, other: Mapping[GovernorDimension, Union[str, Iterable[str]]]) -> None:
        """Expand ``self`` in place so it permits every value permitted by
        either ``self`` or ``other``.

        Parameters
        ----------
        other : `Mapping` [ `Dimension`, `str` or `Iterable` [ `str` ] ]
            Mapping to union into ``self``.  This may be another
            `GovernorDimensionRestriction` or any other mapping from
            dimension to `str` or iterable of `str`.
        """
        # A dimension constrained only by 'self' is unconstrained in 'other',
        # so the union must not constrain it either.
        unconstrained_in_other = self.keys() - other.keys()
        for dimension in unconstrained_in_other:
            self._mapping.pop(dimension, None)
        # A dimension constrained by both sides gets the union of the values.
        constrained_in_both = self.keys() & other.keys()
        for dimension in constrained_in_both:
            self._mapping[dimension].update(ensure_iterable(other[dimension]))
        # Dimensions that appear only in 'other' need no work: 'self' already
        # leaves them unconstrained.

    def union(
        self, *others: Mapping[GovernorDimension, Union[str, Iterable[str]]]
    ) -> GovernorDimensionRestriction:
        """Return a new restriction permitting any value permitted by any of
        the inputs.

        Parameters
        ----------
        *others : `Mapping` [ `Dimension`, `str` or `Iterable` [ `str` ] ]
            Mappings to union with ``self``.  These may be other
            `GovernorDimensionRestriction` instances or any other kind of
            mapping from dimension to `str` or iterable of `str`.

        Returns
        -------
        unioned : `GovernorDimensionRestriction`
            New restriction object that represents the union of ``self``
            with ``others``; ``self`` is left untouched.
        """
        unioned = self.copy()
        for mapping in others:
            unioned.update(mapping)
        return unioned

    def intersection_update(self, other: Mapping[GovernorDimension, Union[str, Iterable[str]]]) -> None:
        """Narrow ``self`` in place so it permits only values permitted by
        both ``self`` and ``other``.

        Parameters
        ----------
        other : `Mapping` [ `Dimension`, `str` or `Iterable` [ `str` ] ]
            Mapping to intersect into ``self``.  This may be another
            `GovernorDimensionRestriction` or any other mapping from
            dimension to `str` or iterable of `str`.
        """
        for dimension, values in other.items():
            incoming = set(ensure_iterable(values))
            # When 'self' has no entry for the dimension, 'incoming' itself
            # is installed and then intersected with itself (a no-op); that
            # is simpler to read than special-casing the missing key.
            existing = self._mapping.setdefault(dimension, incoming)
            existing.intersection_update(incoming)

    def intersection(
        self, *others: Mapping[GovernorDimension, Union[str, Iterable[str]]]
    ) -> GovernorDimensionRestriction:
        """Return a new restriction permitting only values permitted by all
        of the inputs.

        Parameters
        ----------
        *others : `Mapping` [ `Dimension`, `str` or `Iterable` [ `str` ] ]
            Mappings to intersect with ``self``.  These may be other
            `GovernorDimensionRestriction` instances or any other kind of
            mapping from dimension to `str` or iterable of `str`.

        Returns
        -------
        intersection : `GovernorDimensionRestriction`
            New restriction object that represents the intersection of
            ``self`` with ``others``; ``self`` is left untouched.
        """
        narrowed = self.copy()
        for mapping in others:
            narrowed.intersection_update(mapping)
        return narrowed

    def update_extract(self, data_id: DataCoordinate) -> None:
        """Allow, in addition to what ``self`` already allows, every governor
        dimension value found in the given data ID.

        Parameters
        ----------
        data_id : `DataCoordinate`
            Data ID from which governor dimension values should be
            extracted.  Values for non-governor dimensions are ignored.
        """
        for dimension in data_id.graph.governors:
            allowed = self._mapping.get(dimension)
            if allowed is None:
                # Unconstrained dimension: nothing to record.
                continue
            allowed.add(data_id[dimension])
@dataclass
class CollectionSummary:
    """A summary of the datasets that can be found in a collection."""

    @classmethod
    def makeEmpty(cls, universe: DimensionUniverse) -> CollectionSummary:
        """Construct a `CollectionSummary` for a collection with no
        datasets.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Object that manages all dimensions.

        Returns
        -------
        summary : `CollectionSummary`
            Summary object with no dataset types and no governor dimension
            values.
        """
        return cls(
            datasetTypes=NamedValueSet(),
            dimensions=GovernorDimensionRestriction.makeEmpty(universe),
        )

    def copy(self) -> CollectionSummary:
        """Return a deep copy of this object.

        Returns
        -------
        copy : `CollectionSummary`
            A copy of ``self`` that can be modified without modifying
            ``self`` at all.
        """
        return CollectionSummary(datasetTypes=self.datasetTypes.copy(), dimensions=self.dimensions.copy())

    def union(self, *others: CollectionSummary) -> CollectionSummary:
        """Construct a summary that contains all dataset types and governor
        dimension values in any of the inputs.

        Parameters
        ----------
        *others : `CollectionSummary`
            Summaries to combine with ``self``.

        Returns
        -------
        unioned : `CollectionSummary`
            New summary object that represents the union of ``self`` with
            ``others``.  The result never aliases ``self``, even when no
            other summaries are given.
        """
        if not others:
            # Return a copy rather than ``self``: this object is mutable, and
            # callers are promised a new summary they can modify freely.
            return self.copy()
        datasetTypes = NamedValueSet(self.datasetTypes)
        datasetTypes.update(itertools.chain.from_iterable(o.datasetTypes for o in others))
        dimensions = self.dimensions.union(*[o.dimensions for o in others])
        return CollectionSummary(datasetTypes, dimensions)

    def is_compatible_with(
        self,
        datasetType: DatasetType,
        restriction: GovernorDimensionRestriction,
        rejections: Optional[List[str]] = None,
        name: Optional[str] = None,
    ) -> bool:
        """Test whether the collection summarized by this object should be
        queried for a given dataset type and governor dimension values.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type being queried.  If this collection has no instances
            of this dataset type (or its parent dataset type, if it is a
            component), `False` will always be returned.
        restriction : `GovernorDimensionRestriction`
            Restriction on the values governor dimensions can take in the
            query, usually from a WHERE expression.  If this is disjoint
            with the data IDs actually present in the collection, `False`
            will be returned.
        rejections : `list` [ `str` ], optional
            If provided, a list that will be populated with a log- or
            exception-friendly message explaining why this dataset is
            incompatible with this collection when `False` is returned.
        name : `str`, optional
            Name of the collection this object summarizes, for use in
            messages appended to ``rejections``.  Ignored if ``rejections``
            is `None`.

        Returns
        -------
        compatible : `bool`
            `True` if the dataset query described by this summary and the
            given arguments might yield non-empty results; `False` if the
            result from such a query is definitely empty.
        """
        # Components are looked up through their parent (composite) type.
        parent = datasetType if not datasetType.isComponent() else datasetType.makeCompositeDatasetType()
        if parent not in self.datasetTypes:
            if rejections is not None:
                rejections.append(f"No datasets of type {parent.name} in collection {name!r}.")
            return False
        for governor in datasetType.dimensions.governors:
            values_in_self = self.dimensions.get(governor)
            if values_in_self is None:
                # The summary does not constrain this dimension.
                continue
            values_in_other = restriction.get(governor)
            if values_in_other is None:
                # The query does not constrain this dimension.
                continue
            if values_in_self.isdisjoint(values_in_other):
                # An empty restriction set is always disjoint; the assertion
                # flags that case as an upstream error rather than as an
                # ordinary rejection.
                assert values_in_other, f"No valid values in restriction for dimension {governor}."
                if rejections is not None:
                    rejections.append(
                        f"No datasets with {governor.name} in {values_in_other} "
                        f"in collection {name!r}."
                    )
                return False
        return True

    datasetTypes: NamedValueSet[DatasetType]
    """Dataset types that may be present in the collection
    (`NamedValueSet` [ `DatasetType` ]).

    A dataset type not in this set is definitely not in the collection, but
    the converse is not necessarily true.
    """

    dimensions: GovernorDimensionRestriction
    """Governor dimension values that may be present in the collection
    (`GovernorDimensionRestriction`).

    A dimension value not in this restriction is definitely not in the
    collection, but the converse is not necessarily true.
    """