Coverage for python / lsst / daf / butler / dimensions / _group.py: 39%
211 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-22 08:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-22 08:55 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("DimensionGroup", "SerializedDimensionGroup")
32import itertools
33from collections.abc import Iterable, Iterator, Mapping, Set
34from types import MappingProxyType
35from typing import TYPE_CHECKING, Any, TypeAlias
37import pydantic
38from deprecated.sphinx import deprecated
39from pydantic_core import core_schema
41from lsst.utils.classes import cached_getter, immutable
43from .. import pydantic_utils
44from .._named import NamedValueAbstractSet, NamedValueSet
45from .._topology import TopologicalFamily, TopologicalSpace
47if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
48 from ._elements import DimensionElement
49 from ._universe import DimensionUniverse
class SortedSequenceSet(Set[str]):
    """A set-like interface wrapper around a tuple.

    This delegates directly to ``tuple.__contains__``, so there is an implicit
    assumption that `len` is small and hence O(N) lookups are not a problem, as
    is the case for sets of dimension names.

    Parameters
    ----------
    seq : `tuple` [`str`, ...]
        Strings to seed the set.  The tuple is stored as-is (it is neither
        copied, sorted, nor de-duplicated here).
    """

    __slots__ = ("_seq",)

    def __init__(self, seq: tuple[str, ...]):
        self._seq = seq

    def __contains__(self, x: object) -> bool:
        return x in self._seq

    def __iter__(self) -> Iterator[str]:
        return iter(self._seq)

    def __len__(self) -> int:
        return len(self._seq)

    def __hash__(self) -> int:
        return hash(self._seq)

    def __eq__(self, other: object) -> bool:
        # Fast path: anything exposing a ``_seq`` attribute is compared by its
        # underlying sequence.  Test against `None` explicitly, because an
        # empty tuple is falsy and would otherwise be mistaken for "no such
        # attribute" and sent down the slower generic path.
        seq = getattr(other, "_seq", None)
        if seq is not None:
            return seq == self._seq
        # Fall back to the generic `collections.abc.Set` comparison.
        return super().__eq__(other)

    # MyPy really wants _from_iterable to be generic, but this set doesn't
    # support anything other than strings.
    @classmethod
    def _from_iterable(cls, iterable: Iterable[str]) -> set[str]:  # type: ignore[override]
        # This is used by collections.abc.Set mixin methods when they need
        # to return a new object (e.g. in `__and__`).
        return set(iterable)

    def __repr__(self) -> str:
        return f"{{{', '.join(str(k) for k in self._seq)}}}"

    def as_tuple(self) -> tuple[str, ...]:
        """Return the underlying tuple.

        Returns
        -------
        t : `tuple`
            A tuple of all the values.
        """
        return self._seq

    # TODO: remove on DM-45185
    @property
    @deprecated(
        "Deprecated in favor of direct iteration over the parent set.  Will be removed after v28.",
        version="v28",
        category=FutureWarning,
    )
    def names(self) -> Set[str]:
        """An alias to ``self``.

        This is a backwards-compatibility API that allows `DimensionGroup` to
        mimic the old ``DimensionGraph`` object it replaced, by permitting
        expressions like ``x.required.names`` when ``x`` can be an object of
        either type.
        """
        return self
@immutable
class DimensionGroup:  # numpydoc ignore=PR02
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGroup` behaves in many respects like a set of `str` dimension
    names that maintains several special subsets and supersets of related
    dimension elements.  It does not fully implement the `collections.abc.Set`
    interface, because it defines a few different iteration orders and does not
    privilege any one of them by implementing ``__iter__``.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Object that manages all known dimensions.
    names : `~collections.abc.Iterable` of `str` or `DimensionGroup`, optional
        An iterable of the names of dimensions that must be included in the
        group.  All (recursive) dependencies of these dimensions will also be
        included.  If a `DimensionGroup` that already belongs to ``universe``
        is passed, it is returned unchanged.  Defaults to an empty set.
    _conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is for internal use only.

    Notes
    -----
    `DimensionGroup` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available.  Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.

    This class is not a Pydantic model, but it implements the
    `__get_pydantic_core_schema__` special method and hence can be used as a
    field in Pydantic models or [de]serialized directly via
    `pydantic.TypeAdapter`, but validation requires a `DimensionUniverse` to be
    passed as the "universe" key in the Pydantic validation context.  The
    `.pydantic_utils.DeferredValidation` class can be used to defer validation
    of this object or other types that use it until that context is available.
    """

    def __new__(
        cls,
        universe: DimensionUniverse,
        names: Iterable[str] | DimensionGroup = frozenset(),
        _conform: bool = True,
    ) -> DimensionGroup:
        if isinstance(names, DimensionGroup):
            if names.universe is universe:
                return names
            else:
                names = names.names
        if _conform:
            # Expand dimension names to include all required and implied
            # dependencies.
            to_expand = set(names)
            expanded: set[str] = set()
            while to_expand:
                dimension = universe[to_expand.pop()]
                expanded.add(dimension.name)
                to_expand.update(dimension.required.names)
                to_expand.update(dimension.implied.names)
                to_expand.difference_update(expanded)
            cache_key = frozenset(expanded)
        else:
            cache_key = frozenset(names)
        # Look in the cache of existing groups, with the expanded set of names.
        self = universe._cached_groups.get(cache_key)
        if self is not None:
            return self
        # This is apparently a new group.  Create it, and add it to the cache.
        self = super().__new__(cls)
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.names = SortedSequenceSet(tuple(d.name for d in universe.sorted(cache_key)))
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.names.
        self.elements = SortedSequenceSet(
            tuple(e.name for e in universe.elements if e.required.names <= self.names)
        )
        self.governors = SortedSequenceSet(
            tuple(d for d in self.names if d in universe.governor_dimensions.names)
        )
        self.skypix = SortedSequenceSet(tuple(d for d in self.names if d in universe.skypix_dimensions.names))
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one group and implied in
        # another.  Collect everything implied by any member of this group
        # once, instead of re-querying the universe for every pair of
        # dimensions.
        implied_by_members: set[str] = set()
        for name in self.names:
            implied_by_members.update(universe[name].implied.names)
        # If no dimension in the group implies a name, it's required.  Both
        # subsets keep the ordering of ``self.names``.
        self.required = SortedSequenceSet(tuple(d for d in self.names if d not in implied_by_members))
        self.implied = SortedSequenceSet(tuple(d for d in self.names if d in implied_by_members))

        self._space_families = MappingProxyType(
            {
                space: NamedValueSet(
                    universe[e].topology[space] for e in self.elements if space in universe[e].topology
                ).freeze()
                for space in TopologicalSpace.__members__.values()
            }
        )

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGroup because many (many!)
        # DataCoordinates will share the same DimensionGroup, and we want them
        # to be lightweight.  The order here is what's convenient for
        # DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._data_coordinate_indices = {
            name: i for i, name in enumerate(itertools.chain(self.required, self.implied))
        }
        return universe._cached_groups.set_or_get(cache_key, self)

    def __getnewargs__(self) -> tuple:
        # The stored names are already fully expanded, so reconstruction can
        # skip the dependency expansion (``_conform=False``).
        return (self.universe, self.names.as_tuple(), False)

    def __deepcopy__(self, memo: dict) -> DimensionGroup:
        # DimensionGroup is recursively immutable; see note in @immutable
        # decorator.
        return self

    def __len__(self) -> int:
        return len(self.names)

    def __contains__(self, element: str) -> bool:
        if element in self.elements:
            return True
        else:
            # Imported here rather than at module scope because the import
            # may be circular (see the TYPE_CHECKING block at the top of the
            # module).
            from ._elements import DimensionElement

            if isinstance(element, DimensionElement):  # type: ignore[unreachable]
                raise TypeError(
                    "DimensionGroup does not support membership tests using DimensionElement "
                    "instances; use their names instead."
                )
            return False

    def __str__(self) -> str:
        return str(self.names)

    def __repr__(self) -> str:
        return f"DimensionGroup({self.names})"

    # TODO: remove on DM-45185
    @deprecated(
        "Deprecated as no longer necessary (this method always returns 'self').  Will be removed after v28.",
        version="v28",
        category=FutureWarning,
    )
    def as_group(self) -> DimensionGroup:
        """Return ``self``.

        Returns
        -------
        group : `DimensionGroup`
            Returns itself.

        Notes
        -----
        This is a backwards-compatibility API that allowed both the old
        ``DimensionGraph`` class and `DimensionGroup` to be coerced to the
        latter.
        """
        return self

    def isdisjoint(self, other: DimensionGroup) -> bool:
        """Test whether the intersection of two groups is empty.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        is_disjoint : `bool`
            `True` if the two groups have no dimension names in common.  This
            is always the case if either operand is empty.
        """
        return self.names.isdisjoint(other.names)

    def issubset(self, other: DimensionGroup) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        is_subset : `bool`
            `True` if every dimension name in ``self`` is also in ``other``.
            This is always the case if ``self`` is empty.
        """
        return self.names <= other.names

    def issuperset(self, other: DimensionGroup) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        is_superset : `bool`
            `True` if every dimension name in ``other`` is also in ``self``.
            This is always the case if ``other`` is empty.
        """
        return self.names >= other.names

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, DimensionGroup):
            return self.names == other.names
        else:
            return False

    def __hash__(self) -> int:
        return hash(self.required.as_tuple())

    def __le__(self, other: DimensionGroup) -> bool:
        return self.names <= other.names

    def __ge__(self, other: DimensionGroup) -> bool:
        return self.names >= other.names

    def __lt__(self, other: DimensionGroup) -> bool:
        return self.names < other.names

    def __gt__(self, other: DimensionGroup) -> bool:
        return self.names > other.names

    def union(*operands: DimensionGroup, universe: DimensionUniverse | None = None) -> DimensionGroup:
        """Construct a new group with all dimensions in any of the operands.

        Parameters
        ----------
        *operands : `DimensionGroup`
            Groups to union.
        universe : `DimensionUniverse`, optional
            Universe to use to create an empty group when no operands are
            provided (i.e. when this method is called on the class).

        Returns
        -------
        union : `DimensionGroup`
            Union of all the groups.

        Raises
        ------
        TypeError
            Raised if there are no operands and ``universe`` is not provided.

        Notes
        -----
        The elements of the returned group may exceed the naive union of their
        elements, as some dimension elements are included in groups whenever
        multiple dimensions are present, and those dependency dimensions could
        have been provided by different operands.
        """
        if universe is None:
            if not operands:
                raise TypeError("'universe' must be provided when 'union' is called with an empty iterable.")
            # When called as a bound method, the first operand is ``self``.
            universe = operands[0].universe
        names = set().union(*(operand.names for operand in operands))
        return DimensionGroup(universe, names)

    def intersection(self, *others: DimensionGroup) -> DimensionGroup:
        """Construct a new group with only dimensions in all of the operands.

        Parameters
        ----------
        *others : `DimensionGroup`
            Other groups to compare with.

        Returns
        -------
        inter : `DimensionGroup`
            Intersection of all the groups.

        Notes
        -----
        See also `union`.
        """
        names = set(self.names).intersection(*(other.names for other in others))
        return DimensionGroup(self.universe, names=names)

    def difference(self, other: DimensionGroup) -> DimensionGroup:
        """Construct a new group with dimensions that are in ``self`` but not
        ``other`` OR dependencies of those in ``self`` but not in ``other``.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        diff : `DimensionGroup`
            Difference of the two groups.

        Notes
        -----
        This is not exactly equivalent to a true `set` difference, because the
        result must be expanded to include required and implied dependencies,
        and those may be common to ``self`` and ``other``.
        """
        return DimensionGroup(self.universe, names=self.names - other.names)

    def __or__(self, other: DimensionGroup) -> DimensionGroup:
        return self.union(other)

    def __and__(self, other: DimensionGroup) -> DimensionGroup:
        return self.intersection(other)

    def __sub__(self, other: DimensionGroup) -> DimensionGroup:
        return self.difference(other)

    @property
    def data_coordinate_keys(self) -> Set[str]:
        """A set of dimensions ordered like `DataCoordinate.mapping`.

        This order is defined as all required dimensions followed by all
        implied dimensions.
        """
        return self._data_coordinate_indices.keys()

    @property
    @cached_getter
    def lookup_order(self) -> tuple[str, ...]:
        """A tuple of all elements in the order needed to find their records.

        Unlike the table definition/topological order (which is what
        `DimensionUniverse.sorted` gives you), when dimension A implies
        dimension B, dimension A appears first.
        """
        done: set[str] = set()
        order: list[str] = []

        def add_to_order(element: DimensionElement) -> None:
            # Append ``element`` (and then, recursively, what it implies) once
            # all of its other required dimensions have been placed.
            if element.name in done:
                return
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                return
            order.append(element.name)
            done.add(element.name)
            for other in element.implied:
                add_to_order(other)

        # Keep sweeping over the required dimensions until all are placed; a
        # dimension skipped in one sweep may become eligible in a later one.
        while not done.issuperset(self.required):
            for dimension in self.required:
                add_to_order(self.universe[dimension])

        # Everything not reached above goes last, in ``self.elements`` order.
        order.extend(element for element in self.elements if element not in done)
        return tuple(order)

    def _choose_dimension(self, families: NamedValueAbstractSet[TopologicalFamily]) -> str | None:
        # Only an unambiguous (exactly one) family yields a dimension name.
        if len(families) != 1:
            return None
        return next(iter(families)).choose(self).name

    @property
    def region_dimension(self) -> str | None:
        """Return the most appropriate spatial dimension to use when looking
        up a region.

        Returns `None` if there are no appropriate dimensions or more than one
        spatial family.
        """
        return self._choose_dimension(self.spatial)

    @property
    def timespan_dimension(self) -> str | None:
        """Return the most appropriate temporal dimension to use when looking
        up a time span.

        Returns `None` if there are no appropriate dimensions or more than one
        temporal family.
        """
        return self._choose_dimension(self.temporal)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this group."""
        return self._space_families[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this group."""
        return self._space_families[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this group is a subset
    (`DimensionUniverse`).
    """

    names: SortedSequenceSet
    """A true `~collections.abc.Set` of the dimension names.

    Iteration order is consistent with `DimensionUniverse.sorted`: each
    dimension is preceded by its required and implied dependencies.
    """

    elements: SortedSequenceSet
    """A true `~collections.abc.Set` of all dimension element names in the
    group; a superset of `names`.
    """

    governors: SortedSequenceSet
    """A true `~collections.abc.Set` of all governor dimension names in the
    group.
    """

    skypix: SortedSequenceSet
    """A true `~collections.abc.Set` of all skypix dimension names in the
    group.
    """

    required: SortedSequenceSet
    """The dimensions that must be directly identified via their primary keys
    in a data ID in order to identify the rest of the elements in the group.
    """

    implied: SortedSequenceSet
    """The dimensions that need not be directly identified via their primary
    keys in a data ID.
    """

    _space_families: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this group that exist in topological spaces
    relationships (`~collections.abc.Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """

    _data_coordinate_indices: dict[str, int]

    @classmethod
    def _validate(cls, data: Any, info: pydantic.ValidationInfo) -> DimensionGroup:
        """Pydantic validator (deserializer) for `DimensionGroup`.

        This satisfies the `pydantic.WithInfoPlainValidatorFunction` signature.
        """
        universe = pydantic_utils.get_universe_from_context(info.context)
        return cls.from_simple(data, universe)

    @classmethod
    def from_simple(cls, data: SerializedDimensionGroup, universe: DimensionUniverse) -> DimensionGroup:
        """Create an instance of this class from serialized data.

        Parameters
        ----------
        data : `SerializedDimensionGroup`
            Serialized data from a previous call to ``to_simple``.
        universe : `DimensionUniverse`
            Dimension universe in which this dimension group will be defined.

        Returns
        -------
        group : `DimensionGroup`
            The result of ``universe.conform(data)``.
        """
        return universe.conform(data)

    def to_simple(self) -> SerializedDimensionGroup:
        """Convert this class to a simple data format suitable for
        serialization.

        Returns
        -------
        data : `SerializedDimensionGroup`
            A new `list` of the dimension names, in the order of ``names``.
        """
        return list(self.names)

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: pydantic.GetCoreSchemaHandler
    ) -> core_schema.CoreSchema:
        # This is the Pydantic hook for overriding serialization, validation,
        # and JSON schema generation.
        list_of_str_schema = core_schema.list_schema(core_schema.str_schema())
        from_list_of_str_schema = core_schema.chain_schema(
            [list_of_str_schema, core_schema.with_info_plain_validator_function(cls._validate)]
        )
        return core_schema.json_or_python_schema(
            # When deserializing from JSON, expect it to look like list[str].
            json_schema=from_list_of_str_schema,
            # When deserializing from Python, first see if it's already a
            # DimensionGroup and then try conversion from list[str].
            python_schema=core_schema.union_schema(
                [core_schema.is_instance_schema(DimensionGroup), from_list_of_str_schema]
            ),
            # When serializing convert it to a `list[str]`.
            serialization=core_schema.plain_serializer_function_ser_schema(
                cls.to_simple, return_schema=list_of_str_schema
            ),
        )
# Serialized form of a `DimensionGroup`: a plain list of dimension names, as
# returned by `DimensionGroup.to_simple` and accepted by
# `DimensionGroup.from_simple`.
SerializedDimensionGroup: TypeAlias = list[str]