Coverage for python/lsst/daf/butler/core/dimensions/_graph.py: 33%
165 statements
« prev ^ index » next coverage.py v7.2.4, created at 2023-04-29 02:58 -0700
« prev ^ index » next coverage.py v7.2.4, created at 2023-04-29 02:58 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["DimensionGraph", "SerializedDimensionGraph"]
26import itertools
27from types import MappingProxyType
28from typing import (
29 TYPE_CHECKING,
30 AbstractSet,
31 Any,
32 ClassVar,
33 Dict,
34 Iterable,
35 Iterator,
36 List,
37 Mapping,
38 Optional,
39 Set,
40 Tuple,
41 Union,
42)
44from lsst.utils.classes import cached_getter, immutable
45from pydantic import BaseModel
47from .._topology import TopologicalFamily, TopologicalSpace
48from ..json import from_json_pydantic, to_json_pydantic
49from ..named import NamedValueAbstractSet, NamedValueSet
51if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
52 from ...registry import Registry
53 from ._elements import Dimension, DimensionElement
54 from ._governor import GovernorDimension
55 from ._universe import DimensionUniverse
class SerializedDimensionGraph(BaseModel):
    """Serializable stand-in for a `DimensionGraph`.

    Only the dimension names are stored.
    """

    names: List[str]

    @classmethod
    def direct(cls, *, names: List[str]) -> SerializedDimensionGraph:
        """Build a `SerializedDimensionGraph` while skipping validation.

        Unlike the pydantic "construct" method, the keyword arguments here are
        exactly the fields the model requires.  The instance is created with
        ``__new__`` and its attributes are written with
        ``object.__setattr__``.

        Call this only with inputs that are already trusted.

        Parameters
        ----------
        names : `list` [ `str` ]
            Dimension names to store on the model, used as given.

        Returns
        -------
        model : `SerializedDimensionGraph`
            The newly created, unvalidated model instance.
        """
        instance = SerializedDimensionGraph.__new__(cls)
        # Populate the field and then the record of which fields were set,
        # in that order, going straight through ``object.__setattr__``.
        for attribute, value in (("names", names), ("__fields_set__", {"names"})):
            object.__setattr__(instance, attribute, value)
        return instance
@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available.  Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.

    Instances are cached on the universe, keyed by the (expanded) set of
    dimension names, so constructing a graph with names that have been seen
    before returns the existing instance.
    """

    _serializedType = SerializedDimensionGraph

    def __new__(
        cls,
        universe: DimensionUniverse,
        dimensions: Optional[Iterable[Dimension]] = None,
        names: Optional[Iterable[str]] = None,
        conform: bool = True,
    ) -> DimensionGraph:
        conformedNames: Set[str]
        if names is None:
            if dimensions is None:
                conformedNames = set()
            else:
                try:
                    # Optimize for NamedValueSet/NamedKeyDict, though that's
                    # not required.
                    conformedNames = set(dimensions.names)  # type: ignore
                except AttributeError:
                    conformedNames = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            conformedNames = set(names)
        if conform:
            # The return value is not used; the name set is presumably
            # expanded in place -- verify against DimensionUniverse.
            universe.expandDimensionNameSet(conformedNames)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(conformedNames)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it, and add it to the cache.
        # NOTE(review): the instance is cached before it is fully populated;
        # if anything below raises, the cache keeps a partially-constructed
        # object.  Confirm whether early registration is required.
        self = super().__new__(cls)
        universe._cache[cacheKey] = self
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(universe.sorted(conformedNames)).freeze()
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(
            e for e in universe.getStaticElements() if e.required.names <= self.dimensions.names
        ).freeze()
        self._finish()
        return self

    def _finish(self) -> None:
        """Populate the attributes derived from ``dimensions``/``elements``.

        Sets ``governors``, ``required``, ``implied``, ``topology`` and
        ``_dataCoordinateIndices``.  Called once from `__new__` after
        ``dimensions`` and ``elements`` have been set.
        """
        # Make a set containing just the governor dimensions in this graph.
        # Need local import to avoid cycle.
        from ._governor import GovernorDimension

        self.governors = NamedValueSet(
            d for d in self.dimensions if isinstance(d, GovernorDimension)
        ).freeze()
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        # Collect every name implied by some dimension in this graph once, so
        # classifying each dimension is a single membership test rather than
        # a nested scan over all dimensions.
        impliedNames: Set[str] = set()
        for dimension in self.dimensions:
            impliedNames.update(dimension.implied.names)
        required: NamedValueSet[Dimension] = NamedValueSet()
        implied: NamedValueSet[Dimension] = NamedValueSet()
        for dimension in self.dimensions:
            if dimension.name in impliedNames:
                implied.add(dimension)
            else:
                # If no dimension in the graph implies this one, it's
                # required.
                required.add(dimension)
        self.required = required.freeze()
        self.implied = implied.freeze()

        # For each topological space, gather the families contributed by the
        # elements of this graph that participate in that space.
        self.topology = MappingProxyType(
            {
                space: NamedValueSet(e.topology[space] for e in self.elements if space in e.topology).freeze()
                for space in TopologicalSpace.__members__.values()
            }
        )

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.  The order here is what's convenient
        # for DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._dataCoordinateIndices: Dict[str, int] = {
            name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names))
        }

    def __getnewargs__(self) -> tuple:
        # Positional arguments for `__new__`: (universe, dimensions, names,
        # conform).  The stored names are passed with ``conform=False``.
        return (self.universe, None, tuple(self.dimensions.names), False)

    def __deepcopy__(self, memo: dict) -> DimensionGraph:
        # DimensionGraph is recursively immutable; see note in @immutable
        # decorator.
        return self

    @property
    def names(self) -> AbstractSet[str]:
        """Set of the names of all dimensions in the graph (`KeysView`)."""
        return self.dimensions.names

    def to_simple(self, minimal: bool = False) -> SerializedDimensionGraph:
        """Convert this class to a simple python type.

        This type is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization.  Has no effect for this class.

        Returns
        -------
        simple : `SerializedDimensionGraph`
            Model holding the names of the dimensions.
        """
        # Names are all we can serialize.
        return SerializedDimensionGraph(names=list(self.names))

    @classmethod
    def from_simple(
        cls,
        names: SerializedDimensionGraph,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
    ) -> DimensionGraph:
        """Construct a new object from the simplified form.

        This is assumed to support data returned from the `to_simple`
        method.

        Parameters
        ----------
        names : `SerializedDimensionGraph`
            Serialized form holding the names of the dimensions.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset.  Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted.  Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        graph : `DimensionGraph`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or
            if no universe could be obtained from ``registry``.
        """
        if universe is None:
            if registry is None:
                raise ValueError(
                    "One of universe or registry is required to convert names to a DimensionGraph"
                )
            universe = registry.dimensions
        if universe is None:
            # this is for mypy; only reachable if ``registry.dimensions`` is
            # itself `None`.
            raise ValueError("Unable to determine a usable universe")

        return cls(names=names.names, universe=universe)

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``,
        not just true `Dimension` instances.
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.  ``default``
        is forwarded unchanged to ``self.elements.get``.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({self!s})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions <= other.dimensions

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions >= other.dimensions

    def __eq__(self, other: Any) -> bool:
        """Test the arguments have exactly the same dimensions & elements.

        Only ``dimensions`` is compared; any object that is not a
        `DimensionGraph` compares unequal.
        """
        if isinstance(other, DimensionGraph):
            return self.dimensions == other.dimensions
        else:
            return False

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``."""
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``."""
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``."""
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``."""
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See also `union`.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property
    @cached_getter
    def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement, ...]:
        """Return a tuple of all elements in specific order.

        The order allows records to be
        found given their primary keys, starting from only the primary keys of
        required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.
        """
        done: Set[str] = set()
        order: List[DimensionElement] = []

        def addToOrder(element: DimensionElement) -> None:
            # Append ``element`` (then, recursively, what it implies) once
            # all of its other required dimensions have been appended.
            if element.name in done:
                return
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                return
            order.append(element)
            done.add(element.name)
            for other in element.implied:
                addToOrder(other)

        # ``done`` holds names, so compare against the required *names*
        # rather than relying on Dimension objects hashing/comparing equal
        # to their name strings.
        while not done.issuperset(self.required.names):
            for dimension in self.required:
                addToOrder(dimension)

        # Everything not reached above goes at the end, in ``elements`` order.
        order.extend(element for element in self.elements if element.name not in done)
        return tuple(order)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this graph."""
        return self.topology[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this graph."""
        return self.topology[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueAbstractSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueAbstractSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueAbstractSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueAbstractSet` of
    `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    governors: NamedValueAbstractSet[GovernorDimension]
    """A true `~collections.abc.Set` of all true `GovernorDimension` instances
    in the graph (`NamedValueAbstractSet` of `GovernorDimension`).
    """

    required: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueAbstractSet` of `Dimension`).
    """

    implied: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueAbstractSet` of
    `Dimension`).
    """

    topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this graph that can participate in topological
    relationships (`Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """