Coverage for python/lsst/daf/butler/core/dimensions/_graph.py: 38%
163 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-05 01:26 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-05 01:26 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["DimensionGraph", "SerializedDimensionGraph"]

import itertools
from collections.abc import Iterable, Iterator, Mapping, Set
from types import MappingProxyType
from typing import TYPE_CHECKING, Any, ClassVar

from lsst.daf.butler._compat import _BaseModelCompat
from lsst.utils.classes import cached_getter, immutable

from .._topology import TopologicalFamily, TopologicalSpace
from ..json import from_json_pydantic, to_json_pydantic
from ..named import NamedValueAbstractSet, NamedValueSet

if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
    from ...registry import Registry
    from ._elements import Dimension, DimensionElement
    from ._governor import GovernorDimension
    from ._universe import DimensionUniverse
class SerializedDimensionGraph(_BaseModelCompat):
    """Simplified model of a `DimensionGraph` suitable for serialization."""

    names: list[str]

    @classmethod
    def direct(cls, *, names: list[str]) -> SerializedDimensionGraph:
        """Construct a `SerializedDimensionGraph` directly without validators.

        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.

        Parameters
        ----------
        names : `list` of `str`
            The dimension names to store; passed through unvalidated.

        Returns
        -------
        graph : `SerializedDimensionGraph`
            Model instance built via ``model_construct``.
        """
        return cls.model_construct(names=names)
@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances. It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph. All (recursive) dependencies of these dimensions will also
        be included. At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph. All (recursive) dependencies of these dimensions will also
        be included. At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies. `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available. Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.

    Instances are cached on the universe, keyed by the (expanded) set of
    dimension names, so constructing a graph with the same names twice
    returns the same object.
    """

    _serializedType = SerializedDimensionGraph

    def __new__(
        cls,
        universe: DimensionUniverse,
        dimensions: Iterable[Dimension] | None = None,
        names: Iterable[str] | None = None,
        conform: bool = True,
    ) -> DimensionGraph:
        conformedNames: set[str]
        if names is None:
            if dimensions is None:
                conformedNames = set()
            else:
                try:
                    # Optimize for NamedValueSet/NamedKeyDict, though that's
                    # not required.
                    conformedNames = set(dimensions.names)  # type: ignore
                except AttributeError:
                    conformedNames = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            conformedNames = set(names)
        if conform:
            # Expands the set in place to include dependencies.
            universe.expandDimensionNameSet(conformedNames)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(conformedNames)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph. Create it, and add it to the cache.
        self = super().__new__(cls)
        universe._cache[cacheKey] = self
        try:
            self.universe = universe
            # Reorder dimensions by iterating over the universe (which is
            # ordered already) and extracting the ones in the set.
            self.dimensions = NamedValueSet(universe.sorted(conformedNames)).freeze()
            # Make a set that includes both the dimensions and any
            # DimensionElements whose dependencies are in self.dimensions.
            self.elements = NamedValueSet(
                e for e in universe.getStaticElements() if e.required.names <= self.dimensions.names
            ).freeze()
            self._finish()
        except BaseException:
            # Do not leave a partially-initialized (and immutable) graph in
            # the cache, where later lookups with the same names would
            # silently return it.
            universe._cache.pop(cacheKey, None)
            raise
        return self

    def _finish(self) -> None:
        """Populate the derived attributes of a newly-created graph.

        Sets ``governors``, ``required``, ``implied``, ``topology`` and the
        private ``_dataCoordinateIndices`` mapping from ``self.dimensions``
        and ``self.elements``, which must already be set.
        """
        # Make a set containing just the governor dimensions in this graph.
        # Need local import to avoid cycle.
        from ._governor import GovernorDimension

        self.governors = NamedValueSet(
            d for d in self.dimensions if isinstance(d, GovernorDimension)
        ).freeze()
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        # Collect once the names implied by any dimension in this graph, so
        # each dimension can be classified with a single membership test.
        impliedNames: set[str] = set()
        for dimension in self.dimensions:
            impliedNames.update(dimension.implied.names)
        required: NamedValueSet[Dimension] = NamedValueSet()
        implied: NamedValueSet[Dimension] = NamedValueSet()
        for dimension in self.dimensions:
            if dimension.name in impliedNames:
                implied.add(dimension)
            else:
                # If no dimension in the graph implies this one, it's
                # required.
                required.add(dimension)
        self.required = required.freeze()
        self.implied = implied.freeze()

        self.topology = MappingProxyType(
            {
                space: NamedValueSet(e.topology[space] for e in self.elements if space in e.topology).freeze()
                for space in TopologicalSpace.__members__.values()
            }
        )

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight. The order here is what's convenient
        # for DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._dataCoordinateIndices: dict[str, int] = {
            name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names))
        }

    def __getnewargs__(self) -> tuple:
        # Positional arguments for __new__: (universe, dimensions, names,
        # conform). The stored names are already expanded, so conform=False.
        return (self.universe, None, tuple(self.dimensions.names), False)

    def __deepcopy__(self, memo: dict) -> DimensionGraph:
        # DimensionGraph is recursively immutable; see note in @immutable
        # decorator.
        return self

    @property
    def names(self) -> Set[str]:
        """Set of the names of all dimensions in the graph
        (`~collections.abc.Set` [ `str` ]).
        """
        return self.dimensions.names

    def to_simple(self, minimal: bool = False) -> SerializedDimensionGraph:
        """Convert this class to a simple python type.

        This type is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        names : `SerializedDimensionGraph`
            Model holding the names of the dimensions.
        """
        # Names are all we can serialize.
        return SerializedDimensionGraph(names=list(self.names))

    @classmethod
    def from_simple(
        cls,
        names: SerializedDimensionGraph,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
    ) -> DimensionGraph:
        """Construct a new object from the simplified form.

        This is assumed to support data returned from the `to_simple`
        method.

        Parameters
        ----------
        names : `SerializedDimensionGraph`
            Serialized form holding the names of the dimensions.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        graph : `DimensionGraph`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or
            if no usable universe can be determined from them.
        """
        if universe is None:
            if registry is None:
                raise ValueError(
                    "One of universe or registry is required to convert names to a DimensionGraph"
                )
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        return cls(names=names.names, universe=universe)

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: str | DimensionElement) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``,
        not just true `Dimension` instances.
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name, or a default.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances. The lookup
        is delegated to ``self.elements.get(name, default)``.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({self!s})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions <= other.dimensions

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions >= other.dimensions

    def __eq__(self, other: Any) -> bool:
        """Test the arguments have exactly the same dimensions & elements."""
        if not isinstance(other, DimensionGraph):
            return False
        return self.dimensions == other.dimensions

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``."""
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``."""
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``."""
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``."""
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See also `union`.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property
    @cached_getter
    def primaryKeyTraversalOrder(self) -> tuple[DimensionElement, ...]:
        """Return a tuple of all elements in specific order.

        The order allows records to be
        found given their primary keys, starting from only the primary keys of
        required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.
        """
        done: set[str] = set()
        order: list[DimensionElement] = []

        def addToOrder(element: DimensionElement) -> None:
            # Append ``element`` once all of its other required dimensions
            # have been added, then recurse into the elements it implies.
            if element.name in done:
                return
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                return
            order.append(element)
            done.add(element.name)
            for other in element.implied:
                addToOrder(other)

        # ``done`` holds names, so compare against the names of the required
        # dimensions rather than relying on elements comparing equal to
        # their name strings.
        while not done.issuperset(self.required.names):
            for dimension in self.required:
                addToOrder(dimension)

        # Everything not reached from the required dimensions goes last, in
        # the order of ``self.elements``.
        order.extend(element for element in self.elements if element.name not in done)
        return tuple(order)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this graph."""
        return self.topology[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this graph."""
        return self.topology[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueAbstractSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueAbstractSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueAbstractSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueAbstractSet` of
    `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    governors: NamedValueAbstractSet[GovernorDimension]
    """A true `~collections.abc.Set` of all true `GovernorDimension` instances
    in the graph (`NamedValueAbstractSet` of `GovernorDimension`).
    """

    required: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueAbstractSet` of `Dimension`).
    """

    implied: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueAbstractSet` of
    `Dimension`).
    """

    topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this graph that can participate in topological
    relationships (`~collections.abc.Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """