Coverage for python/lsst/daf/butler/core/dimensions/_graph.py: 38%
169 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-14 19:21 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-14 19:21 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["DimensionGraph", "SerializedDimensionGraph"]
26import itertools
27from collections.abc import Iterable, Iterator, Mapping, Set
28from types import MappingProxyType
29from typing import TYPE_CHECKING, Any, ClassVar
31from lsst.utils.classes import cached_getter, immutable
33try:
34 from pydantic.v1 import BaseModel
35except ModuleNotFoundError:
36 from pydantic import BaseModel # type: ignore
38from .._topology import TopologicalFamily, TopologicalSpace
39from ..json import from_json_pydantic, to_json_pydantic
40from ..named import NamedValueAbstractSet, NamedValueSet
42if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
43 from ...registry import Registry
44 from ._elements import Dimension, DimensionElement
45 from ._governor import GovernorDimension
46 from ._universe import DimensionUniverse
class SerializedDimensionGraph(BaseModel):
    """Serializable stand-in for a `DimensionGraph`: just its names."""

    names: list[str]

    @classmethod
    def direct(cls, *, names: list[str]) -> SerializedDimensionGraph:
        """Build a `SerializedDimensionGraph` without running validators.

        Unlike pydantic's ``construct``, the keyword arguments accepted here
        are exactly the fields of the model.  The ``direct`` convention is to
        recurse through members via their own ``direct`` methods; this model
        only holds a list of strings, so there is nothing to recurse into.

        Only call this method with trusted inputs.

        Parameters
        ----------
        names : `list` [ `str` ]
            Dimension names, stored as-is.

        Returns
        -------
        model : `SerializedDimensionGraph`
            New instance with ``names`` set and recorded as an explicitly-set
            field.
        """
        unvalidated = SerializedDimensionGraph.__new__(cls)
        # Go through object.__setattr__ so that the model's own attribute
        # handling is bypassed; order matches field first, bookkeeping second.
        for attribute, value in (("names", names), ("__fields_set__", {"names"})):
            object.__setattr__(unvalidated, attribute, value)
        return unvalidated
@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available.  Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.

    Instances are cached on the universe, keyed by their (expanded) set of
    dimension names, so constructing a graph with the same names twice
    returns the same object.
    """

    _serializedType = SerializedDimensionGraph

    def __new__(
        cls,
        universe: DimensionUniverse,
        dimensions: Iterable[Dimension] | None = None,
        names: Iterable[str] | None = None,
        conform: bool = True,
    ) -> DimensionGraph:
        conformedNames: set[str]
        if names is None:
            if dimensions is None:
                conformedNames = set()
            else:
                try:
                    # Optimize for NamedValueSet/NamedKeyDict, though that's
                    # not required.
                    conformedNames = set(dimensions.names)  # type: ignore
                except AttributeError:
                    conformedNames = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            conformedNames = set(names)
        if conform:
            # Expands ``conformedNames`` in place.
            universe.expandDimensionNameSet(conformedNames)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(conformedNames)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it, and add it to the cache.
        self = super().__new__(cls)
        universe._cache[cacheKey] = self
        try:
            self.universe = universe
            # Reorder dimensions by iterating over the universe (which is
            # ordered already) and extracting the ones in the set.
            self.dimensions = NamedValueSet(universe.sorted(conformedNames)).freeze()
            # Make a set that includes both the dimensions and any
            # DimensionElements whose dependencies are in self.dimensions.
            self.elements = NamedValueSet(
                e for e in universe.getStaticElements() if e.required.names <= self.dimensions.names
            ).freeze()
            self._finish()
        except BaseException:
            # Never leave a partially-initialized graph in the cache: a later
            # request for the same names would otherwise silently receive it.
            universe._cache.pop(cacheKey, None)
            raise
        return self

    def _finish(self) -> None:
        """Populate the attributes derived from ``dimensions``/``elements``.

        Sets ``governors``, ``required``, ``implied``, ``topology`` and the
        private ``_dataCoordinateIndices`` mapping.
        """
        # Make a set containing just the governor dimensions in this graph.
        # Need local import to avoid cycle.
        from ._governor import GovernorDimension

        self.governors = NamedValueSet(
            d for d in self.dimensions if isinstance(d, GovernorDimension)
        ).freeze()
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        required: NamedValueSet[Dimension] = NamedValueSet()
        implied: NamedValueSet[Dimension] = NamedValueSet()
        for dim1 in self.dimensions:
            if any(dim1.name in dim2.implied.names for dim2 in self.dimensions):
                implied.add(dim1)
            else:
                # If no other dimension implies dim1, it's required.
                required.add(dim1)
        self.required = required.freeze()
        self.implied = implied.freeze()

        self.topology = MappingProxyType(
            {
                space: NamedValueSet(e.topology[space] for e in self.elements if space in e.topology).freeze()
                for space in TopologicalSpace.__members__.values()
            }
        )

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.  The order here is what's convenient
        # for DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._dataCoordinateIndices: dict[str, int] = {
            name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names))
        }

    def __getnewargs__(self) -> tuple:
        # Positional arguments for ``__new__`` when unpickling: the names are
        # taken from an existing graph, so ``conform=False`` is passed.
        return (self.universe, None, tuple(self.dimensions.names), False)

    def __deepcopy__(self, memo: dict) -> DimensionGraph:
        # DimensionGraph is recursively immutable; see note in @immutable
        # decorator.
        return self

    @property
    def names(self) -> Set[str]:
        """Set of the names of all dimensions in the graph
        (`~collections.abc.Set` [ `str` ]).
        """
        return self.dimensions.names

    def to_simple(self, minimal: bool = False) -> SerializedDimensionGraph:
        """Convert this class to a simple python type.

        This type is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization.  Has no effect for this class.

        Returns
        -------
        simple : `SerializedDimensionGraph`
            Model holding the names of the dimensions.
        """
        # Names are all we can serialize.
        return SerializedDimensionGraph(names=list(self.names))

    @classmethod
    def from_simple(
        cls,
        names: SerializedDimensionGraph,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
    ) -> DimensionGraph:
        """Construct a new object from the simplified form.

        This is assumed to support data returned from the `to_simple`
        method.

        Parameters
        ----------
        names : `SerializedDimensionGraph`
            Simplified form holding the names of the dimensions.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset.  Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted.  Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        graph : `DimensionGraph`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or
            if no universe can be obtained from the registry.
        """
        if universe is None:
            if registry is None:
                raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        return cls(names=names.names, universe=universe)

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: str | DimensionElement) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``
        (not just true `Dimension` instances).
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name, or a default.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).

        Parameters
        ----------
        name : `str`
            Name of the element to look up.
        default : `~typing.Any`, optional
            Value forwarded to ``self.elements.get`` as the fallback for a
            missing name.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({self!s})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions <= other.dimensions

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions >= other.dimensions

    def __eq__(self, other: Any) -> bool:
        """Test whether the arguments have exactly the same dimensions.

        Anything that is not a `DimensionGraph` compares unequal.
        """
        if isinstance(other, DimensionGraph):
            return self.dimensions == other.dimensions
        else:
            return False

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``."""
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``."""
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``."""
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``."""
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See also `union`.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property
    @cached_getter
    def primaryKeyTraversalOrder(self) -> tuple[DimensionElement, ...]:
        """Return a tuple of all elements in specific order.

        The order allows records to be
        found given their primary keys, starting from only the primary keys of
        required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.
        """
        done: set[str] = set()
        order: list[DimensionElement] = []

        def addToOrder(element: DimensionElement) -> None:
            if element.name in done:
                return
            # An element can only be placed once everything it requires
            # (other than itself) has already been placed.
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                return
            order.append(element)
            done.add(element.name)
            for other in element.implied:
                addToOrder(other)

        # ``done`` holds names, so compare against the names of the required
        # dimensions rather than the dimension objects themselves.
        while not done.issuperset(self.required.names):
            for dimension in self.required:
                addToOrder(dimension)

        order.extend(element for element in self.elements if element.name not in done)
        return tuple(order)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this graph."""
        return self.topology[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this graph."""
        return self.topology[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueAbstractSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueAbstractSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueAbstractSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueAbstractSet` of
    `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    governors: NamedValueAbstractSet[GovernorDimension]
    """A true `~collections.abc.Set` of all true `GovernorDimension` instances
    in the graph (`NamedValueAbstractSet` of `GovernorDimension`).
    """

    required: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueAbstractSet` of `Dimension`).
    """

    implied: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueAbstractSet` of
    `Dimension`).
    """

    topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this graph that can participate in topological
    relationships (`~collections.abc.Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """