Coverage for python/lsst/daf/butler/core/dimensions/_graph.py: 33%
170 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-23 02:26 -0700
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-23 02:26 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["DimensionGraph", "SerializedDimensionGraph"]
26import itertools
27from types import MappingProxyType
28from typing import (
29 TYPE_CHECKING,
30 AbstractSet,
31 Any,
32 Dict,
33 Iterable,
34 Iterator,
35 List,
36 Mapping,
37 Optional,
38 Set,
39 Tuple,
40 Union,
41)
43from lsst.utils.classes import cached_getter, immutable
44from pydantic import BaseModel
46from .._topology import TopologicalFamily, TopologicalSpace
47from ..json import from_json_pydantic, to_json_pydantic
48from ..named import NamedValueAbstractSet, NamedValueSet
50if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 50 ↛ 51line 50 didn't jump to line 51, because the condition on line 50 was never true
51 from ...registry import Registry
52 from ._elements import Dimension, DimensionElement
53 from ._governor import GovernorDimension
54 from ._universe import DimensionUniverse
class SerializedDimensionGraph(BaseModel):
    """Serializable stand-in for a `DimensionGraph`, holding only names."""

    names: List[str]

    @classmethod
    def direct(cls, *, names: List[str]) -> SerializedDimensionGraph:
        """Build a `SerializedDimensionGraph` from trusted input.

        The instance is allocated with ``__new__`` and its attributes are
        written with `object.__setattr__`, so the model's ``__init__`` is
        never invoked for it.  Only call this when the inputs are known to
        be correct already.

        Parameters
        ----------
        names : `list` of `str`
            Value stored, as given, in the ``names`` field.

        Returns
        -------
        node : `SerializedDimensionGraph`
            New instance whose ``__fields_set__`` records ``names`` as set.
        """
        instance = SerializedDimensionGraph.__new__(cls)
        # Assign through ``object`` to sidestep the model's own attribute
        # handling; the second entry marks ``names`` as explicitly provided.
        for attribute, value in (("names", names), ("__fields_set__", {"names"})):
            object.__setattr__(instance, attribute, value)
        return instance
@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available.  Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.

    Instances are cached on the universe, keyed by the (expanded) set of
    dimension names, so constructing a graph with names that have been seen
    before returns the existing object.
    """

    _serializedType = SerializedDimensionGraph

    def __new__(
        cls,
        universe: DimensionUniverse,
        dimensions: Optional[Iterable[Dimension]] = None,
        names: Optional[Iterable[str]] = None,
        conform: bool = True,
    ) -> DimensionGraph:
        # All initialization happens here (there is no ``__init__``) so that
        # a cached instance can be returned untouched.
        conformedNames: Set[str]
        if names is None:
            if dimensions is None:
                conformedNames = set()
            else:
                try:
                    # Optimize for NamedValueSet/NamedKeyDict, though that's
                    # not required.
                    conformedNames = set(dimensions.names)  # type: ignore
                except AttributeError:
                    conformedNames = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            conformedNames = set(names)
        if conform:
            # The return value is not used, so this is expected to expand
            # the set in place.
            universe.expandDimensionNameSet(conformedNames)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(conformedNames)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it, and add it to the cache.
        # NOTE(review): the instance is cached before its attributes are set;
        # if anything below raises, a partially-built graph stays in the
        # cache.  Confirm whether that can happen before reordering.
        self = super().__new__(cls)
        universe._cache[cacheKey] = self
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(universe.sorted(conformedNames)).freeze()
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(
            e for e in universe.getStaticElements() if e.required.names <= self.dimensions.names
        ).freeze()
        self._finish()
        return self

    def _finish(self) -> None:
        """Populate the attributes derived from ``dimensions``/``elements``.

        Sets ``governors``, ``required``, ``implied``, ``topology`` and the
        private ``_dataCoordinateIndices`` mapping.  Called from `__new__`
        once ``dimensions`` and ``elements`` have been assigned.
        """
        # Make a set containing just the governor dimensions in this graph.
        # Need local import to avoid cycle.
        from ._governor import GovernorDimension

        self.governors = NamedValueSet(
            d for d in self.dimensions if isinstance(d, GovernorDimension)
        ).freeze()
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        required: NamedValueSet[Dimension] = NamedValueSet()
        implied: NamedValueSet[Dimension] = NamedValueSet()
        for dim1 in self.dimensions:
            if any(dim1.name in dim2.implied.names for dim2 in self.dimensions):
                implied.add(dim1)
            else:
                # If no other dimension implies dim1, it's required.
                required.add(dim1)
        self.required = required.freeze()
        self.implied = implied.freeze()

        # For each topological space, collect the families of the elements
        # in this graph that participate in that space.
        self.topology = MappingProxyType(
            {
                space: NamedValueSet(e.topology[space] for e in self.elements if space in e.topology).freeze()
                for space in TopologicalSpace.__members__.values()
            }
        )

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.  The order here is what's convenient
        # for DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._dataCoordinateIndices: Dict[str, int] = {
            name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names))
        }

    def __getnewargs__(self) -> tuple:
        # Positional arguments for `__new__`: ``(universe, dimensions, names,
        # conform)``.  The names are the ones this graph already holds, so
        # dependency expansion is skipped (``conform=False``).
        return (self.universe, None, tuple(self.dimensions.names), False)

    def __deepcopy__(self, memo: dict) -> DimensionGraph:
        # DimensionGraph is recursively immutable; see note in @immutable
        # decorator.
        return self

    @property
    def names(self) -> AbstractSet[str]:
        """Set of the names of all dimensions in the graph
        (`~collections.abc.Set` of `str`).
        """
        return self.dimensions.names

    def to_simple(self, minimal: bool = False) -> SerializedDimensionGraph:
        """Convert this class to a simple python type.

        This type is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization.  Has no effect for this class.

        Returns
        -------
        simple : `SerializedDimensionGraph`
            Model holding the names of the dimensions.
        """
        # Names are all we can serialize.
        return SerializedDimensionGraph(names=list(self.names))

    @classmethod
    def from_simple(
        cls,
        names: SerializedDimensionGraph,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
    ) -> DimensionGraph:
        """Construct a new object from the simplified form.

        This is assumed to support data returned from the `to_simple`
        method.

        Parameters
        ----------
        names : `SerializedDimensionGraph`
            Serialized form whose ``names`` attribute holds the names of the
            dimensions.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset.  Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted.  Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        graph : `DimensionGraph`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or
            if no universe could be obtained from ``registry``.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        return cls(names=names.names, universe=universe)

    # JSON (de)serialization is delegated to the shared pydantic helpers
    # imported from ``..json``.
    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``
        (not just true `Dimension` instances).
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).

        Parameters
        ----------
        name : `str`
            Name of the element to look up.
        default : `object`, optional
            Fallback value, passed through to ``self.elements.get``.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({str(self)})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions <= other.dimensions

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions >= other.dimensions

    def __eq__(self, other: Any) -> bool:
        """Test whether ``other`` is a `DimensionGraph` with equal dimensions.

        Only ``dimensions`` is compared; anything that is not a
        `DimensionGraph` compares unequal.
        """
        if isinstance(other, DimensionGraph):
            return self.dimensions == other.dimensions
        else:
            return False

    def __hash__(self) -> int:
        # Hash on the dimension names, in this graph's iteration order.
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``."""
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``."""
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``."""
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``."""
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See also `union`.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property  # type: ignore
    @cached_getter
    def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement, ...]:
        """Return a tuple of all elements in specific order.

        The order allows records to be
        found given their primary keys, starting from only the primary keys of
        required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.
        """
        done: Set[str] = set()
        order: List[DimensionElement] = []

        def addToOrder(element: DimensionElement) -> None:
            # Already placed: nothing to do.
            if element.name in done:
                return
            # Defer this element until every *other* dimension it requires
            # has been placed; a later pass will pick it up.
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                return
            order.append(element)
            done.add(element.name)
            # Anything this element implies may now be placed after it.
            for other in element.implied:
                addToOrder(other)

        # Repeat passes over the required dimensions until all are placed;
        # a single pass can skip dimensions whose predecessors come later.
        while not done.issuperset(self.required):
            for dimension in self.required:
                addToOrder(dimension)

        # Append whatever elements were not reached from the required set.
        order.extend(element for element in self.elements if element.name not in done)
        return tuple(order)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this graph."""
        return self.topology[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this graph."""
        return self.topology[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueAbstractSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueAbstractSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueAbstractSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueAbstractSet` of
    `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    governors: NamedValueAbstractSet[GovernorDimension]
    """A true `~collections.abc.Set` of all true `GovernorDimension` instances
    in the graph (`NamedValueAbstractSet` of `GovernorDimension`).
    """

    required: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueAbstractSet` of `Dimension`).
    """

    implied: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueAbstractSet` of
    `Dimension`).
    """

    topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this graph that can participate in topological
    relationships (`Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """