Coverage for python/lsst/daf/butler/core/dimensions/_graph.py : 31%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["DimensionGraph", "SerializedDimensionGraph"]
26from pydantic import BaseModel
27import itertools
28from types import MappingProxyType
29from typing import (
30 AbstractSet,
31 Any,
32 Dict,
33 Iterable,
34 Iterator,
35 List,
36 Mapping,
37 Optional,
38 Set,
39 Tuple,
40 TYPE_CHECKING,
41 Union,
42)
44from ..named import NamedValueAbstractSet, NamedValueSet
45from ..utils import cached_getter, immutable
46from .._topology import TopologicalSpace, TopologicalFamily
47from ..json import from_json_pydantic, to_json_pydantic
49if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 49 ↛ 50line 49 didn't jump to line 50, because the condition on line 49 was never true
50 from ._universe import DimensionUniverse
51 from ._elements import DimensionElement, Dimension
52 from ._governor import GovernorDimension
53 from ...registry import Registry
class SerializedDimensionGraph(BaseModel):
    """Pydantic model holding the serializable form of a `DimensionGraph`.

    Only the dimension names are stored; `DimensionGraph.from_simple`
    rebuilds a graph from them given a universe (or a registry that can
    supply one).
    """

    # Names of the dimensions in the graph, as produced by
    # `DimensionGraph.to_simple`.
    names: List[str]
@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available.  Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.

    Instances are cached on the universe (keyed by the expanded set of
    dimension names), so constructing a graph with names that have been seen
    before returns the existing instance.
    """

    _serializedType = SerializedDimensionGraph

    def __new__(
        cls,
        universe: DimensionUniverse,
        dimensions: Optional[Iterable[Dimension]] = None,
        names: Optional[Iterable[str]] = None,
        conform: bool = True
    ) -> DimensionGraph:
        conformedNames: Set[str]
        if names is None:
            if dimensions is None:
                conformedNames = set()
            else:
                try:
                    # Optimize for NamedValueSet/NamedKeyDict, though that's
                    # not required.
                    conformedNames = set(dimensions.names)  # type: ignore
                except AttributeError:
                    conformedNames = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            conformedNames = set(names)
        if conform:
            # Expands ``conformedNames`` in place.
            universe.expandDimensionNameSet(conformedNames)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(conformedNames)
        self = universe._cache.get(cacheKey)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it fully *before* adding it
        # to the cache, so that a failure part-way through construction can
        # never leave a half-initialized instance behind for later callers.
        self = super().__new__(cls)
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(universe.sorted(conformedNames)).freeze()
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(e for e in universe.getStaticElements()
                                      if e.required.names <= self.dimensions.names).freeze()
        self._finish()
        universe._cache[cacheKey] = self
        return self

    def _finish(self) -> None:
        """Populate the derived attributes of a newly-created graph.

        Sets ``governors``, ``required``, ``implied``, ``topology`` and the
        private ``_dataCoordinateIndices`` mapping from ``self.dimensions``
        and ``self.elements``, which must already be set.
        """
        # Make a set containing just the governor dimensions in this graph.
        # Need local import to avoid cycle.
        from ._governor import GovernorDimension
        self.governors = NamedValueSet(
            d for d in self.dimensions if isinstance(d, GovernorDimension)
        ).freeze()
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        required: NamedValueSet[Dimension] = NamedValueSet()
        implied: NamedValueSet[Dimension] = NamedValueSet()
        for dim1 in self.dimensions:
            if any(dim1.name in dim2.implied.names for dim2 in self.dimensions):
                implied.add(dim1)
            else:
                # If no other dimension implies dim1, it's required.
                required.add(dim1)
        self.required = required.freeze()
        self.implied = implied.freeze()

        self.topology = MappingProxyType({
            space: NamedValueSet(e.topology[space] for e in self.elements if space in e.topology).freeze()
            for space in TopologicalSpace.__members__.values()
        })

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.  The order here is what's convenient
        # for DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._dataCoordinateIndices: Dict[str, int] = {
            name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names))
        }

    def __getnewargs__(self) -> tuple:
        """Return the positional arguments to pass to `__new__` on unpickling.

        The names stored on the graph are already expanded, so ``conform``
        is passed as `False`.
        """
        return (self.universe, None, tuple(self.dimensions.names), False)

    def __deepcopy__(self, memo: dict) -> DimensionGraph:
        # DimensionGraph is recursively immutable; see note in @immutable
        # decorator.
        return self

    @property
    def names(self) -> AbstractSet[str]:
        """Set of the names of all dimensions in the graph (`KeysView`)."""
        return self.dimensions.names

    def to_simple(self, minimal: bool = False) -> SerializedDimensionGraph:
        """Convert this class to a simple python type.

        This type is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization.  Has no effect for this class.

        Returns
        -------
        simple : `SerializedDimensionGraph`
            Model holding the names of the dimensions.
        """
        # Names are all we can serialize.
        return SerializedDimensionGraph(names=list(self.names))

    @classmethod
    def from_simple(cls, names: SerializedDimensionGraph,
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DimensionGraph:
        """Construct a new object from the simplified form.

        This is assumed to support data returned from the `to_simple`
        method.

        Parameters
        ----------
        names : `SerializedDimensionGraph`
            Serialized form holding the names of the dimensions.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions of which this graph will
            be a subset.  Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted.  Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        graph : `DimensionGraph`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or
            if no universe could be obtained from ``registry``.
        """
        if universe is None:
            if registry is None:
                raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        return cls(names=names.names, universe=universe)

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``
        (not just true `Dimension` instances).
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name, or ``default``.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({self!s})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions <= other.dimensions

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions >= other.dimensions

    def __eq__(self, other: Any) -> bool:
        """Test the arguments have exactly the same dimensions & elements."""
        if isinstance(other, DimensionGraph):
            return self.dimensions == other.dimensions
        else:
            return False

    def __hash__(self) -> int:
        # Hash on the ordered dimension names.
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``."""
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``."""
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``."""
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``."""
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See also `union`.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property  # type: ignore
    @cached_getter
    def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement, ...]:
        """Return a tuple of all elements in specific order.

        The order allows records to be
        found given their primary keys, starting from only the primary keys of
        required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.
        """
        # Names of the elements already placed in ``order``.
        done: Set[str] = set()
        order: List[DimensionElement] = []

        def addToOrder(element: DimensionElement) -> None:
            if element.name in done:
                return
            # An element can only be placed once everything it requires
            # (other than itself) has been placed.
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                return
            order.append(element)
            done.add(element.name)
            for other in element.implied:
                addToOrder(other)

        # ``done`` holds names, so compare against the names of the required
        # dimensions rather than the dimension objects themselves.
        while not done.issuperset(self.required.names):
            for dimension in self.required:
                addToOrder(dimension)

        # Anything not reached from the required dimensions goes last, in
        # the order of ``self.elements``.
        order.extend(element for element in self.elements if element.name not in done)
        return tuple(order)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this graph."""
        return self.topology[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this graph."""
        return self.topology[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueAbstractSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueAbstractSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueAbstractSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueAbstractSet` of
    `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    governors: NamedValueAbstractSet[GovernorDimension]
    """A true `~collections.abc.Set` of all true `GovernorDimension` instances
    in the graph (`NamedValueAbstractSet` of `GovernorDimension`).
    """

    required: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified via
    their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueAbstractSet` of `Dimension`).
    """

    implied: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueAbstractSet` of
    `Dimension`).
    """

    topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this graph that can participate in topological
    relationships (`Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """