Coverage for python/lsst/daf/butler/core/dimensions/_graph.py : 29%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["DimensionGraph"]
26import itertools
27from types import MappingProxyType
28from typing import (
29 AbstractSet,
30 Any,
31 Dict,
32 Iterable,
33 Iterator,
34 List,
35 Mapping,
36 Optional,
37 Set,
38 Tuple,
39 TYPE_CHECKING,
40 Union,
41)
43from ..named import NamedValueAbstractSet, NamedValueSet
44from ..utils import cached_getter, immutable
45from .._topology import TopologicalSpace, TopologicalFamily
46from ..json import from_json_generic, to_json_generic
48if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 48 ↛ 49line 48 didn't jump to line 49, because the condition on line 48 was never true
49 from ._universe import DimensionUniverse
50 from ._elements import DimensionElement, Dimension
51 from ._governor import GovernorDimension
52 from ...registry import Registry
@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available.  Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.

    Instances are cached on the universe, keyed by the (expanded) set of
    dimension names, so constructing a graph twice with the same universe and
    names returns the same object.
    """

    def __new__(
        cls,
        universe: DimensionUniverse,
        dimensions: Optional[Iterable[Dimension]] = None,
        names: Optional[Iterable[str]] = None,
        conform: bool = True
    ) -> DimensionGraph:
        conformedNames: Set[str]
        if names is None:
            if dimensions is None:
                conformedNames = set()
            else:
                try:
                    # Optimize for NamedValueSet/NamedKeyDict, though that's
                    # not required.
                    conformedNames = set(dimensions.names)  # type: ignore
                except AttributeError:
                    conformedNames = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            conformedNames = set(names)
        if conform:
            # Expands the set in place.
            universe.expandDimensionNameSet(conformedNames)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(conformedNames)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it, and add it to the cache.
        self = super().__new__(cls)
        universe._cache[cacheKey] = self
        try:
            self.universe = universe
            # Reorder dimensions by iterating over the universe (which is
            # ordered already) and extracting the ones in the set.
            self.dimensions = NamedValueSet(universe.sorted(conformedNames)).freeze()
            # Make a set that includes both the dimensions and any
            # DimensionElements whose dependencies are in self.dimensions.
            self.elements = NamedValueSet(e for e in universe.getStaticElements()
                                          if e.required.names <= self.dimensions.names).freeze()
            self._finish()
        except BaseException:
            # Never leave a partially-initialized graph in the cache: later
            # lookups with the same names would return an object that is
            # missing attributes.  Evict it and let the error propagate.
            universe._cache.pop(cacheKey, None)
            raise
        return self

    def _finish(self) -> None:
        """Populate the attributes derived from ``dimensions`` and
        ``elements``.

        Sets ``governors``, ``required``, ``implied``, ``topology`` and the
        private ``_dataCoordinateIndices`` mapping.  ``self.dimensions`` and
        ``self.elements`` must already be set.
        """
        # Make a set containing just the governor dimensions in this graph.
        # Need local import to avoid cycle.
        from ._governor import GovernorDimension
        self.governors = NamedValueSet(
            d for d in self.dimensions if isinstance(d, GovernorDimension)
        ).freeze()
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        required: NamedValueSet[Dimension] = NamedValueSet()
        implied: NamedValueSet[Dimension] = NamedValueSet()
        for dim1 in self.dimensions:
            if any(dim1.name in dim2.implied.names for dim2 in self.dimensions):
                implied.add(dim1)
            else:
                # If no other dimension implies dim1, it's required.
                required.add(dim1)
        self.required = required.freeze()
        self.implied = implied.freeze()

        self.topology = MappingProxyType({
            space: NamedValueSet(e.topology[space] for e in self.elements if space in e.topology).freeze()
            for space in TopologicalSpace.__members__.values()
        })

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.  The order here is what's convenient
        # for DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._dataCoordinateIndices: Dict[str, int] = {
            name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names))
        }

    def __getnewargs__(self) -> tuple:
        # Arguments for `__new__` on unpickling: pass names only, and skip
        # dependency expansion since this graph's names are already complete.
        return (self.universe, None, tuple(self.dimensions.names), False)

    def __deepcopy__(self, memo: dict) -> DimensionGraph:
        # DimensionGraph is recursively immutable; see note in @immutable
        # decorator.
        return self

    @property
    def names(self) -> AbstractSet[str]:
        """Set of the names of all dimensions in the graph (`KeysView`)."""
        return self.dimensions.names

    def to_simple(self, minimal: bool = False) -> List[str]:
        """Convert this class to a simple python type.

        This type is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization.  Has no effect for this class.

        Returns
        -------
        names : `list`
            The names of the dimensions.
        """
        # Names are all we can serialize.
        return list(self.names)

    @classmethod
    def from_simple(cls, names: List[str],
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DimensionGraph:
        """Construct a new object from the simplified form.

        This is assumed to support data returned from the `to_simple`
        method.

        Parameters
        ----------
        names : `list` of `str`
            The names of the dimensions.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset.  Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted.  Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        graph : `DimensionGraph`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or
            if no universe can be obtained from ``registry``.
        """
        if universe is None:
            if registry is None:
                raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        return cls(names=names, universe=universe)

    to_json = to_json_generic
    from_json = classmethod(from_json_generic)

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``,
        not just true `Dimension` instances.
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name, or a default.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.

        Parameters
        ----------
        name : `str`
            Name of the element to look up.
        default : `object`, optional
            Value passed through to ``self.elements.get`` as the fallback
            for a failed lookup.  Defaults to `None`.

        Returns
        -------
        element : `DimensionElement`
            Result of ``self.elements.get(name, default)``.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({self})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions <= other.dimensions

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions >= other.dimensions

    def __eq__(self, other: Any) -> bool:
        """Test the arguments have exactly the same dimensions & elements."""
        if isinstance(other, DimensionGraph):
            return self.dimensions == other.dimensions
        else:
            return False

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``."""
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``."""
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``."""
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``."""
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See also `union`.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property  # type: ignore
    @cached_getter
    def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement, ...]:
        """Return a tuple of all elements in specific order.

        The order allows records to be
        found given their primary keys, starting from only the primary keys of
        required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.

        Raises
        ------
        RuntimeError
            Raised if some required dimension can never be placed because
            its own required dimensions are never satisfied.
        """
        done: Set[str] = set()
        order: List[DimensionElement] = []

        def addToOrder(element: DimensionElement) -> None:
            if element.name in done:
                return
            # An element can only be placed once everything it requires
            # (other than itself) has been placed.
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                return
            order.append(element)
            done.add(element.name)
            for other in element.implied:
                addToOrder(other)

        # ``done`` holds names, so compare it against names.
        requiredNames = self.required.names
        while not done.issuperset(requiredNames):
            nDoneBefore = len(done)
            for dimension in self.required:
                addToOrder(dimension)
            if len(done) == nDoneBefore:
                # A pass that places nothing leaves all state unchanged, so
                # every later pass would be identical; fail instead of
                # looping forever.
                missing = sorted(set(requiredNames) - done)
                raise RuntimeError(
                    f"Cannot determine primary key traversal order for {self}: "
                    f"unsatisfied dependencies for required dimensions {missing}."
                )

        order.extend(element for element in self.elements if element.name not in done)
        return tuple(order)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this graph."""
        return self.topology[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this graph."""
        return self.topology[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueAbstractSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueAbstractSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueAbstractSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueAbstractSet` of
    `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    governors: NamedValueAbstractSet[GovernorDimension]
    """A true `~collections.abc.Set` of all true `GovernorDimension` instances
    in the graph (`NamedValueAbstractSet` of `GovernorDimension`).
    """

    required: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified via
    their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueAbstractSet` of `Dimension`).
    """

    implied: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueAbstractSet` of
    `Dimension`).
    """

    topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this graph that can participate in topological
    relationships (`Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """