Coverage for python/lsst/daf/butler/core/dimensions/_graph.py : 29%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["DimensionGraph"]
26import itertools
27from types import MappingProxyType
28from typing import (
29 AbstractSet,
30 Any,
31 Dict,
32 Iterable,
33 Iterator,
34 List,
35 Mapping,
36 Optional,
37 Set,
38 Tuple,
39 TYPE_CHECKING,
40 Union,
41)
43from ..named import NamedValueAbstractSet, NamedValueSet
44from ..utils import cached_getter, immutable
45from .._topology import TopologicalSpace, TopologicalFamily
46from ..json import from_json_generic, to_json_generic
48if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 48 ↛ 49line 48 didn't jump to line 49, because the condition on line 48 was never true
49 from ._universe import DimensionUniverse
50 from ._elements import DimensionElement, Dimension
51 from ._governor import GovernorDimension
52 from ...registry import Registry
@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available.  Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.

    Instances are cached on the universe, keyed by the (expanded) set of
    dimension names, so constructing a graph with the same names twice
    returns the same object.
    """

    def __new__(
        cls,
        universe: DimensionUniverse,
        dimensions: Optional[Iterable[Dimension]] = None,
        names: Optional[Iterable[str]] = None,
        conform: bool = True
    ) -> DimensionGraph:
        conformedNames: Set[str]
        if names is None:
            if dimensions is None:
                conformedNames = set()
            else:
                try:
                    # Optimize for NamedValueSet/NamedKeyDict, though that's
                    # not required.
                    conformedNames = set(dimensions.names)  # type: ignore
                except AttributeError:
                    conformedNames = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            conformedNames = set(names)
        if conform:
            # Called for its effect on ``conformedNames``; the return value
            # (if any) is not used.
            universe.expandDimensionNameSet(conformedNames)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(conformedNames)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it, and add it to the cache.
        self = super().__new__(cls)
        universe._cache[cacheKey] = self
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(universe.sorted(conformedNames)).freeze()
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(e for e in universe.getStaticElements()
                                      if e.required.names <= self.dimensions.names).freeze()
        self._finish()
        return self

    def _finish(self) -> None:
        """Populate the attributes derived from ``dimensions`` and
        ``elements``.

        Sets ``governors``, ``required``, ``implied``, ``topology`` and the
        private ``_dataCoordinateIndices`` mapping.  Must be called after
        ``dimensions`` and ``elements`` have been assigned.
        """
        # Make a set containing just the governor dimensions in this graph.
        # Need local import to avoid cycle.
        from ._governor import GovernorDimension
        self.governors = NamedValueSet(
            d for d in self.dimensions if isinstance(d, GovernorDimension)
        ).freeze()
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        required: NamedValueSet[Dimension] = NamedValueSet()
        implied: NamedValueSet[Dimension] = NamedValueSet()
        for candidate in self.dimensions:
            if any(candidate.name in other.implied.names for other in self.dimensions):
                implied.add(candidate)
            else:
                # If no other dimension implies this one, it's required.
                required.add(candidate)
        self.required = required.freeze()
        self.implied = implied.freeze()

        self.topology = MappingProxyType({
            space: NamedValueSet(e.topology[space] for e in self.elements if space in e.topology).freeze()
            for space in TopologicalSpace.__members__.values()
        })

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.  The order here is what's convenient
        # for DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._dataCoordinateIndices: Dict[str, int] = {
            name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names))
        }

    def __getnewargs__(self) -> tuple:
        # Positional arguments for __new__ when unpickling: the names are
        # already expanded, so ``conform`` is passed as False.
        return (self.universe, None, tuple(self.dimensions.names), False)

    def __deepcopy__(self, memo: dict) -> DimensionGraph:
        # DimensionGraph is recursively immutable; see note in @immutable
        # decorator.
        return self

    @property
    def names(self) -> AbstractSet[str]:
        """A set of the names of all dimensions in the graph (`KeysView`).
        """
        return self.dimensions.names

    def to_simple(self, minimal: bool = False) -> List[str]:
        """Convert this class to a simple python type suitable for
        serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization.  Has no effect for this class.

        Returns
        -------
        names : `list`
            The names of the dimensions.
        """
        # Names are all we can serialize.
        return list(self.names)

    @classmethod
    def from_simple(cls, names: List[str],
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DimensionGraph:
        """Construct a new object from the data returned from the `to_simple`
        method.

        Parameters
        ----------
        names : `list` of `str`
            The names of the dimensions.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset.  Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted.  Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        graph : `DimensionGraph`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or
            if no universe could be obtained from ``registry``.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            # An explicitly provided universe takes precedence over the
            # registry's.
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        return cls(names=names, universe=universe)

    to_json = to_json_generic
    from_json = classmethod(from_json_generic)

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph (and true `Dimension`
        instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph (and true `Dimension`
        instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``,
        not just true `Dimension` instances.
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name, or a default.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.

        Parameters
        ----------
        name : `str`
            Name of the element to look up.
        default : `object`, optional
            Fallback passed through to ``self.elements.get``.  Defaults to
            `None`.

        Returns
        -------
        element : `DimensionElement`
            The result of ``self.elements.get(name, default)``.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({str(self)})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions <= other.dimensions

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions >= other.dimensions

    def __eq__(self, other: Any) -> bool:
        """Test whether ``self`` and ``other`` have exactly the same
        dimensions.

        Returns `False` if ``other`` is not a `DimensionGraph`.
        """
        if isinstance(other, DimensionGraph):
            return self.dimensions == other.dimensions
        else:
            return False

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``.
        """
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``.
        """
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``.
        """
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``.
        """
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.

        Parameters
        ----------
        *others : `DimensionGraph`
            Other graphs to combine with ``self``.

        Returns
        -------
        graph : `DimensionGraph`
            Graph built in ``self.universe`` from the combined names.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.

        Parameters
        ----------
        *others : `DimensionGraph`
            Other graphs to intersect with ``self``.

        Returns
        -------
        graph : `DimensionGraph`
            Graph built in ``self.universe`` from the common names.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property  # type: ignore
    @cached_getter
    def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement, ...]:
        """Return a tuple of all elements in an order that allows records to
        be found given their primary keys, starting from only the primary keys
        of required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.
        """
        done: Set[str] = set()
        order: List[DimensionElement] = []

        def addToOrder(element: DimensionElement) -> None:
            # Append ``element`` (and then, recursively, what it implies)
            # once everything it requires has already been placed.
            if element.name in done:
                return
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                return
            order.append(element)
            done.add(element.name)
            for other in element.implied:
                addToOrder(other)

        # ``done`` holds names, so compare against the names of the required
        # dimensions; this does not depend on elements comparing equal to
        # their name strings.
        while not done.issuperset(self.required.names):
            for dimension in self.required:
                addToOrder(dimension)

        # Everything not reached from the required dimensions goes last, in
        # the order of ``self.elements``.
        order.extend(element for element in self.elements if element.name not in done)
        return tuple(order)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """The `~TopologicalSpace.SPATIAL` families represented by the elements
        in this graph.
        """
        return self.topology[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """The `~TopologicalSpace.TEMPORAL` families represented by the
        elements in this graph.
        """
        return self.topology[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueAbstractSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueAbstractSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueAbstractSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueAbstractSet` of
    `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    governors: NamedValueAbstractSet[GovernorDimension]
    """A true `~collections.abc.Set` of all true `GovernorDimension` instances
    in the graph (`NamedValueAbstractSet` of `GovernorDimension`).
    """

    required: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueAbstractSet` of `Dimension`).
    """

    implied: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueAbstractSet` of
    `Dimension`).
    """

    topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this graph that can participate in topological
    relationships (`Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """