Coverage for python/lsst/daf/butler/core/dimensions/graph.py : 20%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["DimensionGraph"]
26from typing import Optional, Iterable, Iterator, KeysView, Union, Any, Tuple, TYPE_CHECKING
28from ..utils import NamedValueSet, NamedKeyDict, immutable
30if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 30 ↛ 31line 30 didn't jump to line 31, because the condition on line 30 was never true
31 from .universe import DimensionUniverse
32 from .elements import DimensionElement, Dimension
def _filterDependentElements(elements: NamedValueSet[DimensionElement],
                             prefer: NamedValueSet[DimensionElement]
                             ) -> NamedValueSet[DimensionElement]:
    """Return a subset of the given set with only independent elements.

    Parameters
    ----------
    elements : `NamedValueSet` of `DimensionElement`
        The elements to be filtered.
    prefer : `NamedValueSet` of `DimensionElement`
        Elements to be included in the result in preference to others with
        which they have a dependency relationship.  When no preferred element
        is given for a pair of related elements, the dependent is included
        rather than the dependency.

    Returns
    -------
    filtered : `NamedValueSet` of `DimensionElement`
        The filtered set of elements, in the same relative order in which
        they appear in ``elements``.
    """
    preferredNames = prefer.names
    resultNames = set()
    # NOTE(review): only dependencies that were selected *before* an element
    # is visited can be displaced by it, so this presumably relies on
    # ``elements`` yielding dependencies ahead of their dependents - confirm
    # against the ordering guarantees of the caller's sets.
    for element in elements:
        # Dependencies of this element that have already been selected.
        includedDependencyNames = frozenset(element._recursiveDependencyNames & resultNames)
        if includedDependencyNames.isdisjoint(preferredNames):
            # None of the selected dependencies is preferred, so the
            # dependent element replaces all of them.
            resultNames.difference_update(includedDependencyNames)
            resultNames.add(element.name)
        # Otherwise a preferred dependency is already selected; keep it and
        # skip this element.
    # Filter the input rather than iterating over the set of names, so the
    # order of the result does not depend on string hash randomization.
    return NamedValueSet(element for element in elements if element.name in resultNames)
@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in any context
    where a collection of dimensions is required and a `DimensionUniverse` is
    available.

    Instances are cached on the universe, keyed by their (expanded) set of
    dimension names, so constructing a graph with the same dimensions twice
    returns the same object.

    While `DimensionUniverse` inherits from `DimensionGraph`, it should
    otherwise not be used as a base class.
    """

    def __new__(cls, universe: DimensionUniverse,
                dimensions: Optional[Iterable[Dimension]] = None,
                names: Optional[Iterable[str]] = None,
                conform: bool = True) -> DimensionGraph:
        # Normalize the arguments into a mutable set of dimension names.
        if names is None:
            if dimensions is None:
                names = set()
            else:
                try:
                    # Fast path for containers that expose their names.
                    names = set(dimensions.names)
                except AttributeError:
                    names = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            names = set(names)
        if conform:
            # Expand given dimensions to include all dependencies.
            for name in tuple(names):  # iterate over a temporary copy so we can modify the original
                names.update(universe[name]._recursiveDependencyNames)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(names)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it, and add it to the cache.
        self = super().__new__(cls)
        universe._cache[cacheKey] = self
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(d for d in universe.dimensions if d.name in names)
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(e for e in universe.elements
                                      if e._shouldBeInGraph(self.dimensions.names))
        self._finish()
        return self

    def _finish(self) -> None:
        """Complete construction of the graph.

        This is intended for internal use by `DimensionGraph` and
        `DimensionUniverse` only.  It expects ``self.dimensions`` and
        ``self.elements`` to have been populated already, and derives all
        other attributes from them.
        """
        # Freeze the sets the constructor is responsible for populating.
        self.dimensions.freeze()
        self.elements.freeze()

        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        self.required = NamedValueSet()
        self.implied = NamedValueSet()
        for dim1 in self.dimensions:
            if any(dim1.name in dim2._impliedDependencyNames for dim2 in self.dimensions):
                self.implied.add(dim1)
            else:
                # If no dimension in the graph implies dim1, it's required.
                self.required.add(dim1)
        self.required.freeze()
        self.implied.freeze()

        # Compute sets of spatial and temporal elements.
        # We keep both the sets with no redundancy resolution and those with
        # KEEP_CHILD redundancy resolution for all elements.  The latter is
        # what is usually wanted (by e.g. ExpandedDataCoordinate), but the
        # former is what we need to compute any other redundancy resolution
        # on the fly.
        self._allSpatial = NamedValueSet(element for element in self.elements if element.spatial)
        self._allSpatial.freeze()
        self._allTemporal = NamedValueSet(element for element in self.elements if element.temporal)
        self._allTemporal.freeze()
        self.spatial = _filterDependentElements(self._allSpatial, prefer=NamedValueSet())
        self.spatial.freeze()
        self.temporal = _filterDependentElements(self._allTemporal, prefer=NamedValueSet())
        self.temporal.freeze()

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.
        self._requiredIndices = NamedKeyDict({dimension: i for i, dimension in enumerate(self.required)})
        self._dimensionIndices = NamedKeyDict({dimension: i for i, dimension in enumerate(self.dimensions)})
        self._elementIndices = NamedKeyDict({element: i for i, element in enumerate(self.elements)})

    def __getnewargs__(self) -> tuple:
        # Arguments for ``__new__`` when unpickling/copying: the names are
        # already expanded, so ``conform`` is passed as `False`.
        return (self.universe, None, tuple(self.dimensions.names), False)

    @property
    def names(self) -> KeysView[str]:
        """A set of the names of all dimensions in the graph (`KeysView`).
        """
        return self.dimensions.names

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph (and true `Dimension`
        instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph (and true `Dimension`
        instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``,
        not just true `Dimension` instances.
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name, or ``default`` if there is
        no such element.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({str(self)})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions.issubset(other.dimensions)

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions.issuperset(other.dimensions)

    def __eq__(self, other: Any) -> bool:
        """Test whether ``self`` and ``other`` have exactly the same
        dimensions.

        Only ``dimensions`` is compared.  Objects that have no ``dimensions``
        attribute are treated as unsupported operands (`NotImplemented`)
        rather than raising `AttributeError`.
        """
        try:
            otherDimensions = other.dimensions
        except AttributeError:
            return NotImplemented
        return self.dimensions == otherDimensions

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``.
        """
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``.
        """
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``.
        """
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``.
        """
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.

        See `intersection`.
        """
        return self.intersection(other)

    def getSpatial(self, *, independent: bool = True,
                   prefer: Optional[Iterable[DimensionElement]] = None
                   ) -> NamedValueSet[DimensionElement]:
        """Return the elements that are associated with spatial regions,
        possibly with some filtering.

        Parameters
        ----------
        independent : `bool`
            If `True` (default) ensure that all returned elements are
            independent of each other, by resolving any dependencies between
            spatial elements in favor of the dependent one (which is the one
            with the smaller, more precise region).  A graph that includes both
            "tract" and "patch", for example, would have only "patch" returned
            here if ``independent`` is `True`.  If `False`, all spatial
            elements are returned.
        prefer : iterable of `DimensionElement`
            Elements that should be returned instead of their dependents when
            ``independent`` is `True` (ignored if ``independent`` is `False`).
            For example, passing ``prefer=[tract]`` to a graph with both
            "tract" and "patch" would result in only "tract" being returned.

        Returns
        -------
        spatial : `NamedValueSet` of `DimensionElement`
            Elements that have `DimensionElement.spatial` `True`, filtered
            as specified by the arguments.
        """
        if not independent:
            return self._allSpatial
        elif prefer is None:
            # Default filtering was precomputed at construction.
            return self.spatial
        else:
            return _filterDependentElements(self._allSpatial,
                                            prefer=NamedValueSet(self.elements[p] for p in prefer))

    def getTemporal(self, *, independent: bool = True,
                    prefer: Optional[Iterable[DimensionElement]] = None
                    ) -> NamedValueSet[DimensionElement]:
        """Return the elements that are associated with a timespan,
        possibly with some filtering.

        Parameters
        ----------
        independent : `bool`
            If `True` (default) ensure that all returned elements are
            independent of each other, by resolving any dependencies between
            temporal elements in favor of the dependent one (which is the one
            with the smaller, more precise timespans).  If `False`, all
            temporal elements are returned.
        prefer : iterable of `DimensionElement`
            Elements that should be returned instead of their dependents when
            ``independent`` is `True` (ignored if ``independent`` is `False`).

        Returns
        -------
        temporal : `NamedValueSet` of `DimensionElement`
            Elements that have `DimensionElement.temporal` `True`, filtered
            as specified by the arguments.
        """
        if not independent:
            return self._allTemporal
        elif prefer is None:
            # Default filtering was precomputed at construction.
            return self.temporal
        else:
            return _filterDependentElements(self._allTemporal,
                                            prefer=NamedValueSet(self.elements[p] for p in prefer))

    @property
    def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement, ...]:
        """Return a tuple of all elements in an order that allows records to
        be found given their primary keys, starting from only the primary keys
        of required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.

        The result is computed on first access and stored on the instance.
        """
        order = getattr(self, "_primaryKeyTraversalOrder", None)
        if order is None:
            # Names (always `str`) of the elements already placed in
            # ``order``; everything compared against this set is a name.
            done = set()
            order = []

            def addToOrder(element: DimensionElement) -> None:
                if element.name in done:
                    return
                # An element can only be placed once every other dimension
                # its own graph requires has been placed.
                predecessors = set(element.graph.required.names)
                predecessors.discard(element.name)
                if not done.issuperset(predecessors):
                    return
                order.append(element)
                done.add(element.name)
                # Anything this element implies can now be looked up from it.
                for other in element.implied:
                    addToOrder(other)

            # Repeat passes over the required dimensions until all of them
            # have had their predecessors satisfied and been placed.
            while not done.issuperset(self.required.names):
                for dimension in self.required:
                    addToOrder(dimension)

            # Remaining elements go last, in ``self.elements`` order.
            order.extend(element for element in self.elements if element.name not in done)
            order = tuple(order)
            self._primaryKeyTraversalOrder = order
        return order

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueSet` of `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    required: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueSet` of `Dimension`).
    """

    implied: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueSet` of
    `Dimension`).
    """

    spatial: NamedValueSet[DimensionElement]
    """Elements that are associated with independent spatial regions
    (`NamedValueSet` of `DimensionElement`).

    The default filtering described in `getSpatial` is applied.
    """

    temporal: NamedValueSet[DimensionElement]
    """Elements that are associated with independent timespans
    (`NamedValueSet` of `DimensionElement`).

    The default filtering described in `getTemporal` is applied.
    """