Coverage for python/lsst/daf/butler/core/dimensions/graph.py : 21%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["DimensionGraph"]
26from typing import Optional, Iterable, Iterator, KeysView, Union, Any, TYPE_CHECKING
28from ..utils import NamedValueSet, NamedKeyDict, immutable
30if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 30 ↛ 31line 30 didn't jump to line 31, because the condition on line 30 was never true
31 from .universe import DimensionUniverse
32 from .elements import DimensionElement, Dimension
def _filterDependentElements(elements: NamedValueSet[DimensionElement],
                             prefer: NamedValueSet[DimensionElement]
                             ) -> NamedValueSet[DimensionElement]:
    """Return a subset of the given set with only independent elements.

    Parameters
    ----------
    elements : `NamedValueSet` of `DimensionElement`
        The elements to be filtered.
    prefer : `NamedValueSet` of `DimensionElement`
        Elements to be included in the result in preference to others with
        which they have a dependency relationship.  When no preferred element
        is given for a pair of related elements, the dependent is included
        rather than the dependency.

    Returns
    -------
    filtered : `NamedValueSet` of `DimensionElement`
        The filtered set of elements, in the same relative order in which
        they appear in ``elements``.

    Notes
    -----
    Each element is only compared against dependencies that have already been
    accepted, so a dependency is only dropped in favor of its dependent if it
    is iterated over first.
    """
    preferredNames = prefer.names
    keptNames = set()
    for element in elements:
        # Dependencies of this element that are currently in the result.
        keptDependencyNames = frozenset(element._recursiveDependencyNames & keptNames)
        if keptDependencyNames.isdisjoint(preferredNames):
            # No preferred element stands in the way: the dependent replaces
            # its dependencies.
            keptNames.difference_update(keptDependencyNames)
            keptNames.add(element.name)
        # Otherwise a preferred dependency is already present and wins, so
        # this element is left out.
    # Filter the input rather than iterating over the set of names, so the
    # order of the result does not depend on string hash randomization.
    return NamedValueSet(element for element in elements if element.name in keptNames)
@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in any context
    where a collection of dimensions is required and a `DimensionUniverse` is
    available.

    While `DimensionUniverse` inherits from `DimensionGraph`, it should
    otherwise not be used as a base class.

    Instances are cached on the universe (keyed by the set of dimension
    names), so constructing a graph with the same dimensions twice returns
    the same object.
    """

    def __new__(cls, universe: DimensionUniverse,
                dimensions: Optional[Iterable[Dimension]] = None,
                names: Optional[Iterable[str]] = None,
                conform: bool = True) -> DimensionGraph:
        # Normalize whichever argument was given into a mutable set of names.
        if names is None:
            if dimensions is None:
                names = set()
            else:
                try:
                    # Fast path for containers that already expose their
                    # names (e.g. NamedValueSet or another DimensionGraph).
                    names = set(dimensions.names)
                except AttributeError:
                    names = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            names = set(names)
        if conform:
            # Expand given dimensions to include all dependencies.
            # Iterate over a temporary copy so we can modify the original.
            for name in tuple(names):
                names.update(universe[name]._recursiveDependencyNames)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(names)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it, and add it to the cache.
        self = super().__new__(cls)
        universe._cache[cacheKey] = self
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(d for d in universe.dimensions if d.name in names)
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(e for e in universe.elements
                                      if e._shouldBeInGraph(self.dimensions.names))
        self._finish()
        return self

    def _finish(self):
        """Complete construction of the graph.

        This is intended for internal use by `DimensionGraph` and
        `DimensionUniverse` only.  It expects ``self.dimensions`` and
        ``self.elements`` to have been populated already, and derives every
        other attribute from them.
        """
        # Freeze the sets the constructor is responsible for populating.
        self.dimensions.freeze()
        self.elements.freeze()

        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        self.required = NamedValueSet()
        self.implied = NamedValueSet()
        for candidate in self.dimensions:
            isImplied = any(candidate.name in other._impliedDependencyNames
                            for other in self.dimensions)
            if isImplied:
                self.implied.add(candidate)
            else:
                # If no other dimension implies the candidate, it's required.
                self.required.add(candidate)
        self.required.freeze()
        self.implied.freeze()

        # Compute sets of spatial and temporal elements.
        # We keep both the sets with no redundancy resolution and those with
        # KEEP_CHILD redundancy resolution for all elements.  The latter is
        # what is usually wanted (by e.g. ExpandedDataCoordinate), but the
        # former is what we need to compute any other redundancy resolution
        # on the fly.
        self._allSpatial = NamedValueSet(element for element in self.elements if element.spatial)
        self._allSpatial.freeze()
        self._allTemporal = NamedValueSet(element for element in self.elements if element.temporal)
        self._allTemporal.freeze()
        self.spatial = _filterDependentElements(self._allSpatial, prefer=NamedValueSet())
        self.spatial.freeze()
        self.temporal = _filterDependentElements(self._allTemporal, prefer=NamedValueSet())
        self.temporal.freeze()

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.
        self._requiredIndices = NamedKeyDict({dimension: i for i, dimension in enumerate(self.required)})
        self._dimensionIndices = NamedKeyDict({dimension: i for i, dimension in enumerate(self.dimensions)})
        self._elementIndices = NamedKeyDict({element: i for i, element in enumerate(self.elements)})

        # Compute an element traversal order that allows element records to be
        # found given their primary keys, starting from only the primary keys
        # of required dimensions.  Unlike the table definition/topological
        # order (which is what DimensionUniverse.sorted gives you), when
        # dimension A implies dimension B, dimension A appears first.
        # This is really for DimensionDatabase/ExpandedDataCoordinate, but
        # is stored here so we don't have to recompute it for every coordinate.
        todo = set(self.elements)
        self._primaryKeyTraversalOrder = []

        def addToPrimaryKeyTraversalOrder(element):
            # Depth-first: record the element, then everything it implies.
            if element in todo:
                self._primaryKeyTraversalOrder.append(element)
                todo.remove(element)
                for other in element.implied:
                    addToPrimaryKeyTraversalOrder(other)

        for dimension in self.required:
            addToPrimaryKeyTraversalOrder(dimension)

        # Append whatever was not reached from the required dimensions, in
        # the order of self.elements rather than in (hash-dependent) set
        # iteration order, so the result is reproducible between runs.
        self._primaryKeyTraversalOrder.extend(element for element in self.elements if element in todo)

    def __getnewargs__(self) -> tuple:
        # Arguments passed to __new__ on unpickling; the names are already
        # dependency-complete, so conform=False.
        return (self.universe, None, tuple(self.dimensions.names), False)

    @property
    def names(self) -> KeysView[str]:
        """A set of the names of all dimensions in the graph (`KeysView`).
        """
        return self.dimensions.names

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph (and true `Dimension`
        instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph (and true `Dimension`
        instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``,
        not just true `Dimension` instances.
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.

        Parameters
        ----------
        name : `str`
            Name of the element to look up.
        default : `object`, optional
            Value forwarded to ``self.elements.get`` as the fallback for a
            name that is not found.

        Returns
        -------
        element : `DimensionElement`
            Whatever ``self.elements.get(name, default)`` returns.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({str(self)})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions.issubset(other.dimensions)

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions.issuperset(other.dimensions)

    def __eq__(self, other: Any) -> bool:
        """Test whether ``self`` and ``other`` have exactly the same
        dimensions.

        Operands that are not `DimensionGraph` instances yield
        `NotImplemented`, so comparisons such as ``graph == None`` evaluate
        to `False` instead of raising `AttributeError`.
        """
        if not isinstance(other, DimensionGraph):
            return NotImplemented
        return self.dimensions == other.dimensions

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``.
        """
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``.
        """
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``.
        """
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``.
        """
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*(other.names for other in others))
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.
        """
        names = set(self.names).intersection(*(other.names for other in others))
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other):
        """Construct a new graph containing all dimensions in any of the
        operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other):
        """Construct a new graph containing only dimensions in all of the
        operands.

        See `intersection`.
        """
        return self.intersection(other)

    def getSpatial(self, *, independent: bool = True,
                   prefer: Optional[Iterable[DimensionElement]] = None
                   ) -> NamedValueSet[DimensionElement]:
        """Return the elements that are associated with spatial regions,
        possibly with some filtering.

        Parameters
        ----------
        independent : `bool`
            If `True` (default) ensure that all returned elements are
            independent of each other, by resolving any dependencies between
            spatial elements in favor of the dependent one (which is the one
            with the smaller, more precise region).  A graph that includes both
            "tract" and "patch", for example, would have only "patch" returned
            here if ``independent`` is `True`.  If `False`, all spatial
            elements are returned.
        prefer : iterable of `DimensionElement`
            Elements that should be returned instead of their dependents when
            ``independent`` is `True` (ignored if ``independent`` is `False`).
            For example, passing ``prefer=[tract]`` to a graph with both
            "tract" and "patch" would result in only "tract" being returned.

        Returns
        -------
        spatial : `NamedValueSet` of `DimensionElement`
            Elements that have `DimensionElement.spatial` `True`, filtered
            as specified by the arguments.
        """
        if not independent:
            return self._allSpatial
        if prefer is None:
            # The default filtering was precomputed at construction.
            return self.spatial
        preferred = NamedValueSet(self.elements[p] for p in prefer)
        return _filterDependentElements(self._allSpatial, prefer=preferred)

    def getTemporal(self, *, independent: bool = True,
                    prefer: Optional[Iterable[DimensionElement]] = None
                    ) -> NamedValueSet[DimensionElement]:
        """Return the elements that are associated with a timespan,
        possibly with some filtering.

        Parameters
        ----------
        independent : `bool`
            If `True` (default) ensure that all returned elements are
            independent of each other, by resolving any dependencies between
            temporal elements in favor of the dependent one (which is the one
            with the smaller, more precise timespans).  If `False`, all
            temporal elements are returned.
        prefer : iterable of `DimensionElement`
            Elements that should be returned instead of their dependents when
            ``independent`` is `True` (ignored if ``independent`` is `False`).

        Returns
        -------
        temporal : `NamedValueSet` of `DimensionElement`
            Elements that have `DimensionElement.temporal` `True`, filtered
            as specified by the arguments.
        """
        if not independent:
            return self._allTemporal
        if prefer is None:
            # The default filtering was precomputed at construction.
            return self.temporal
        preferred = NamedValueSet(self.elements[p] for p in prefer)
        return _filterDependentElements(self._allTemporal, prefer=preferred)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueSet` of `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    required: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueSet` of `Dimension`).
    """

    implied: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueSet` of
    `Dimension`).
    """

    spatial: NamedValueSet[DimensionElement]
    """Elements that are associated with independent spatial regions
    (`NamedValueSet` of `DimensionElement`).

    The default filtering described in `getSpatial` is applied.
    """

    temporal: NamedValueSet[DimensionElement]
    """Elements that are associated with independent timespans
    (`NamedValueSet` of `DimensionElement`).

    The default filtering described in `getTemporal` is applied.
    """