Coverage for python/lsst/daf/butler/core/dimensions/graph.py : 21%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["DimensionGraph"]
26from typing import Optional, Iterable, Iterator, KeysView, Union, Any, Tuple, TYPE_CHECKING
28from ..utils import NamedValueSet, NamedKeyDict, immutable
30if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 30 ↛ 31line 30 didn't jump to line 31, because the condition on line 30 was never true
31 from .universe import DimensionUniverse
32 from .elements import DimensionElement, Dimension
@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in any context
    where a collection of dimensions is required and a `DimensionUniverse` is
    available.

    Instances are cached on the universe, keyed by their (expanded) set of
    dimension names, so constructing a graph with the same dimensions twice
    returns the same object.

    While `DimensionUniverse` inherits from `DimensionGraph`, it should
    otherwise not be used as a base class.
    """

    def __new__(cls, universe: DimensionUniverse,
                dimensions: Optional[Iterable[Dimension]] = None,
                names: Optional[Iterable[str]] = None,
                conform: bool = True) -> DimensionGraph:
        # Normalize the two alternative inputs into a single mutable set of
        # dimension names.
        if names is None:
            if dimensions is None:
                names = set()
            else:
                try:
                    # Containers such as NamedValueSet expose their names
                    # directly; fall back to per-item lookup otherwise.
                    names = set(dimensions.names)
                except AttributeError:
                    names = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            names = set(names)
        if conform:
            # Expand given dimensions to include all dependencies.
            for name in tuple(names):  # iterate over a temporary copy so we can modify the original
                names.update(universe[name]._related.dependencies)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(names)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it, and add it to the cache.
        self = super().__new__(cls)
        universe._cache[cacheKey] = self
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(d for d in universe.dimensions if d.name in names)
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(e for e in universe.elements
                                      if e._shouldBeInGraph(self.dimensions.names))
        self._finish()
        return self

    def _finish(self):
        """Complete construction of the graph.

        This is intended for internal use by `DimensionGraph` and
        `DimensionUniverse` only.  It expects ``self.dimensions`` and
        ``self.elements`` to have been populated already, and derives all
        other attributes from them.
        """
        # Freeze the sets the constructor is responsible for populating.
        self.dimensions.freeze()
        self.elements.freeze()

        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        self.required = NamedValueSet()
        self.implied = NamedValueSet()
        for dim1 in self.dimensions:
            if any(dim1.name in dim2._related.implied for dim2 in self.dimensions):
                self.implied.add(dim1)
            else:
                # If no other dimension implies dim1, it's required.
                self.required.add(dim1)
        self.required.freeze()
        self.implied.freeze()

        # Compute sets of spatial and temporal elements.
        # These contain the values of the `.spatial` and `.temporal` attributes
        # of all elements, unless those attributes are not in the graph.
        # In that case, the element whose attribute is not in the graph is
        # added instead.  This ensures that these sets contain the
        # most-specific spatial and temporal elements, not the summary elements
        # that aggregate them, unless the summaries are all that we have.
        self.spatial = NamedValueSet()
        self.temporal = NamedValueSet()
        for element in self.elements:
            if element.spatial is not None:
                if element.spatial in self.elements:
                    self.spatial.add(element.spatial)
                else:
                    self.spatial.add(element)
            if element.temporal is not None:
                if element.temporal in self.elements:
                    self.temporal.add(element.temporal)
                else:
                    self.temporal.add(element)
        self.spatial.freeze()
        self.temporal.freeze()

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.
        self._requiredIndices = NamedKeyDict({dimension: i for i, dimension in enumerate(self.required)})
        self._dimensionIndices = NamedKeyDict({dimension: i for i, dimension in enumerate(self.dimensions)})
        self._elementIndices = NamedKeyDict({element: i for i, element in enumerate(self.elements)})

    def __getnewargs__(self) -> tuple:
        # Pickle support: rebuild via ``__new__`` from names only, skipping
        # dependency expansion because an existing graph is already expanded.
        return (self.universe, None, tuple(self.dimensions.names), False)

    @property
    def names(self) -> KeysView[str]:
        """A set of the names of all dimensions in the graph (`KeysView`).
        """
        return self.dimensions.names

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph (and true `Dimension`
        instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph (and true `Dimension`
        instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``
        (not just true `Dimension` instances).
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name, or ``default`` if there
        is no such element.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({self})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions.issubset(other.dimensions)

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions.issuperset(other.dimensions)

    def __eq__(self, other: Any) -> bool:
        """Test whether ``self`` and ``other`` have exactly the same
        dimensions.

        Objects that have no ``dimensions`` attribute are never equal to a
        graph; the comparison is deferred to the other operand
        (`NotImplemented`) rather than raising `AttributeError`.
        """
        try:
            otherDimensions = other.dimensions
        except AttributeError:
            return NotImplemented
        return self.dimensions == otherDimensions

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``.
        """
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``.
        """
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``.
        """
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``.
        """
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property
    def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement, ...]:
        """Return a tuple of all elements in an order that allows records to
        be found given their primary keys, starting from only the primary keys
        of required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.

        The result is computed on first access and cached on the instance.
        """
        order = getattr(self, "_primaryKeyTraversalOrder", None)
        if order is None:
            # Names (not element objects) of everything already placed in
            # ``order``; every membership test below is by name.
            done = set()
            order = []

            def addToOrder(element) -> None:
                if element.name in done:
                    return
                # An element can only be placed once all of the dimensions it
                # requires (other than itself) have been placed.
                predecessors = set(element.required.names)
                predecessors.discard(element.name)
                if not done.issuperset(predecessors):
                    return
                order.append(element)
                done.add(element.name)
                for other in element.implied:
                    addToOrder(other)

            requiredNames = set(self.required.names)
            while not done.issuperset(requiredNames):
                nDoneBefore = len(done)
                for dimension in self.required:
                    addToOrder(dimension)
                if len(done) == nDoneBefore:
                    # A full pass placed nothing new; further passes would
                    # loop forever, so fail loudly instead.
                    raise RuntimeError(
                        f"Could not order required dimensions {requiredNames - done} of {self}; "
                        "their own required dimensions cannot be satisfied within this graph."
                    )

            # Anything not reachable from the required dimensions goes last,
            # in the graph's own element order.
            order.extend(element for element in self.elements if element.name not in done)
            order = tuple(order)
            self._primaryKeyTraversalOrder = order
        return order

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueSet` of `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    required: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueSet` of `Dimension`).
    """

    implied: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueSet` of
    `Dimension`).
    """

    spatial: NamedValueSet[DimensionElement]
    """Elements that are associated with independent spatial regions
    (`NamedValueSet` of `DimensionElement`).
    """

    temporal: NamedValueSet[DimensionElement]
    """Elements that are associated with independent temporal regions
    (`NamedValueSet` of `DimensionElement`).
    """