Coverage for python/lsst/daf/butler/core/dimensions/graph.py : 21%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["DimensionGraph"]
26import itertools
27from typing import (
28 Any,
29 Dict,
30 Iterable,
31 Iterator,
32 KeysView,
33 Optional,
34 Set,
35 Tuple,
36 TYPE_CHECKING,
37 Union,
38)
40from ..named import NamedValueSet
41from ..utils import immutable
43if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 43 ↛ 44line 43 didn't jump to line 44, because the condition on line 43 was never true
44 from .universe import DimensionUniverse
45 from .elements import DimensionElement, Dimension
@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances. It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph. All (recursive) dependencies of these dimensions will also
        be included. At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph. All (recursive) dependencies of these dimensions will also
        be included. At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies. `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in any context
    where a collection of dimensions is required and a `DimensionUniverse` is
    available.

    Instances are cached on the universe, keyed by their (expanded) set of
    dimension names, so constructing a graph with the same dimensions twice
    returns the same object.

    While `DimensionUniverse` inherits from `DimensionGraph`, it should
    otherwise not be used as a base class.
    """

    def __new__(cls, universe: DimensionUniverse,
                dimensions: Optional[Iterable[Dimension]] = None,
                names: Optional[Iterable[str]] = None,
                conform: bool = True) -> DimensionGraph:
        conformedNames: Set[str]
        if names is None:
            if dimensions is None:
                conformedNames = set()
            else:
                try:
                    # Optimize for NamedValueSet/NamedKeyDict, though that's
                    # not required.
                    conformedNames = set(dimensions.names)  # type: ignore
                except AttributeError:
                    conformedNames = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            conformedNames = set(names)
        if conform:
            # Expand given dimensions to include all dependencies.  Iterate
            # over a temporary copy so we can modify the original.
            for name in tuple(conformedNames):
                conformedNames.update(universe[name]._related.dependencies)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(conformedNames)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it, and add it to the cache.
        self = super().__new__(cls)
        universe._cache[cacheKey] = self
        try:
            self.universe = universe
            # Reorder dimensions by iterating over the universe (which is
            # ordered already) and extracting the ones in the set.
            self.dimensions = NamedValueSet(universe.sorted(conformedNames))
            # Make a set that includes both the dimensions and any
            # DimensionElements whose dependencies are in self.dimensions.
            self.elements = NamedValueSet(e for e in universe.getStaticElements()
                                          if e._shouldBeInGraph(self.dimensions.names))
            self._finish()
        except BaseException:
            # Never leave a partially-constructed graph in the cache: later
            # lookups with the same names would silently return it.
            universe._cache.pop(cacheKey, None)
            raise
        return self

    def _finish(self) -> None:
        """Complete construction of the graph.

        This is intended for internal use by `DimensionGraph` and
        `DimensionUniverse` only.  It expects ``self.dimensions`` and
        ``self.elements`` to have been populated already, and derives the
        ``required``, ``implied``, ``spatial`` and ``temporal`` sets and the
        ``_dataCoordinateIndices`` mapping from them.
        """
        # Freeze the sets the constructor is responsible for populating.
        self.dimensions.freeze()
        self.elements.freeze()

        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        self.required = NamedValueSet()
        self.implied = NamedValueSet()
        for dim1 in self.dimensions:
            if any(dim1.name in dim2._related.implied for dim2 in self.dimensions):
                self.implied.add(dim1)
            else:
                # If no other dimension implies dim1, it's required.
                self.required.add(dim1)
        self.required.freeze()
        self.implied.freeze()

        # Compute sets of spatial and temporal elements.
        # These contain the values of the `.spatial` and `.temporal` attributes
        # of all elements, unless those attributes are not in the graph.
        # In that case, the element whose attribute is not in the graph is
        # added instead.  This ensures that these sets contain the
        # most-specific spatial and temporal elements, not the summary elements
        # that aggregate them, unless the summaries are all that we have.
        self.spatial = NamedValueSet()
        self.temporal = NamedValueSet()
        for element in self.elements:
            if element.spatial is not None:
                if element.spatial in self.elements:
                    self.spatial.add(element.spatial)
                else:
                    self.spatial.add(element)
            if element.temporal is not None:
                if element.temporal in self.elements:
                    self.temporal.add(element.temporal)
                else:
                    self.temporal.add(element)
        self.spatial.freeze()
        self.temporal.freeze()

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.  The order here is what's convenient
        # for DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._dataCoordinateIndices: Dict[str, int] = {
            name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names))
        }

    def __getnewargs__(self) -> tuple:
        """Return the positional arguments to pass to ``__new__`` when this
        object is reconstructed (e.g. by `pickle`).

        The dimensions are passed by name with ``conform=False``, since the
        names of an existing graph are already fully expanded.
        """
        return (self.universe, None, tuple(self.dimensions.names), False)

    @property
    def names(self) -> KeysView[str]:
        """A set of the names of all dimensions in the graph (`KeysView`).
        """
        return self.dimensions.names

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph (and true `Dimension`
        instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph (and true `Dimension`
        instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``,
        not just true `Dimension` instances.
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> Optional[DimensionElement]:
        """Return the element with the given name, or a default.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.

        Parameters
        ----------
        name : `str`
            Name of the element to look up.
        default : `object`, optional
            Value forwarded to ``self.elements.get`` as the fallback for a
            name that is not found.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({self!s})"

    @classmethod
    def decode(cls, encoded: bytes, *, universe: DimensionUniverse) -> DimensionGraph:
        """Construct a `DimensionGraph` from its encoded representation.

        Parameters
        ----------
        encoded : `bytes`
            Byte string produced by `DimensionGraph.encode`.
        universe : `DimensionUniverse`
            Universe the new graph is a part of.  Must have the same dimensions
            as the original universe.

        Returns
        -------
        graph : `DimensionGraph`
            A new (or possibly cached) `DimensionGraph` instance matching the
            given encoding.
        """
        # The encoding is a big-endian bitmask with one bit per dimension,
        # positioned by the universe's dimension index (see `encode`).
        mask = int.from_bytes(encoded, "big")
        dimensions = [
            dimension for dimension in universe.getStaticDimensions()
            if mask & (1 << universe.getDimensionIndex(dimension.name))
        ]
        # An encoded graph was already dependency-complete, so skip expansion.
        return cls(universe, dimensions=dimensions, conform=False)

    def encode(self) -> bytes:
        """Encode a `DimensionGraph` into a byte string.

        Returns
        -------
        encoded : `bytes`
            Encoded representation of the graph.  Length is guaranteed to be
            equal to `DimensionUniverse.getEncodeLength`.
        """
        # Set one bit per dimension, at the universe's index for it.
        mask = 0
        for dimension in self.dimensions:
            mask |= 1 << self.universe.getDimensionIndex(dimension.name)
        return mask.to_bytes(self.universe.getEncodeLength(), byteorder="big")

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions.issubset(other.dimensions)

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions.issuperset(other.dimensions)

    def __eq__(self, other: Any) -> bool:
        """Test whether ``self`` and ``other`` have exactly the same
        dimensions.

        Only ``dimensions`` is compared; for graphs built by ``__new__`` the
        ``elements`` set is derived from the dimensions.  Any object that is
        not a `DimensionGraph` compares unequal.
        """
        if not isinstance(other, DimensionGraph):
            return False
        return self.dimensions == other.dimensions

    def __hash__(self) -> int:
        # Hash on the same information `__eq__` compares.
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``.
        """
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``.
        """
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``.
        """
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``.
        """
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*(other.names for other in others))
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.
        """
        names = set(self.names).intersection(*(other.names for other in others))
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property
    def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement, ...]:
        """Return a tuple of all elements in an order that allows records to
        be found given their primary keys, starting from only the primary keys
        of required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.

        The result is computed on first access and stored on the instance.
        """
        order = getattr(self, "_primaryKeyTraversalOrder", None)
        if order is None:
            done: Set[str] = set()
            ordered: list = []

            def addToOrder(element: DimensionElement) -> None:
                if element.name in done:
                    return
                # An element can only be added once everything it requires
                # (other than itself) has already been added.
                predecessors = set(element.required.names)
                predecessors.discard(element.name)
                if not done.issuperset(predecessors):
                    return
                ordered.append(element)
                done.add(element.name)
                # Elements implied by this one come right after it.
                for other in element.implied:
                    addToOrder(other)

            # Repeat passes over the required dimensions until all of them
            # have been placed; a dimension skipped in one pass (because a
            # predecessor was not yet done) is picked up in a later one.
            # Compare by name explicitly: ``done`` holds names, not elements.
            while not done.issuperset(self.required.names):
                for dimension in self.required:
                    addToOrder(dimension)

            # Anything not reached above goes at the end, in element order.
            ordered.extend(element for element in self.elements if element.name not in done)
            order = tuple(ordered)
            self._primaryKeyTraversalOrder = order
        return order

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueSet` of `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    required: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueSet` of `Dimension`).
    """

    implied: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueSet` of
    `Dimension`).
    """

    spatial: NamedValueSet[DimensionElement]
    """Elements that are associated with independent spatial regions
    (`NamedValueSet` of `DimensionElement`).
    """

    temporal: NamedValueSet[DimensionElement]
    """Elements that carry independent temporal information
    (`NamedValueSet` of `DimensionElement`).
    """