Coverage for python/lsst/daf/butler/core/dimensions/coordinate.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("DataCoordinate", "ExpandedDataCoordinate", "DataId")
26from typing import Any, Tuple, Mapping, Optional, Dict, Union, TYPE_CHECKING
28from lsst.sphgeom import Region
29from ..utils import IndexedTupleDict, immutable
30from ..timespan import Timespan
31from .graph import DimensionGraph
33if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 33 ↛ 34line 33 didn't jump to line 34, because the condition on line 33 was never true
34 from .elements import DimensionElement, Dimension
35 from .universe import DimensionUniverse
36 from .records import DimensionRecord
@immutable
class DataCoordinate(IndexedTupleDict):
    """An immutable data ID dictionary that guarantees that its key-value pairs
    identify all required dimensions in a `DimensionGraph`.

    `DataCoordinate` instances should usually be constructed via the
    `standardize` static method; the constructor is reserved for callers that
    can guarantee that the ``values`` tuple has exactly the right elements.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.

    Notes
    -----
    Like any data ID class, `DataCoordinate` behaves like a dictionary,
    mostly via methods inherited from `IndexedTupleDict`.  Like `NamedKeyDict`,
    both `Dimension` instances and `str` names thereof may be used as keys in
    lookup operations.

    Subclasses are permitted to support lookup for any dimension in
    ``self.graph.dimensions``, but the base class only supports lookup for
    those in ``self.graph.required``, which is the minimal set needed to
    identify all others in a `Registry`.  Both the base class and subclasses
    define comparisons, iterators, and the `keys`, `values`, and `items` views
    to just the ``self.graph.required`` subset in order to guarantee true
    (i.e. Liskov) substitutability.
    """

    __slots__ = ("graph",)

    def __new__(cls, graph: DimensionGraph, values: Tuple[Any, ...]):
        # The base class maps each required dimension to a position in
        # ``values`` via the graph's precomputed index table.
        self = super().__new__(cls, graph._requiredIndices, values)
        self.graph = graph
        return self

    @staticmethod
    def standardize(mapping: Optional[Mapping[str, Any]] = None, *,
                    graph: Optional[DimensionGraph] = None,
                    universe: Optional[DimensionUniverse] = None,
                    **kwds) -> DataCoordinate:
        """Adapt an arbitrary mapping and/or additional arguments into a true
        `DataCoordinate`, or augment an existing one.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping`, optional
            An informal data ID that maps dimension names to their primary key
            values (may also be a true `DataCoordinate`).
        graph : `DimensionGraph`
            The dimensions to be identified by the new `DataCoordinate`.
            If not provided, will be inferred from the keys of ``mapping``,
            and ``universe`` must be provided unless ``mapping`` is already a
            `DataCoordinate`.
        universe : `DimensionUniverse`
            All known dimensions and their relationships; used to expand
            and validate dependencies when ``graph`` is not provided.
        kwds
            Additional keyword arguments are treated like additional key-value
            pairs in ``mapping``.

        Returns
        -------
        coordinate : `DataCoordinate`
            A validated `DataCoordinate` instance.  May be a subclass instance
            if and only if ``mapping`` is a subclass instance and ``graph``
            is a subset of ``mapping.graph``.

        Raises
        ------
        TypeError
            Raised if the set of optional arguments provided is not supported.
        ValueError
            Raised if ``universe`` is given but differs from the universe of
            a `DataCoordinate` passed as ``mapping``.
        KeyError
            Raised if a key-value pair for a required dimension is missing.

        Notes
        -----
        Because `DataCoordinate` stores only values for required dimensions,
        key-value pairs for other related dimensions will be ignored and
        excluded from the result.  This means that a `DataCoordinate` may
        contain *fewer* key-value pairs than the informal data ID dictionary
        it was constructed from.
        """
        if isinstance(mapping, DataCoordinate):
            if graph is None:
                if not kwds:
                    # Already standardized to exactly what we want.
                    return mapping
            elif mapping.graph.issuperset(graph):
                # Already standardized; just return the relevant subset.
                return mapping.subset(graph)
            # Explicit check rather than ``assert`` so that the validation
            # is not stripped when Python runs with -O.
            if universe is not None and not (universe == mapping.universe):
                raise ValueError(
                    "universe argument is inconsistent with the universe of the given DataCoordinate."
                )
            universe = mapping.universe
        if mapping:
            # Prefer a str-keyed view when the mapping provides one (as data
            # ID classes do); fall back to the mapping itself otherwise.
            try:
                d = mapping.byName()
            except AttributeError:
                d = mapping
            if kwds:
                # Keyword arguments override/extend the mapping's entries.
                d = dict(d, **kwds)
        else:
            # ``kwds`` is an empty dict when no keyword arguments were given.
            d = kwds
        if graph is None:
            if universe is None:
                raise TypeError("universe must be provided if graph is not.")
            graph = DimensionGraph(universe, names=d.keys())
        try:
            values = tuple(d[name] for name in graph.required.names)
        except KeyError as err:
            raise KeyError(f"No value in data ID for required dimension {err}.") from err
        return DataCoordinate(graph, values)

    def byName(self) -> Dict[str, Any]:
        """Return a true `dict` keyed by `str` dimension name and the same
        values as ``self``.
        """
        return {k.name: v for k, v in self.items()}

    def __getnewargs__(self) -> tuple:
        # Implements pickle support (in addition to methods provided by
        # @immutable decorator).
        return (self.graph, self.values())

    def __hash__(self) -> int:
        return hash((self.graph, self.values()))

    def __eq__(self, other: Any) -> bool:
        try:
            # Optimized code path for DataCoordinate comparisons.
            return self.graph == other.graph and self.values() == other.values()
        except AttributeError:
            # Also support comparison with informal data ID dictionaries that
            # map dimension name to value.
            return self.byName() == other

    def __str__(self):
        return f"{self.byName()}"

    def __repr__(self):
        return f"DataCoordinate({self.graph}, {self.values()})"

    def fingerprint(self, update):
        """Update a secure hash function with the values in this data ID.

        Parameters
        ----------
        update : `~collections.abc.Callable`
            Callable that accepts a single `bytes` argument to update
            the hash; usually the ``update`` method of an instance from
            the ``hashlib`` module.

        Raises
        ------
        TypeError
            Raised if a value is neither an `int` nor a `str`.
        """
        for k, v in self.items():
            update(k.name.encode("utf8"))
            if isinstance(v, int):
                # Fixed-width, unsigned, big-endian encoding of the integer.
                update(v.to_bytes(64, "big", signed=False))
            elif isinstance(v, str):
                update(v.encode("utf8"))
            else:
                raise TypeError(f"Only `int` and `str` are allowed as dimension keys, not {v} ({type(v)}).")

    def matches(self, other: DataCoordinate) -> bool:
        """Test whether the values of all keys in both coordinates are equal.

        Parameters
        ----------
        other : `DataCoordinate`
            The other coordinate to compare to.

        Returns
        -------
        consistent : `bool`
            `True` if all keys that are in both ``other`` and ``self``
            are associated with the same values, and `False` otherwise.
            `True` if there are no keys in common.
        """
        # Use the other object's ``full`` mapping when it has one, so that
        # as many of its keys as possible take part in the comparison.
        d = getattr(other, "full", other)
        return all(self[k] == d[k] for k in (self.keys() & d.keys()))

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        """Return a new `DataCoordinate` whose graph is a subset of
        ``self.graph``.

        Subclasses may override this method to return a subclass instance.

        Parameters
        ----------
        graph : `DimensionGraph`
            The dimensions identified by the returned `DataCoordinate`.

        Returns
        -------
        coordinate : `DataCoordinate`
            A `DataCoordinate` instance that identifies only the given
            dimensions.

        Raises
        ------
        KeyError
            Raised if ``graph`` is not a subset of ``self.graph``, and hence
            one or more dimensions has no associated primary key value.
        """
        return DataCoordinate(graph, tuple(self[dimension] for dimension in graph.required))

    @property
    def universe(self) -> DimensionUniverse:
        """The universe that defines all known dimensions compatible with
        this coordinate (`DimensionUniverse`).
        """
        return self.graph.universe

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    graph: DimensionGraph
    """The dimensions identified by this data ID (`DimensionGraph`).

    Note that values are only required to be present for dimensions in
    ``self.graph.required``; all others may be retrieved (from a `Registry`)
    given these.
    """
# Union of the validated data ID class and any str-keyed mapping.
DataId = Union[DataCoordinate, Mapping[str, Any]]
"""A type-annotation alias for signatures that accept both informal data ID
dictionaries and validated `DataCoordinate` instances.
"""
def _intersectRegions(*args: Region) -> Optional[Region]:
    """Combine zero or more regions into their common intersection.

    Internal helper for `ExpandedDataCoordinate`.

    With no arguments the result is `None`; with exactly one argument that
    argument is returned unchanged.

    Computing a real intersection is not yet implemented: when two or more
    regions are passed, the `NotImplemented` singleton is *returned* (nothing
    is raised) and is propagated to `ExpandedDataCoordinate`.  The intent is
    that constructing an `ExpandedDataCoordinate` should not fail just because
    its region cannot be computed, and no high-level use case currently needs
    the regions of such data IDs.
    """
    if not args:
        return None
    if len(args) > 1:
        # Placeholder result; see the docstring.
        return NotImplemented
    (onlyRegion,) = args
    return onlyRegion
@immutable
class ExpandedDataCoordinate(DataCoordinate):
    """A data ID that has been expanded to include all relevant metadata.

    Instances should usually be obtained by calling `Registry.expandDataId`.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.
    records : `~collections.abc.Mapping`
        Dictionary mapping `DimensionElement` to `DimensionRecord`.
    full : `~collections.abc.Mapping`
        Dictionary mapping dimensions to their primary key values for all
        dimensions in the graph, not just required ones.  Ignored unless
        ``conform`` is `False`.
    region : `sphgeom.Region`, optional
        Region on the sky associated with this data ID, or `None` if there
        are no spatial dimensions.  At present, this may be the special value
        `NotImplemented` if there are multiple spatial dimensions identified;
        in the future this will be replaced with the intersection.  Ignored
        unless ``conform`` is `False`.
    timespan : `Timespan`, optional
        Timespan associated with this data ID, or `None` if there are no
        temporal dimensions.
        Ignored unless ``conform`` is `False`.
    conform : `bool`, optional
        If `True` (default), adapt arguments from arbitrary mappings to the
        custom dictionary types and check that all expected key-value pairs are
        present.  `False` is only for internal use.

    Notes
    -----
    To maintain Liskov substitutability with `DataCoordinate`,
    `ExpandedDataCoordinate` mostly acts like a mapping that contains only
    values for its graph's required dimensions, even though it also contains
    values for all implied dimensions - its length, iteration, and
    keys/values/items views reflect only required dimensions.  Values for
    the primary keys of implied dimensions can be obtained from the `full`
    attribute, and are also accessible in dict lookups and the ``in`` operator.
    """

    __slots__ = ("records", "full", "region", "timespan")

    def __new__(cls, graph: DimensionGraph, values: Tuple[Any, ...], *,
                records: Mapping[DimensionElement, DimensionRecord],
                full: Optional[Mapping[Dimension, Any]] = None,
                region: Optional[Region] = None,
                timespan: Optional[Timespan] = None,
                conform: bool = True):
        self = super().__new__(cls, graph, values)
        if conform:
            # Rebuild every derived attribute from ``records``; the ``full``,
            # ``region`` and ``timespan`` arguments are not consulted here.
            self.records = IndexedTupleDict(
                indices=graph._elementIndices,
                values=tuple(records[element] for element in graph.elements)
            )
            self.full = IndexedTupleDict(
                indices=graph._dimensionIndices,
                values=tuple(getattr(self.records[dimension], dimension.primaryKey.name, None)
                             for dimension in graph.dimensions)
            )
            regions = []
            for element in self.graph.spatial:
                record = self.records[element.name]
                if record is None or record.region is None:
                    # A spatial element without a region makes the overall
                    # region unknown; stop looking.
                    self.region = None
                    break
                else:
                    regions.append(record.region)
            else:
                # Loop finished without ``break``: every spatial element
                # (possibly none) contributed a region.
                self.region = _intersectRegions(*regions)
            timespans = []
            for element in self.graph.temporal:
                record = self.records[element.name]
                if record is None or record.timespan is None:
                    # Same rule as for regions: one missing timespan means
                    # no overall timespan.
                    self.timespan = None
                    break
                else:
                    timespans.append(record.timespan)
            else:
                self.timespan = Timespan.intersection(*timespans)
        else:
            # Trust the caller: store the arguments exactly as given.
            self.records = records
            self.full = full
            self.region = region
            self.timespan = timespan
        return self

    def __contains__(self, key: Union[DimensionElement, str]) -> bool:
        return key in self.full

    def __getitem__(self, key: Union[DimensionElement, str]) -> Any:
        return self.full[key]

    def __repr__(self):
        return f"ExpandedDataCoordinate({self.graph}, {self.values()})"

    def pack(self, name: str, *, returnMaxBits: bool = False) -> int:
        """Pack this data ID into an integer.

        Parameters
        ----------
        name : `str`
            Name of the `DimensionPacker` algorithm (as defined in the
            dimension configuration).
        returnMaxBits : `bool`, optional
            If `True` (`False` is default), return the maximum number of
            nonzero bits in the returned integer across all data IDs.

        Returns
        -------
        packed : `int`
            Integer ID.  This ID is unique only across data IDs that have
            the same values for the packer's "fixed" dimensions.
        maxBits : `int`, optional
            Maximum number of nonzero bits in ``packed``.  Not returned unless
            ``returnMaxBits`` is `True`.
        """
        return self.universe.makePacker(name, self).pack(self, returnMaxBits=returnMaxBits)

    def matches(self, other) -> bool:
        # Docstring inherited from DataCoordinate.matches.
        d = getattr(other, "full", other)
        # Compare over ``self.full`` so implied dimensions take part too.
        return all(self[k] == d[k] for k in (self.full.keys() & d.keys()))

    def subset(self, graph: DimensionGraph) -> ExpandedDataCoordinate:
        # Docstring inherited from DataCoordinate.subset.
        return ExpandedDataCoordinate(
            graph,
            tuple(self[dimension] for dimension in graph.required),
            records=self.records,
            conform=True
        )

    def __getnewargs_ex__(self) -> Tuple[tuple, dict]:
        # Pickle support: pass the stored attributes back to ``__new__`` with
        # ``conform=False`` so they are restored as-is.
        return (
            (self.graph, self.values()),
            dict(
                records=self.records,
                full=self.full,
                region=self.region,
                timespan=self.timespan,
                conform=False,
            )
        )

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    full: IndexedTupleDict[Dimension, Any]
    """Dictionary mapping dimensions to their primary key values for all
    dimensions in the graph, not just required ones (`IndexedTupleDict`).

    Like `DataCoordinate` itself, this dictionary can be indexed by `str` name
    as well as `Dimension` instance.
    """

    records: IndexedTupleDict[DimensionElement, DimensionRecord]
    """Dictionary mapping `DimensionElement` to the associated
    `DimensionRecord` (`IndexedTupleDict`).

    Like `DataCoordinate` itself, this dictionary can be indexed by `str` name
    as well as `DimensionElement` instance.
    """

    region: Optional[Region]
    """Region on the sky associated with this data ID, or `None` if there
    are no spatial dimensions (`sphgeom.Region`).

    At present, this may be the special value `NotImplemented` if there are
    multiple spatial dimensions identified; in the future this will be
    replaced with the intersection.
    """

    timespan: Optional[Timespan]
    """Timespan associated with this data ID, or `None` if there are no
    temporal dimensions (`Timespan`).
    """