Coverage for python/lsst/daf/butler/core/dimensions/coordinate.py : 21%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("DataCoordinate", "ExpandedDataCoordinate", "DataId")
26import numbers
27from typing import (
28 Any,
29 Callable,
30 Mapping,
31 Optional,
32 Tuple,
33 TYPE_CHECKING,
34 Union,
35)
37from lsst.sphgeom import Region
38from ..named import IndexedTupleDict, NamedKeyMapping
39from ..timespan import Timespan
40from .elements import Dimension
41from .graph import DimensionGraph
43if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
44 from .elements import DimensionElement
45 from .universe import DimensionUniverse
46 from .records import DimensionRecord
class DataCoordinate(IndexedTupleDict[Dimension, Any]):
    """An immutable data ID dictionary that guarantees that its key-value pairs
    identify all required dimensions in a `DimensionGraph`.

    `DataCoordinate` instances should usually be constructed via the
    `standardize` class method; the constructor is reserved for callers that
    can guarantee that the ``values`` tuple has exactly the right elements.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.

    Notes
    -----
    Like any data ID class, `DataCoordinate` behaves like a dictionary,
    mostly via methods inherited from `IndexedTupleDict`.  Like `NamedKeyDict`,
    both `Dimension` instances and `str` names thereof may be used as keys in
    lookup operations.

    Subclasses are permitted to support lookup for any dimension in
    ``self.graph.dimensions``, but the base class only supports lookup for
    those in ``self.graph.required``, which is the minimal set needed to
    identify all others in a `Registry`.  Both the base class and subclasses
    define comparisons, iterators, and the `keys`, `values`, and `items` views
    to just the ``self.graph.required`` subset in order to guarantee true
    (i.e. Liskov) substitutability.
    """

    __slots__ = ("_graph",)

    def __init__(self, graph: DimensionGraph, values: Tuple[Any, ...]):
        # The base-class mapping is indexed by the graph's *required*
        # dimensions only; ``values`` must be ordered to match them.
        super().__init__(graph._requiredIndices, values)
        self._graph = graph

    @staticmethod
    def standardize(mapping: Optional[Union[Mapping[str, Any], NamedKeyMapping[Dimension, Any]]] = None, *,
                    graph: Optional[DimensionGraph] = None,
                    universe: Optional[DimensionUniverse] = None,
                    **kwargs: Any) -> DataCoordinate:
        """Adapt an arbitrary mapping and/or additional arguments into a true
        `DataCoordinate`, or augment an existing one.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping`, optional
            An informal data ID that maps dimension names to their primary key
            values (may also be a true `DataCoordinate`).
        graph : `DimensionGraph`
            The dimensions to be identified by the new `DataCoordinate`.
            If not provided, will be inferred from the keys of ``mapping``,
            and ``universe`` must be provided unless ``mapping`` is already a
            `DataCoordinate`.
        universe : `DimensionUniverse`
            All known dimensions and their relationships; used to expand
            and validate dependencies when ``graph`` is not provided.
        **kwargs
            Additional keyword arguments are treated like additional key-value
            pairs in ``mapping``.

        Returns
        -------
        coordinate : `DataCoordinate`
            A validated `DataCoordinate` instance.  May be a subclass instance
            if and only if ``mapping`` is a subclass instance and ``graph``
            is a subset of ``mapping.graph``.

        Raises
        ------
        TypeError
            Raised if the set of optional arguments provided is not supported.
        KeyError
            Raised if a key-value pair for a required dimension is missing.

        Notes
        -----
        Because `DataCoordinate` stores only values for required dimensions,
        key-value pairs for other related dimensions will be ignored and
        excluded from the result.  This means that a `DataCoordinate` may
        contain *fewer* key-value pairs than the informal data ID dictionary
        it was constructed from.
        """
        if isinstance(mapping, DataCoordinate):
            # Fast paths: the input is already a validated data ID.
            if graph is None:
                if not kwargs:
                    # Already standardized to exactly what we want.
                    return mapping
            elif mapping.graph.issuperset(graph):
                # Already standardized; just return the relevant subset.
                return mapping.subset(graph)
            assert universe is None or universe == mapping.universe
            universe = mapping.universe
        # Merge ``mapping`` and ``kwargs`` into a single name-keyed dict,
        # copying only when both sources are non-empty.
        d: Mapping[str, Any]
        if kwargs:
            if mapping:
                if isinstance(mapping, NamedKeyMapping):
                    d = dict(mapping.byName(), **kwargs)
                else:
                    d = dict(mapping, **kwargs)
            else:
                d = kwargs
        elif mapping:
            if isinstance(mapping, NamedKeyMapping):
                d = mapping.byName()
            else:
                d = mapping
        else:
            d = {}
        if graph is None:
            if universe is None:
                raise TypeError("universe must be provided if graph is not.")
            # Infer the graph (including dependencies) from the keys present.
            graph = DimensionGraph(universe, names=d.keys())
        try:
            values = tuple(d[name] for name in graph.required.names)
            # some backends cannot handle numpy.int64 type which is
            # a subclass of numbers.Integral, convert that to int.
            values = tuple(int(val) if isinstance(val, numbers.Integral) else val for val in values)
        except KeyError as err:
            raise KeyError(f"No value in data ID ({mapping}) for required dimension {err}.") from err
        return DataCoordinate(graph, values)

    def __hash__(self) -> int:
        # The graph plus the ordered required-dimension values fully
        # determine identity; ``values()`` must be hashable here.
        return hash((self.graph, self.values()))

    def __eq__(self, other: Any) -> bool:
        try:
            # Optimized code path for DataCoordinate comparisons.
            return self.graph == other.graph and self.values() == other.values()
        except AttributeError:
            # We can't reliably compare to informal data ID dictionaries
            # we don't know if any extra keys they might have are consistent
            # with an `ExpandedDataCoordinate` version of ``self`` (which
            # should compare as equal) or something else (which should
            # compare as not equal).
            # We don't even want to return `NotImplemented` and tell Python
            # to delegate to ``other.__eq__``, because that could also be
            # misleading.  We raise TypeError instead.
            raise TypeError("Cannot compare DataCoordinate instances to other objects without potentially "
                            "misleading results.") from None

    def __str__(self) -> str:
        return f"{self.byName()}"

    def __repr__(self) -> str:
        return f"DataCoordinate({self.graph}, {self.values()})"

    def fingerprint(self, update: Callable[[bytes], None]) -> None:
        """Update a secure hash function with the values in this data ID.

        Parameters
        ----------
        update : `~collections.abc.Callable`
            Callable that accepts a single `bytes` argument to update
            the hash; usually the ``update`` method of an instance from
            the ``hashlib`` module.

        Raises
        ------
        TypeError
            Raised if any value in this data ID is neither an integer nor
            a string.
        """
        for k, v in self.items():
            update(k.name.encode("utf8"))
            if isinstance(v, numbers.Integral):
                # Fixed-width (64-byte, big-endian, unsigned) encoding keeps
                # the fingerprint independent of the integer's magnitude.
                update(int(v).to_bytes(64, "big", signed=False))
            elif isinstance(v, str):
                update(v.encode("utf8"))
            else:
                raise TypeError(f"Only `int` and `str` are allowed as dimension keys, not {v} ({type(v)}).")

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        """Return a new `DataCoordinate` whose graph is a subset of
        ``self.graph``.

        Subclasses may override this method to return a subclass instance.

        Parameters
        ----------
        graph : `DimensionGraph`
            The dimensions identified by the returned `DataCoordinate`.

        Returns
        -------
        coordinate : `DataCoordinate`
            A `DataCoordinate` instance that identifies only the given
            dimensions.

        Raises
        ------
        KeyError
            Raised if ``graph`` is not a subset of ``self.graph``, and hence
            one or more dimensions has no associated primary key value.
        """
        return DataCoordinate(graph, tuple(self[dimension] for dimension in graph.required))

    @property
    def universe(self) -> DimensionUniverse:
        """The universe that defines all known dimensions compatible with
        this coordinate (`DimensionUniverse`).
        """
        return self.graph.universe

    @property
    def graph(self) -> DimensionGraph:
        """The dimensions identified by this data ID (`DimensionGraph`).

        Note that values are only required to be present for dimensions in
        ``self.graph.required``; all others may be retrieved (from a
        `Registry`) given these.
        """
        return self._graph
DataId = Union[DataCoordinate, Mapping[str, Any]]
"""A type-annotation alias for signatures that accept both informal data ID
dictionaries and validated `DataCoordinate` instances.

Callers that need a true `DataCoordinate` should pass such a value through
`DataCoordinate.standardize`.
"""
265def _intersectRegions(*args: Region) -> Optional[Region]:
266 """Return the intersection of several regions.
268 For internal use by `ExpandedDataCoordinate` only.
270 If no regions are provided, returns `None`.
272 This is currently a placeholder; it actually returns `NotImplemented`
273 (it does *not* raise an exception) when multiple regions are given, which
274 propagates to `ExpandedDataCoordinate`. This reflects the fact that we
275 don't want to fail to construct an `ExpandedDataCoordinate` entirely when
276 we can't compute its region, and at present we don't have a high-level use
277 case for the regions of these particular data IDs.
278 """
279 if len(args) == 0:
280 return None
281 elif len(args) == 1:
282 return args[0]
283 else:
284 return NotImplemented
class ExpandedDataCoordinate(DataCoordinate):
    """A data ID that has been expanded to include all relevant metadata.

    Instances should usually be obtained by calling `Registry.expandDataId`.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.
    records : `~collections.abc.Mapping`
        Dictionary mapping `DimensionElement` to `DimensionRecord`.
    full : `~collections.abc.Mapping`
        Dictionary mapping dimensions to their primary key values for all
        dimensions in the graph, not just required ones.  Ignored unless
        ``conform`` is `False`.
    region : `sphgeom.Region`, optional
        Region on the sky associated with this data ID, or `None` if there
        are no spatial dimensions.  At present, this may be the special value
        `NotImplemented` if there are multiple spatial dimensions identified;
        in the future this will be replaced with the intersection.  Ignored
        unless ``conform`` is `False`.
    timespan : `Timespan`, optional
        Timespan associated with this data ID, or `None` if there are no
        temporal dimensions.
        Ignored unless ``conform`` is `False`.
    conform : `bool`, optional
        If `True` (default), adapt arguments from arbitrary mappings to the
        custom dictionary types and check that all expected key-value pairs are
        present.  `False` is only for internal use.

    Notes
    -----
    To maintain Liskov substitutability with `DataCoordinate`,
    `ExpandedDataCoordinate` mostly acts like a mapping that contains only
    values for its graph's required dimensions, even though it also contains
    values for all implied dimensions - its length, iteration, and
    keys/values/items views reflect only required dimensions.  Values for
    the primary keys of implied dimensions can be obtained from the `full`
    attribute, and are also accessible in dict lookups and the ``in`` operator.
    """

    __slots__ = ("_records", "_full", "_region", "_timespan")

    def __init__(self, graph: DimensionGraph, values: Tuple[Any, ...], *,
                 records: NamedKeyMapping[DimensionElement, Optional[DimensionRecord]],
                 full: Optional[NamedKeyMapping[Dimension, Any]] = None,
                 region: Optional[Region] = None,
                 timespan: Optional[Timespan] = None,
                 conform: bool = True):
        super().__init__(graph, values)
        if conform:
            # Rebuild all derived state (records, full, region, timespan)
            # from ``records``; the ``full``, ``region``, and ``timespan``
            # arguments are ignored on this path.
            self._records = IndexedTupleDict(
                indices=graph._elementIndices,
                values=tuple(records[element.name] for element in graph.elements)
            )
            # Primary key values for *all* dimensions (required and implied);
            # `None` where the corresponding record is missing the attribute.
            self._full = IndexedTupleDict(
                indices=graph._dimensionIndices,
                values=tuple(getattr(self.records[dimension], dimension.primaryKey.name, None)
                             for dimension in graph.dimensions)
            )
            regions = []
            for element in self.graph.spatial:
                record = self.records[element.name]
                # DimensionRecord subclasses for spatial elements always have a
                # .region, but they're dynamic so this can't be type-checked.
                if record is None or record.region is None:  # type: ignore
                    # Any missing region makes the whole data ID's region
                    # unknown.
                    self._region = None
                    break
                else:
                    regions.append(record.region)  # type:ignore
            else:
                # No break: every spatial element contributed a region.
                self._region = _intersectRegions(*regions)
            timespans = []
            for element in self.graph.temporal:
                record = self.records[element.name]
                # DimensionRecord subclasses for temporal elements always have
                # .timespan, but they're dynamic so this can't be type-checked.
                if record is None or record.timespan is None:  # type:ignore
                    # Any missing timespan makes the whole data ID's timespan
                    # unknown.
                    self._timespan = None
                    break
                else:
                    timespans.append(record.timespan)  # type:ignore
            else:
                # No break: every temporal element contributed a timespan.
                self._timespan = Timespan.intersection(*timespans)
        else:
            # Caller has declared that the arguments already have the correct
            # types and consistency; store them as-is.
            self._records = records  # type: ignore
            self._full = full  # type: ignore
            self._region = region
            self._timespan = timespan

    def __contains__(self, key: Any) -> bool:
        # Membership tests see all dimensions (required and implied).
        return key in self.full

    def __getitem__(self, key: Union[Dimension, str]) -> Any:
        # Lookups see all dimensions (required and implied).
        return self.full[key]

    def __repr__(self) -> str:
        return f"ExpandedDataCoordinate({self.graph}, {self.values()})"

    def pack(self, name: str, *, returnMaxBits: bool = False) -> Union[Tuple[int, int], int]:
        """Pack this data ID into an integer.

        Parameters
        ----------
        name : `str`
            Name of the `DimensionPacker` algorithm (as defined in the
            dimension configuration).
        returnMaxBits : `bool`, optional
            If `True` (`False` is default), return the maximum number of
            nonzero bits in the returned integer across all data IDs.

        Returns
        -------
        packed : `int`
            Integer ID.  This ID is unique only across data IDs that have
            the same values for the packer's "fixed" dimensions.
        maxBits : `int`, optional
            Maximum number of nonzero bits in ``packed``.  Not returned unless
            ``returnMaxBits`` is `True`.
        """
        return self.universe.makePacker(name, self).pack(self, returnMaxBits=returnMaxBits)

    def subset(self, graph: DimensionGraph) -> ExpandedDataCoordinate:
        # Docstring inherited from DataCoordinate.subset.
        # Pass the existing record mapping through and let ``conform=True``
        # rebuild the derived state (full, region, timespan) for the smaller
        # graph.
        return ExpandedDataCoordinate(
            graph,
            tuple(self[dimension] for dimension in graph.required),
            records=self.records,
            conform=True
        )

    @property
    def full(self) -> NamedKeyMapping[Dimension, Any]:
        """Dictionary mapping dimensions to their primary key values for all
        dimensions in the graph, not just required ones (`NamedKeyMapping`).

        Like `DataCoordinate` itself, this dictionary can be indexed by `str`
        name as well as `Dimension` instance.
        """
        return self._full

    @property
    def records(self) -> NamedKeyMapping[DimensionElement, Optional[DimensionRecord]]:
        """Dictionary mapping `DimensionElement` to the associated
        `DimensionRecord` (`NamedKeyMapping`).

        Like `DataCoordinate` itself, this dictionary can be indexed by `str`
        name as well as `DimensionElement` instance.
        """
        return self._records

    @property
    def region(self) -> Optional[Region]:
        """Region on the sky associated with this data ID, or `None` if there
        are no spatial dimensions (`sphgeom.Region`).

        At present, this may be the special value `NotImplemented` if there
        are multiple spatial dimensions identified; in the future this will be
        replaced with the intersection.
        """
        return self._region

    @property
    def timespan(self) -> Optional[Timespan]:
        """Timespan associated with this data ID, or `None` if there are no
        temporal dimensions (`Timespan`).
        """
        return self._timespan