Coverage for python/lsst/daf/butler/core/dimensions/coordinate.py : 20%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("DataCoordinate", "ExpandedDataCoordinate", "DataId")
26import numbers
27from typing import (
28 Any,
29 Callable,
30 Mapping,
31 Optional,
32 Tuple,
33 TYPE_CHECKING,
34 Union,
35)
37from lsst.sphgeom import Region
38from ..named import IndexedTupleDict, NamedKeyMapping
39from ..timespan import Timespan
40from .elements import Dimension
41from .graph import DimensionGraph
43if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 43 ↛ 44line 43 didn't jump to line 44, because the condition on line 43 was never true
44 from .elements import DimensionElement
45 from .universe import DimensionUniverse
46 from .records import DimensionRecord
class DataCoordinate(IndexedTupleDict[Dimension, Any]):
    """An immutable data ID dictionary that guarantees that its key-value pairs
    identify all required dimensions in a `DimensionGraph`.

    `DataCoordinate` instances should usually be constructed via the
    `standardize` class method; the constructor is reserved for callers that
    can guarantee that the ``values`` tuple has exactly the right elements.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.

    Notes
    -----
    Like any data ID class, `DataCoordinate` behaves like a dictionary,
    mostly via methods inherited from `IndexedTupleDict`.  Like `NamedKeyDict`,
    both `Dimension` instances and `str` names thereof may be used as keys in
    lookup operations.

    Subclasses are permitted to support lookup for any dimension in
    ``self.graph.dimensions``, but the base class only supports lookup for
    those in ``self.graph.required``, which is the minimal set needed to
    identify all others in a `Registry`.  Both the base class and subclasses
    define comparisons, iterators, and the `keys`, `values`, and `items` views
    to just the ``self.graph.required`` subset in order to guarantee true
    (i.e. Liskov) substitutability.
    """

    __slots__ = ("_graph",)

    def __init__(self, graph: DimensionGraph, values: Tuple[Any, ...]):
        # ``values`` must already be ordered to match the graph's precomputed
        # required-dimension indices; no validation is done here (that is
        # `standardize`'s job).
        super().__init__(graph._requiredIndices, values)
        self._graph = graph

    @staticmethod
    def standardize(mapping: Optional[Union[Mapping[str, Any], NamedKeyMapping[Dimension, Any]]] = None, *,
                    graph: Optional[DimensionGraph] = None,
                    universe: Optional[DimensionUniverse] = None,
                    **kwargs: Any) -> DataCoordinate:
        """Adapt an arbitrary mapping and/or additional arguments into a true
        `DataCoordinate`, or augment an existing one.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping`, optional
            An informal data ID that maps dimension names to their primary key
            values (may also be a true `DataCoordinate`).
        graph : `DimensionGraph`
            The dimensions to be identified by the new `DataCoordinate`.
            If not provided, will be inferred from the keys of ``mapping``,
            and ``universe`` must be provided unless ``mapping`` is already a
            `DataCoordinate`.
        universe : `DimensionUniverse`
            All known dimensions and their relationships; used to expand
            and validate dependencies when ``graph`` is not provided.
        **kwargs
            Additional keyword arguments are treated like additional key-value
            pairs in ``mapping``.

        Returns
        -------
        coordinate : `DataCoordinate`
            A validated `DataCoordinate` instance.  May be a subclass instance
            if and only if ``mapping`` is a subclass instance and ``graph``
            is a subset of ``mapping.graph``.

        Raises
        ------
        TypeError
            Raised if the set of optional arguments provided is not supported.
        KeyError
            Raised if a key-value pair for a required dimension is missing.

        Notes
        -----
        Because `DataCoordinate` stores only values for required dimensions,
        key-value pairs for other related dimensions will be ignored and
        excluded from the result.  This means that a `DataCoordinate` may
        contain *fewer* key-value pairs than the informal data ID dictionary
        it was constructed from.
        """
        if isinstance(mapping, DataCoordinate):
            # Fast paths that return ``mapping`` (or a subset of it) directly,
            # preserving its concrete type (possibly a subclass).
            if graph is None:
                if not kwargs:
                    # Already standardized to exactly what we want.
                    return mapping
            elif mapping.graph.issuperset(graph):
                # Already standardized; just return the relevant subset.
                return mapping.subset(graph)
            # Fall through to the generic path below, but remember the
            # universe the input coordinate came from.
            assert universe is None or universe == mapping.universe
            universe = mapping.universe
        # Flatten ``mapping`` and ``kwargs`` into a single str-keyed dict,
        # avoiding copies when possible.
        d: Mapping[str, Any]
        if kwargs:
            if mapping:
                if isinstance(mapping, NamedKeyMapping):
                    d = dict(mapping.byName(), **kwargs)
                else:
                    d = dict(mapping, **kwargs)
            else:
                d = kwargs
        elif mapping:
            if isinstance(mapping, NamedKeyMapping):
                d = mapping.byName()
            else:
                d = mapping
        else:
            d = {}
        if graph is None:
            if universe is None:
                raise TypeError("universe must be provided if graph is not.")
            graph = DimensionGraph(universe, names=d.keys())
        try:
            values = tuple(d[name] for name in graph.required.names)
            # some backends cannot handle numpy.int64 type which is
            # a subclass of numbers.Integral, convert that to int.
            values = tuple(int(val) if isinstance(val, numbers.Integral) else val for val in values)
        except KeyError as err:
            raise KeyError(f"No value in data ID ({mapping}) for required dimension {err}.") from err
        return DataCoordinate(graph, values)

    def __hash__(self) -> int:
        # ``values()`` here is the tuple of required-dimension values, so two
        # coordinates hash equal iff graph and values match.
        return hash((self.graph, self.values()))

    def __eq__(self, other: Any) -> bool:
        try:
            # Optimized code path for DataCoordinate comparisons.
            return self.graph == other.graph and self.values() == other.values()
        except AttributeError:
            # We can't reliably compare to informal data ID dictionaries
            # we don't know if any extra keys they might have are consistent
            # with an `ExpandedDataCoordinate` version of ``self`` (which
            # should compare as equal) or something else (which should
            # compare as not equal).
            # We don't even want to return `NotImplemented` and tell Python
            # to delegate to ``other.__eq__``, because that could also be
            # misleading.  We raise TypeError instead.
            raise TypeError("Cannot compare DataCoordinate instances to other objects without potentially "
                            "misleading results.") from None

    def __repr__(self) -> str:
        # We can't make repr yield something that could be exec'd here without
        # printing out the whole DimensionUniverse the graph is derived from.
        # So we print something that mostly looks like a dict, but doesn't
        # quote its keys: that's both more compact and something that can't
        # be mistaken for an actual dict or something that could be exec'd.
        return "{{{}}}".format(', '.join(f"{k.name}: {v!r}" for k, v in self.items()))

    def fingerprint(self, update: Callable[[bytes], None]) -> None:
        """Update a secure hash function with the values in this data ID.

        Parameters
        ----------
        update : `~collections.abc.Callable`
            Callable that accepts a single `bytes` argument to update
            the hash; usually the ``update`` method of an instance from
            the ``hashlib`` module.
        """
        for k, v in self.items():
            update(k.name.encode("utf8"))
            if isinstance(v, numbers.Integral):
                # Fixed-width (64-byte) big-endian encoding keeps the
                # fingerprint independent of the value's magnitude.
                # NOTE(review): ``signed=False`` raises OverflowError for
                # negative values — presumably primary keys are non-negative;
                # confirm.
                update(int(v).to_bytes(64, "big", signed=False))
            elif isinstance(v, str):
                update(v.encode("utf8"))
            else:
                raise TypeError(f"Only `int` and `str` are allowed as dimension keys, not {v} ({type(v)}).")

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        """Return a new `DataCoordinate` whose graph is a subset of
        ``self.graph``.

        Subclasses may override this method to return a subclass instance.

        Parameters
        ----------
        graph : `DimensionGraph`
            The dimensions identified by the returned `DataCoordinate`.

        Returns
        -------
        coordinate : `DataCoordinate`
            A `DataCoordinate` instance that identifies only the given
            dimensions.

        Raises
        ------
        KeyError
            Raised if ``graph`` is not a subset of ``self.graph``, and hence
            one or more dimensions has no associated primary key value.
        """
        return DataCoordinate(graph, tuple(self[dimension] for dimension in graph.required))

    @property
    def universe(self) -> DimensionUniverse:
        """The universe that defines all known dimensions compatible with
        this coordinate (`DimensionUniverse`).
        """
        return self.graph.universe

    @property
    def graph(self) -> DimensionGraph:
        """The dimensions identified by this data ID (`DimensionGraph`).

        Note that values are only required to be present for dimensions in
        ``self.graph.required``; all others may be retrieved (from a
        `Registry`) given these.
        """
        return self._graph
# Public alias used in signatures throughout the package: callers may pass
# either a plain str-keyed mapping or an already-validated `DataCoordinate`.
DataId = Union[DataCoordinate, Mapping[str, Any]]
"""A type-annotation alias for signatures that accept both informal data ID
dictionaries and validated `DataCoordinate` instances.
"""
267def _intersectRegions(*args: Region) -> Optional[Region]:
268 """Return the intersection of several regions.
270 For internal use by `ExpandedDataCoordinate` only.
272 If no regions are provided, returns `None`.
274 This is currently a placeholder; it actually returns `NotImplemented`
275 (it does *not* raise an exception) when multiple regions are given, which
276 propagates to `ExpandedDataCoordinate`. This reflects the fact that we
277 don't want to fail to construct an `ExpandedDataCoordinate` entirely when
278 we can't compute its region, and at present we don't have a high-level use
279 case for the regions of these particular data IDs.
280 """
281 if len(args) == 0:
282 return None
283 elif len(args) == 1:
284 return args[0]
285 else:
286 return NotImplemented
class ExpandedDataCoordinate(DataCoordinate):
    """A data ID that has been expanded to include all relevant metadata.

    Instances should usually be obtained by calling `Registry.expandDataId`.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.
    records : `~collections.abc.Mapping`
        Dictionary mapping `DimensionElement` to `DimensionRecord`.
    full : `~collections.abc.Mapping`
        Dictionary mapping dimensions to their primary key values for all
        dimensions in the graph, not just required ones.  Ignored unless
        ``conform`` is `False.`
    region : `sphgeom.Region`, optional
        Region on the sky associated with this data ID, or `None` if there
        are no spatial dimensions.  At present, this may be the special value
        `NotImplemented` if there multiple spatial dimensions identified; in
        the future this will be replaced with the intersection.  Ignored unless
        ``conform`` is `False`.
    timespan : `Timespan`, optional
        Timespan associated with this data ID, or `None` if there are no
        temporal dimensions.
        Ignored unless ``conform`` is `False`.
    conform : `bool`, optional
        If `True` (default), adapt arguments from arbitrary mappings to the
        custom dictionary types and check that all expected key-value pairs are
        present.  `False` is only for internal use.

    Notes
    -----
    To maintain Liskov substitutability with `DataCoordinate`,
    `ExpandedDataCoordinate` mostly acts like a mapping that contains only
    values for its graph's required dimensions, even though it also contains
    values for all implied dimensions - its length, iteration, and
    keys/values/items views reflect only required dimensions.  Values for
    the primary keys of implied dimensions can be obtained from the `full`
    attribute, and are also accessible in dict lookups and the ``in`` operator.
    """

    __slots__ = ("_records", "_full", "_region", "_timespan")

    def __init__(self, graph: DimensionGraph, values: Tuple[Any, ...], *,
                 records: NamedKeyMapping[DimensionElement, Optional[DimensionRecord]],
                 full: Optional[NamedKeyMapping[Dimension, Any]] = None,
                 region: Optional[Region] = None,
                 timespan: Optional[Timespan] = None,
                 conform: bool = True):
        super().__init__(graph, values)
        if conform:
            # Re-key ``records`` into the graph's element order so lookups
            # can use the graph's precomputed indices.
            self._records = IndexedTupleDict(
                indices=graph._elementIndices,
                values=tuple(records[element.name] for element in graph.elements)
            )
            # Derive the full (required + implied) primary-key mapping from
            # the records; missing records yield `None` values.
            self._full = IndexedTupleDict(
                indices=graph._dimensionIndices,
                values=tuple(getattr(self.records[dimension], dimension.primaryKey.name, None)
                             for dimension in graph.dimensions)
            )
            regions = []
            for element in self.graph.spatial:
                record = self.records[element.name]
                # DimensionRecord subclasses for spatial elements always have a
                # .region, but they're dynamic so this can't be type-checked.
                if record is None or record.region is None:  # type: ignore
                    # Any missing region makes the combined region unknown.
                    self._region = None
                    break
                else:
                    regions.append(record.region)  # type:ignore
            else:
                # Loop finished without break: every spatial element had a
                # region (may still be NotImplemented; see _intersectRegions).
                self._region = _intersectRegions(*regions)
            timespans = []
            for element in self.graph.temporal:
                record = self.records[element.name]
                # DimensionRecord subclasses for temporal elements always have
                # .timespan, but they're dynamic so this can't be type-checked.
                if record is None or record.timespan is None:  # type:ignore
                    # Any missing timespan makes the combined timespan unknown.
                    self._timespan = None
                    break
                else:
                    timespans.append(record.timespan)  # type:ignore
            else:
                # Loop finished without break: intersect all timespans.
                self._timespan = Timespan.intersection(*timespans)
        else:
            # User has declared that the types are correct; ignore them.
            self._records = records  # type: ignore
            self._full = full  # type: ignore
            self._region = region
            self._timespan = timespan

    def __contains__(self, key: Any) -> bool:
        # Unlike the base class, membership covers implied dimensions too.
        return key in self.full

    def __getitem__(self, key: Union[Dimension, str]) -> Any:
        # Unlike the base class, lookup covers implied dimensions too.
        return self.full[key]

    def __repr__(self) -> str:
        # See DataCoordinate.__repr__ comment for reasoning behind this form.
        # The expanded version just includes key-value pairs for implied
        # dimensions.
        return "{{{}}}".format(', '.join(f"{k.name}: {v!r}" for k, v in self.full.items()))

    def pack(self, name: str, *, returnMaxBits: bool = False) -> Union[Tuple[int, int], int]:
        """Pack this data ID into an integer.

        Parameters
        ----------
        name : `str`
            Name of the `DimensionPacker` algorithm (as defined in the
            dimension configuration).
        returnMaxBits : `bool`, optional
            If `True` (`False` is default), return the maximum number of
            nonzero bits in the returned integer across all data IDs.

        Returns
        -------
        packed : `int`
            Integer ID.  This ID is unique only across data IDs that have
            the same values for the packer's "fixed" dimensions.
        maxBits : `int`, optional
            Maximum number of nonzero bits in ``packed``.  Not returned unless
            ``returnMaxBits`` is `True`.
        """
        return self.universe.makePacker(name, self).pack(self, returnMaxBits=returnMaxBits)

    def subset(self, graph: DimensionGraph) -> ExpandedDataCoordinate:
        # Docstring inherited from DataCoordinate.subset.
        # Passing the full records mapping with conform=True lets __init__
        # re-derive ``full``, region, and timespan for the smaller graph.
        return ExpandedDataCoordinate(
            graph,
            tuple(self[dimension] for dimension in graph.required),
            records=self.records,
            conform=True
        )

    @property
    def full(self) -> NamedKeyMapping[Dimension, Any]:
        """Dictionary mapping dimensions to their primary key values for all
        dimensions in the graph, not just required ones (`NamedKeyMapping`).

        Like `DataCoordinate` itself, this dictionary can be indexed by `str`
        name as well as `Dimension` instance.
        """
        return self._full

    @property
    def records(self) -> NamedKeyMapping[DimensionElement, Optional[DimensionRecord]]:
        """Dictionary mapping `DimensionElement` to the associated
        `DimensionRecord` (`NamedKeyMapping`).

        Like `DataCoordinate` itself, this dictionary can be indexed by `str`
        name as well as `DimensionElement` instance.
        """
        return self._records

    @property
    def region(self) -> Optional[Region]:
        """Region on the sky associated with this data ID, or `None` if there
        are no spatial dimensions (`sphgeom.Region`).

        At present, this may be the special value `NotImplemented` if there
        multiple spatial dimensions identified; in the future this will be
        replaced with the intersection.
        """
        return self._region

    @property
    def timespan(self) -> Optional[Timespan]:
        """Timespan associated with this data ID, or `None` if there are no
        temporal dimensions (`TimeSpan`).
        """
        return self._timespan