Coverage for python/lsst/daf/butler/core/dimensions/coordinate.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("DataCoordinate", "ExpandedDataCoordinate", "DataId")
26import numbers
27from typing import Any, Tuple, Mapping, Optional, Dict, Union, TYPE_CHECKING
29from lsst.sphgeom import Region
30from ..utils import IndexedTupleDict, immutable
31from ..timespan import Timespan
32from .graph import DimensionGraph
34if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 34 ↛ 35line 34 didn't jump to line 35, because the condition on line 34 was never true
35 from .elements import DimensionElement, Dimension
36 from .universe import DimensionUniverse
37 from .records import DimensionRecord
@immutable
class DataCoordinate(IndexedTupleDict):
    """An immutable data ID dictionary that guarantees that its key-value pairs
    identify all required dimensions in a `DimensionGraph`.

    `DataCoordinate` instances should usually be constructed via the
    `standardize` class method; the constructor is reserved for callers that
    can guarantee that the ``values`` tuple has exactly the right elements.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.

    Notes
    -----
    Like any data ID class, `DataCoordinate` behaves like a dictionary,
    mostly via methods inherited from `IndexedTupleDict`.  Like `NamedKeyDict`,
    both `Dimension` instances and `str` names thereof may be used as keys in
    lookup operations.

    Subclasses are permitted to support lookup for any dimension in
    ``self.graph.dimensions``, but the base class only supports lookup for
    those in ``self.graph.required``, which is the minimal set needed to
    identify all others in a `Registry`.  Both the base class and subclasses
    define comparisons, iterators, and the `keys`, `values`, and `items` views
    to just the ``self.graph.required`` subset in order to guarantee true
    (i.e. Liskov) substitutability.
    """

    __slots__ = ("graph",)

    def __new__(cls, graph: DimensionGraph, values: Tuple[Any, ...]):
        # The mapping's keys are the graph's required dimensions only; the
        # graph itself is kept so the other dimensions remain discoverable.
        self = super().__new__(cls, graph._requiredIndices, values)
        self.graph = graph
        return self

    @staticmethod
    def standardize(mapping: Optional[Mapping[str, Any]] = None, *,
                    graph: Optional[DimensionGraph] = None,
                    universe: Optional[DimensionUniverse] = None,
                    **kwds) -> DataCoordinate:
        """Adapt an arbitrary mapping and/or additional arguments into a true
        `DataCoordinate`, or augment an existing one.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping`, optional
            An informal data ID that maps dimension names to their primary key
            values (may also be a true `DataCoordinate`).
        graph : `DimensionGraph`
            The dimensions to be identified by the new `DataCoordinate`.
            If not provided, will be inferred from the keys of ``mapping``,
            and ``universe`` must be provided unless ``mapping`` is already a
            `DataCoordinate`.
        universe : `DimensionUniverse`
            All known dimensions and their relationships; used to expand
            and validate dependencies when ``graph`` is not provided.
        kwds
            Additional keyword arguments are treated like additional key-value
            pairs in ``mapping``.

        Returns
        -------
        coordinate : `DataCoordinate`
            A validated `DataCoordinate` instance.  May be a subclass instance
            if and only if ``mapping`` is a subclass instance and ``graph``
            is a subset of ``mapping.graph``.

        Raises
        ------
        TypeError
            Raised if the set of optional arguments provided is not supported.
        KeyError
            Raised if a key-value pair for a required dimension is missing.

        Notes
        -----
        Because `DataCoordinate` stores only values for required dimensions,
        key-value pairs for other related dimensions will be ignored and
        excluded from the result.  This means that a `DataCoordinate` may
        contain *fewer* key-value pairs than the informal data ID dictionary
        it was constructed from.
        """
        if isinstance(mapping, DataCoordinate):
            if graph is None:
                if not kwds:
                    # Already standardized to exactly what we want.
                    return mapping
            elif mapping.graph.issuperset(graph):
                # Already standardized; just return the relevant subset.
                return mapping.subset(graph)
            # An existing coordinate carries its own universe; an explicitly
            # passed one must agree with it.
            assert universe is None or universe == mapping.universe
            universe = mapping.universe
        # Merge ``mapping`` and ``kwds`` into one name-keyed dict ``d``.
        # ``byName`` is tried first so DataCoordinate inputs (keyed by
        # dimension objects) are converted to plain string keys.
        if kwds:
            if mapping:
                try:
                    d = dict(mapping.byName(), **kwds)
                except AttributeError:
                    d = dict(mapping, **kwds)
            else:
                d = kwds
        elif mapping:
            try:
                d = mapping.byName()
            except AttributeError:
                d = mapping
        else:
            d = {}
        if graph is None:
            if universe is None:
                raise TypeError("universe must be provided if graph is not.")
            graph = DimensionGraph(universe, names=d.keys())
        try:
            values = tuple(d[name] for name in graph.required.names)
        except KeyError as err:
            # Report the merged dict that was actually searched; ``mapping``
            # alone may be `None` and never includes keyword-supplied values.
            raise KeyError(f"No value in data ID ({d}) for required dimension {err}.") from err
        # some backends cannot handle numpy.int64 type which is
        # a subclass of numbers.Integral, convert that to int.
        values = tuple(int(val) if isinstance(val, numbers.Integral) else val for val in values)
        return DataCoordinate(graph, values)

    def byName(self) -> Dict[str, Any]:
        """Return a true `dict` keyed by `str` dimension name and the same
        values as ``self``.
        """
        return {k.name: v for k, v in self.items()}

    def __getnewargs__(self) -> tuple:
        # Implements pickle support (in addition to methods provided by
        # @immutable decorator).
        return (self.graph, self.values())

    def __hash__(self) -> int:
        return hash((self.graph, self.values()))

    def __eq__(self, other: Any) -> bool:
        try:
            # Optimized code path for DataCoordinate comparisons.
            return self.graph == other.graph and self.values() == other.values()
        except AttributeError:
            # Also support comparison with informal data ID dictionaries that
            # map dimension name to value.
            return self.byName() == other

    def __str__(self):
        return f"{self.byName()}"

    def __repr__(self):
        return f"DataCoordinate({self.graph}, {self.values()})"

    def fingerprint(self, update):
        """Update a secure hash function with the values in this data ID.

        Parameters
        ----------
        update : `~collections.abc.Callable`
            Callable that accepts a single `bytes` argument to update
            the hash; usually the ``update`` method of an instance from
            the ``hashlib`` module.

        Raises
        ------
        TypeError
            Raised if a value is neither an integer nor a `str`.
        """
        for k, v in self.items():
            update(k.name.encode("utf8"))
            if isinstance(v, numbers.Integral):
                # Fixed-width, unsigned, big-endian encoding.
                update(int(v).to_bytes(64, "big", signed=False))
            elif isinstance(v, str):
                update(v.encode("utf8"))
            else:
                raise TypeError(f"Only `int` and `str` are allowed as dimension keys, not {v} ({type(v)}).")

    def matches(self, other: DataCoordinate) -> bool:
        """Test whether the values of all keys in both coordinates are equal.

        Parameters
        ----------
        other : `DataCoordinate`
            The other coordinate to compare to.

        Returns
        -------
        consistent : `bool`
            `True` if all keys that are in both ``other`` and ``self``
            are associated with the same values, and `False` otherwise.
            `True` if there are no keys in common.
        """
        # Prefer the other object's ``full`` mapping when it has one, so its
        # non-required dimensions take part in the comparison too.
        d = getattr(other, "full", other)
        return all(self[k] == d[k] for k in (self.keys() & d.keys()))

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        """Return a new `DataCoordinate` whose graph is a subset of
        ``self.graph``.

        Subclasses may override this method to return a subclass instance.

        Parameters
        ----------
        graph : `DimensionGraph`
            The dimensions identified by the returned `DataCoordinate`.

        Returns
        -------
        coordinate : `DataCoordinate`
            A `DataCoordinate` instance that identifies only the given
            dimensions.

        Raises
        ------
        KeyError
            Raised if ``graph`` is not a subset of ``self.graph``, and hence
            one or more dimensions has no associated primary key value.
        """
        return DataCoordinate(graph, tuple(self[dimension] for dimension in graph.required))

    @property
    def universe(self) -> DimensionUniverse:
        """The universe that defines all known dimensions compatible with
        this coordinate (`DimensionUniverse`).
        """
        return self.graph.universe

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    graph: DimensionGraph
    """The dimensions identified by this data ID (`DimensionGraph`).

    Note that values are only required to be present for dimensions in
    ``self.graph.required``; all others may be retrieved (from a `Registry`)
    given these.
    """
DataId = Union[DataCoordinate, Mapping[str, Any]]
"""A type-annotation alias for signatures that accept both informal data ID
dictionaries and validated `DataCoordinate` instances.
"""
def _intersectRegions(*args: Region) -> Optional[Region]:
    """Return the intersection of several regions.

    For internal use by `ExpandedDataCoordinate` only.

    This is currently a placeholder; it actually returns `NotImplemented`
    (it does *not* raise an exception) when multiple regions are given, which
    propagates to `ExpandedDataCoordinate`.  This reflects the fact that we
    don't want to fail to construct an `ExpandedDataCoordinate` entirely when
    we can't compute its region, and at present we don't have a high-level use
    case for the regions of these particular data IDs.

    Parameters
    ----------
    *args : `Region`
        The regions to intersect; may be empty.

    Returns
    -------
    region : `Region`, `None`, or `NotImplemented`
        `None` if no regions are provided, the region itself (the same
        object) if exactly one is provided, and `NotImplemented` otherwise.
    """
    if not args:
        return None
    if len(args) == 1:
        return args[0]
    # Real intersection of two or more regions is not available yet.
    return NotImplemented
@immutable
class ExpandedDataCoordinate(DataCoordinate):
    """A data ID that has been expanded to include all relevant metadata.

    Instances should usually be obtained by calling `Registry.expandDataId`.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.
    records : `~collections.abc.Mapping`
        Dictionary mapping `DimensionElement` to `DimensionRecord`.
    full : `~collections.abc.Mapping`
        Dictionary mapping dimensions to their primary key values for all
        dimensions in the graph, not just required ones.  Ignored unless
        ``conform`` is `False`.
    region : `sphgeom.Region`, optional
        Region on the sky associated with this data ID, or `None` if there
        are no spatial dimensions.  At present, this may be the special value
        `NotImplemented` if there are multiple spatial dimensions identified;
        in the future this will be replaced with the intersection.  Ignored
        unless ``conform`` is `False`.
    timespan : `Timespan`, optional
        Timespan associated with this data ID, or `None` if there are no
        temporal dimensions.
        Ignored unless ``conform`` is `False`.
    conform : `bool`, optional
        If `True` (default), adapt arguments from arbitrary mappings to the
        custom dictionary types and check that all expected key-value pairs are
        present.  `False` is only for internal use.

    Notes
    -----
    To maintain Liskov substitutability with `DataCoordinate`,
    `ExpandedDataCoordinate` mostly acts like a mapping that contains only
    values for its graph's required dimensions, even though it also contains
    values for all implied dimensions - its length, iteration, and
    keys/values/items views reflect only required dimensions.  Values for
    the primary keys of implied dimensions can be obtained from the `full`
    attribute, and are also accessible in dict lookups and the ``in`` operator.
    """

    __slots__ = ("records", "full", "region", "timespan")

    def __new__(cls, graph: DimensionGraph, values: Tuple[Any, ...], *,
                records: Mapping[DimensionElement, DimensionRecord],
                full: Optional[Mapping[Dimension, Any]] = None,
                region: Optional[Region] = None,
                timespan: Optional[Timespan] = None,
                conform: bool = True):
        self = super().__new__(cls, graph, values)
        if conform:
            # Rebuild every derived attribute from ``records``; the ``full``,
            # ``region`` and ``timespan`` arguments are not consulted here.
            self.records = IndexedTupleDict(
                indices=graph._elementIndices,
                values=tuple(records[element] for element in graph.elements)
            )
            # A dimension whose record lacks the primary-key attribute (or
            # whose record is `None`) gets `None` as its value.
            self.full = IndexedTupleDict(
                indices=graph._dimensionIndices,
                values=tuple(getattr(self.records[dimension], dimension.primaryKey.name, None)
                             for dimension in graph.dimensions)
            )
            # The region is `None` as soon as any spatial element has no
            # record or no region; the ``for``/``else`` clause runs only when
            # the loop finished without hitting ``break``.
            regions = []
            for element in self.graph.spatial:
                record = self.records[element.name]
                if record is None or record.region is None:
                    self.region = None
                    break
                else:
                    regions.append(record.region)
            else:
                self.region = _intersectRegions(*regions)
            # Same pattern for the temporal elements.
            timespans = []
            for element in self.graph.temporal:
                record = self.records[element.name]
                if record is None or record.timespan is None:
                    self.timespan = None
                    break
                else:
                    timespans.append(record.timespan)
            else:
                self.timespan = Timespan.intersection(*timespans)
        else:
            # Trusted path (e.g. unpickling): take the arguments as given.
            self.records = records
            self.full = full
            self.region = region
            self.timespan = timespan
        return self

    def __contains__(self, key: Union[DimensionElement, str]) -> bool:
        return key in self.full

    def __getitem__(self, key: Union[DimensionElement, str]) -> Any:
        return self.full[key]

    def __repr__(self):
        return f"ExpandedDataCoordinate({self.graph}, {self.values()})"

    def pack(self, name: str, *, returnMaxBits: bool = False) -> int:
        """Pack this data ID into an integer.

        Parameters
        ----------
        name : `str`
            Name of the `DimensionPacker` algorithm (as defined in the
            dimension configuration).
        returnMaxBits : `bool`, optional
            If `True` (`False` is default), return the maximum number of
            nonzero bits in the returned integer across all data IDs.

        Returns
        -------
        packed : `int`
            Integer ID.  This ID is unique only across data IDs that have
            the same values for the packer's "fixed" dimensions.
        maxBits : `int`, optional
            Maximum number of nonzero bits in ``packed``.  Not returned unless
            ``returnMaxBits`` is `True`.
        """
        return self.universe.makePacker(name, self).pack(self, returnMaxBits=returnMaxBits)

    def matches(self, other: DataCoordinate) -> bool:
        # Docstring inherited from DataCoordinate.matches.
        # Unlike the base class, compare over all of this data ID's
        # dimensions (``self.full``), not just the required ones.
        d = getattr(other, "full", other)
        return all(self[k] == d[k] for k in (self.full.keys() & d.keys()))

    def subset(self, graph: DimensionGraph) -> ExpandedDataCoordinate:
        # Docstring inherited from DataCoordinate.subset.
        return ExpandedDataCoordinate(
            graph,
            tuple(self[dimension] for dimension in graph.required),
            records=self.records,
            conform=True
        )

    def __getnewargs_ex__(self) -> Tuple[tuple, dict]:
        # Pickle support: pass the already-conformed attributes back with
        # ``conform=False`` so they are restored as-is rather than rebuilt.
        return (
            (self.graph, self.values()),
            dict(
                records=self.records,
                full=self.full,
                region=self.region,
                timespan=self.timespan,
                conform=False,
            )
        )

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    full: IndexedTupleDict[Dimension, Any]
    """Dictionary mapping dimensions to their primary key values for all
    dimensions in the graph, not just required ones (`IndexedTupleDict`).

    Like `DataCoordinate` itself, this dictionary can be indexed by `str` name
    as well as `Dimension` instance.
    """

    records: IndexedTupleDict[DimensionElement, DimensionRecord]
    """Dictionary mapping `DimensionElement` to the associated
    `DimensionRecord` (`IndexedTupleDict`).

    Like `DataCoordinate` itself, this dictionary can be indexed by `str` name
    as well as `DimensionElement` instance.
    """

    region: Optional[Region]
    """Region on the sky associated with this data ID, or `None` if there
    are no spatial dimensions (`sphgeom.Region`).

    At present, this may be the special value `NotImplemented` if there
    are multiple spatial dimensions identified; in the future this will be
    replaced with the intersection.
    """

    timespan: Optional[Timespan]
    """Timespan associated with this data ID, or `None` if there are no
    temporal dimensions (`Timespan`).
    """