Coverage for python/lsst/daf/butler/core/dimensions/coordinate.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("DataCoordinate", "ExpandedDataCoordinate", "DataId")

import numbers
from typing import Any, Tuple, Mapping, Optional, Dict, Union, TYPE_CHECKING

from lsst.sphgeom import Region
from ..utils import IndexedTupleDict, immutable
from ..timespan import Timespan
from .graph import DimensionGraph

if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
    from .elements import DimensionElement, Dimension
    from .universe import DimensionUniverse
    from .records import DimensionRecord
@immutable
class DataCoordinate(IndexedTupleDict):
    """An immutable data ID dictionary that guarantees that its key-value pairs
    identify all required dimensions in a `DimensionGraph`.

    `DataCoordinate` instances should usually be constructed via the
    `standardize` static method; the constructor is reserved for callers that
    can guarantee that the ``values`` tuple has exactly the right elements.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.

    Notes
    -----
    Like any data ID class, `DataCoordinate` behaves like a dictionary,
    mostly via methods inherited from `IndexedTupleDict`.  Like `NamedKeyDict`,
    both `Dimension` instances and `str` names thereof may be used as keys in
    lookup operations.

    Subclasses are permitted to support lookup for any dimension in
    ``self.graph.dimensions``, but the base class only supports lookup for
    those in ``self.graph.required``, which is the minimal set needed to
    identify all others in a `Registry`.  Both the base class and subclasses
    define comparisons, iterators, and the `keys`, `values`, and `items` views
    to just the ``self.graph.required`` subset in order to guarantee true
    (i.e. Liskov) substitutability.
    """

    __slots__ = ("graph",)

    def __new__(cls, graph: DimensionGraph, values: Tuple[Any, ...]):
        # The key -> index mapping for the required dimensions is taken from
        # the graph; ``values`` is trusted to be in the matching order.
        self = super().__new__(cls, graph._requiredIndices, values)
        self.graph = graph
        return self

    @staticmethod
    def standardize(mapping: Optional[Mapping[str, Any]] = None, *,
                    graph: Optional[DimensionGraph] = None,
                    universe: Optional[DimensionUniverse] = None,
                    **kwds) -> DataCoordinate:
        """Adapt an arbitrary mapping and/or additional arguments into a true
        `DataCoordinate`, or augment an existing one.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping`, optional
            An informal data ID that maps dimension names to their primary key
            values (may also be a true `DataCoordinate`).
        graph : `DimensionGraph`
            The dimensions to be identified by the new `DataCoordinate`.
            If not provided, will be inferred from the keys of ``mapping``,
            and ``universe`` must be provided unless ``mapping`` is already a
            `DataCoordinate`.
        universe : `DimensionUniverse`
            All known dimensions and their relationships; used to expand
            and validate dependencies when ``graph`` is not provided.
        kwds
            Additional keyword arguments are treated like additional key-value
            pairs in ``mapping``.

        Returns
        -------
        coordinate : `DataCoordinate`
            A validated `DataCoordinate` instance.  May be a subclass instance
            if and only if ``mapping`` is a subclass instance and ``graph``
            is a subset of ``mapping.graph``.

        Raises
        ------
        TypeError
            Raised if the set of optional arguments provided is not supported.
        KeyError
            Raised if a key-value pair for a required dimension is missing.

        Notes
        -----
        Because `DataCoordinate` stores only values for required dimensions,
        key-value pairs for other related dimensions will be ignored and
        excluded from the result.  This means that a `DataCoordinate` may
        contain *fewer* key-value pairs than the informal data ID dictionary
        it was constructed from.
        """
        if isinstance(mapping, DataCoordinate):
            if graph is None:
                if not kwds:
                    # Already standardized to exactly what we want.
                    return mapping
            elif mapping.graph.issuperset(graph):
                # Already standardized; just return the relevant subset.
                return mapping.subset(graph)
            # Fall through: the coordinate must be augmented (extra kwds or a
            # graph it does not cover), so reuse its universe.
            assert universe is None or universe == mapping.universe
            universe = mapping.universe
        # Merge ``mapping`` and ``kwds`` into a single name-keyed mapping.
        # ``byName`` is preferred when available so that keys are `str` names.
        if kwds:
            if mapping:
                try:
                    d = dict(mapping.byName(), **kwds)
                except AttributeError:
                    d = dict(mapping, **kwds)
            else:
                d = kwds
        elif mapping:
            try:
                d = mapping.byName()
            except AttributeError:
                d = mapping
        else:
            d = {}
        if graph is None:
            if universe is None:
                raise TypeError("universe must be provided if graph is not.")
            graph = DimensionGraph(universe, names=d.keys())
        try:
            values = tuple(d[name] for name in graph.required.names)
        except KeyError as err:
            # Report the merged data ID (``d``) rather than ``mapping``, which
            # is `None` or incomplete when values were passed via ``kwds``.
            raise KeyError(f"No value in data ID ({d}) for required dimension {err}.") from err
        # some backends cannot handle numpy.int64 type which is
        # a subclass of numbers.Integral, convert that to int.
        values = tuple(int(val) if isinstance(val, numbers.Integral) else val for val in values)
        return DataCoordinate(graph, values)

    def byName(self) -> Dict[str, Any]:
        """Return a true `dict` keyed by `str` dimension name and the same
        values as ``self``.
        """
        return {k.name: v for k, v in self.items()}

    def __getnewargs__(self) -> tuple:
        # Implements pickle support (in addition to methods provided by
        # @immutable decorator).
        return (self.graph, self.values())

    def __hash__(self) -> int:
        return hash((self.graph, self.values()))

    def __eq__(self, other: DataCoordinate) -> bool:
        try:
            # Optimized code path for DataCoordinate comparisons.
            return self.graph == other.graph and self.values() == other.values()
        except AttributeError:
            # We can't reliably compare to informal data ID dictionaries
            # we don't know if any extra keys they might have are consistent
            # with an `ExpandedDataCoordinate` version of ``self`` (which
            # should compare as equal) or something else (which should
            # compare as not equal).
            # We don't even want to return `NotImplemented` and tell Python
            # to delegate to ``other.__eq__``, because that could also be
            # misleading.  We raise TypeError instead.
            raise TypeError("Cannot compare DataCoordinate instances to other objects without potentially "
                            "misleading results.") from None

    def __str__(self):
        return f"{self.byName()}"

    def __repr__(self):
        return f"DataCoordinate({self.graph}, {self.values()})"

    def fingerprint(self, update):
        """Update a secure hash function with the values in this data ID.

        Parameters
        ----------
        update : `~collections.abc.Callable`
            Callable that accepts a single `bytes` argument to update
            the hash; usually the ``update`` method of an instance from
            the ``hashlib`` module.

        Raises
        ------
        TypeError
            Raised if a value is neither an integer nor a `str`.
        OverflowError
            Raised by `int.to_bytes` if an integer value is negative or does
            not fit in 64 bytes.
        """
        for k, v in self.items():
            update(k.name.encode("utf8"))
            if isinstance(v, numbers.Integral):
                # Fixed-width, big-endian, unsigned encoding of the integer.
                update(int(v).to_bytes(64, "big", signed=False))
            elif isinstance(v, str):
                update(v.encode("utf8"))
            else:
                raise TypeError(f"Only `int` and `str` are allowed as dimension keys, not {v} ({type(v)}).")

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        """Return a new `DataCoordinate` whose graph is a subset of
        ``self.graph``.

        Subclasses may override this method to return a subclass instance.

        Parameters
        ----------
        graph : `DimensionGraph`
            The dimensions identified by the returned `DataCoordinate`.

        Returns
        -------
        coordinate : `DataCoordinate`
            A `DataCoordinate` instance that identifies only the given
            dimensions.

        Raises
        ------
        KeyError
            Raised if ``graph`` is not a subset of ``self.graph``, and hence
            one or more dimensions has no associated primary key value.
        """
        return DataCoordinate(graph, tuple(self[dimension] for dimension in graph.required))

    @property
    def universe(self) -> DimensionUniverse:
        """The universe that defines all known dimensions compatible with
        this coordinate (`DimensionUniverse`).
        """
        return self.graph.universe

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    graph: DimensionGraph
    """The dimensions identified by this data ID (`DimensionGraph`).

    Note that values are only required to be present for dimensions in
    ``self.graph.required``; all others may be retrieved (from a `Registry`)
    given these.
    """
DataId = Union[DataCoordinate, Mapping[str, Any]]
"""A type-annotation alias for signatures that accept both informal data ID
dictionaries and validated `DataCoordinate` instances.
"""
def _intersectRegions(*args: Region) -> Optional[Region]:
    """Return the intersection of several regions.

    For internal use by `ExpandedDataCoordinate` only.

    This is currently a placeholder: no intersection is actually computed.
    When multiple regions are given it returns `NotImplemented` (it does
    *not* raise an exception), which propagates to `ExpandedDataCoordinate`.
    This reflects the fact that we don't want to fail to construct an
    `ExpandedDataCoordinate` entirely when we can't compute its region, and
    at present we don't have a high-level use case for the regions of these
    particular data IDs.

    Parameters
    ----------
    *args
        The regions to intersect.

    Returns
    -------
    region
        `None` if no regions are provided, the region itself (the same
        object) if exactly one is provided, and `NotImplemented` otherwise.
    """
    if not args:
        return None
    if len(args) > 1:
        # Placeholder for a true intersection; see the docstring.
        return NotImplemented
    (only,) = args
    return only
@immutable
class ExpandedDataCoordinate(DataCoordinate):
    """A data ID that has been expanded to include all relevant metadata.

    Instances should usually be obtained by calling `Registry.expandDataId`.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.
    records : `~collections.abc.Mapping`
        Dictionary mapping `DimensionElement` to `DimensionRecord`.
    full : `~collections.abc.Mapping`
        Dictionary mapping dimensions to their primary key values for all
        dimensions in the graph, not just required ones.  Ignored unless
        ``conform`` is `False`.
    region : `sphgeom.Region`, optional
        Region on the sky associated with this data ID, or `None` if there
        are no spatial dimensions.  At present, this may be the special value
        `NotImplemented` if there are multiple spatial dimensions identified;
        in the future this will be replaced with the intersection.  Ignored
        unless ``conform`` is `False`.
    timespan : `Timespan`, optional
        Timespan associated with this data ID, or `None` if there are no
        temporal dimensions.
        Ignored unless ``conform`` is `False`.
    conform : `bool`, optional
        If `True` (default), adapt arguments from arbitrary mappings to the
        custom dictionary types and check that all expected key-value pairs are
        present.  `False` is only for internal use.

    Notes
    -----
    To maintain Liskov substitutability with `DataCoordinate`,
    `ExpandedDataCoordinate` mostly acts like a mapping that contains only
    values for its graph's required dimensions, even though it also contains
    values for all implied dimensions - its length, iteration, and
    keys/values/items views reflect only required dimensions.  Values for
    the primary keys of implied dimensions can be obtained from the `full`
    attribute, and are also accessible in dict lookups and the ``in`` operator.
    """

    __slots__ = ("records", "full", "region", "timespan")

    def __new__(cls, graph: DimensionGraph, values: Tuple[Any, ...], *,
                records: Mapping[DimensionElement, DimensionRecord],
                full: Optional[Mapping[Dimension, Any]] = None,
                region: Optional[Region] = None,
                timespan: Optional[Timespan] = None,
                conform: bool = True):
        self = super().__new__(cls, graph, values)
        if conform:
            # Rebuild every derived attribute from ``records``; the ``full``,
            # ``region`` and ``timespan`` arguments are ignored on this path.
            self.records = IndexedTupleDict(
                indices=graph._elementIndices,
                values=tuple(records[element] for element in graph.elements)
            )
            # A dimension whose record is `None` (or lacks the primary key
            # attribute) gets a `None` value via the ``getattr`` default.
            self.full = IndexedTupleDict(
                indices=graph._dimensionIndices,
                values=tuple(getattr(self.records[dimension], dimension.primaryKey.name, None)
                             for dimension in graph.dimensions)
            )
            regions = []
            for element in self.graph.spatial:
                record = self.records[element.name]
                if record is None or record.region is None:
                    # One missing region makes the overall region unknown.
                    self.region = None
                    break
                else:
                    regions.append(record.region)
            else:
                # Only reached when the loop did not ``break``.
                self.region = _intersectRegions(*regions)
            timespans = []
            for element in self.graph.temporal:
                record = self.records[element.name]
                if record is None or record.timespan is None:
                    # One missing timespan makes the overall timespan unknown.
                    self.timespan = None
                    break
                else:
                    timespans.append(record.timespan)
            else:
                # Only reached when the loop did not ``break``.
                self.timespan = Timespan.intersection(*timespans)
        else:
            # Trust the caller: store the arguments exactly as given.
            self.records = records
            self.full = full
            self.region = region
            self.timespan = timespan
        return self

    def __contains__(self, key: Union[DimensionElement, str]) -> bool:
        return key in self.full

    def __getitem__(self, key: Union[DimensionElement, str]) -> Any:
        return self.full[key]

    def __repr__(self):
        return f"ExpandedDataCoordinate({self.graph}, {self.values()})"

    def pack(self, name: str, *, returnMaxBits: bool = False) -> int:
        """Pack this data ID into an integer.

        Parameters
        ----------
        name : `str`
            Name of the `DimensionPacker` algorithm (as defined in the
            dimension configuration).
        returnMaxBits : `bool`, optional
            If `True` (`False` is default), return the maximum number of
            nonzero bits in the returned integer across all data IDs.

        Returns
        -------
        packed : `int`
            Integer ID.  This ID is unique only across data IDs that have
            the same values for the packer's "fixed" dimensions.
        maxBits : `int`, optional
            Maximum number of nonzero bits in ``packed``.  Not returned unless
            ``returnMaxBits`` is `True`.
        """
        return self.universe.makePacker(name, self).pack(self, returnMaxBits=returnMaxBits)

    def matches(self, other) -> bool:
        """Test whether this data ID agrees with another on all shared keys.

        Parameters
        ----------
        other
            Another data ID.  Its ``full`` attribute is used if it has one;
            otherwise ``other`` itself is used as the mapping.

        Returns
        -------
        matches : `bool`
            `True` if the values are equal for every key present in both
            ``self.full`` and the other mapping (including when they have no
            keys in common); `False` otherwise.
        """
        d = getattr(other, "full", other)
        return all(self[k] == d[k] for k in (self.full.keys() & d.keys()))

    def subset(self, graph: DimensionGraph) -> ExpandedDataCoordinate:
        # Docstring inherited from DataCoordinate.subset.
        return ExpandedDataCoordinate(
            graph,
            tuple(self[dimension] for dimension in graph.required),
            records=self.records,
            conform=True
        )

    def __getnewargs_ex__(self) -> Tuple[tuple, dict]:
        # Implements pickle support; ``conform=False`` makes unpickling store
        # the already-conformed attributes as-is instead of rebuilding them.
        return (
            (self.graph, self.values()),
            dict(
                records=self.records,
                full=self.full,
                region=self.region,
                timespan=self.timespan,
                conform=False,
            )
        )

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    full: IndexedTupleDict[Dimension, Any]
    """Dictionary mapping dimensions to their primary key values for all
    dimensions in the graph, not just required ones (`IndexedTupleDict`).

    Like `DataCoordinate` itself, this dictionary can be indexed by `str` name
    as well as `Dimension` instance.
    """

    records: IndexedTupleDict[DimensionElement, DimensionRecord]
    """Dictionary mapping `DimensionElement` to the associated
    `DimensionRecord` (`IndexedTupleDict`).

    Like `DataCoordinate` itself, this dictionary can be indexed by `str` name
    as well as `DimensionElement` instance.
    """

    region: Optional[Region]
    """Region on the sky associated with this data ID, or `None` if there
    are no spatial dimensions (`sphgeom.Region`).

    At present, this may be the special value `NotImplemented` if there are
    multiple spatial dimensions identified; in the future this will be
    replaced with the intersection.
    """

    timespan: Optional[Timespan]
    """Timespan associated with this data ID, or `None` if there are no
    temporal dimensions (`Timespan`).
    """