Coverage for python/lsst/daf/butler/core/dimensions/_coordinate.py: 28%
335 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-31 04:05 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
23# Design notes for this module are in
24# doc/lsst.daf.butler/dev/dataCoordinate.py.
25#
27from __future__ import annotations
29__all__ = ("DataCoordinate", "DataId", "DataIdKey", "DataIdValue", "SerializedDataCoordinate")
31import numbers
32from abc import abstractmethod
33from typing import (
34 TYPE_CHECKING,
35 AbstractSet,
36 Any,
37 Dict,
38 Iterator,
39 Literal,
40 Mapping,
41 Optional,
42 Tuple,
43 Union,
44 overload,
45)
47from lsst.sphgeom import Region
48from pydantic import BaseModel
50from ..json import from_json_pydantic, to_json_pydantic
51from ..named import NamedKeyDict, NamedKeyMapping, NamedValueAbstractSet, NameLookupMapping
52from ..timespan import Timespan
53from ._elements import Dimension, DimensionElement
54from ._graph import DimensionGraph
55from ._records import DimensionRecord, SerializedDimensionRecord
if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
    from ...registry import Registry
    from ._universe import DimensionUniverse


DataIdKey = Union[str, Dimension]
"""Type annotation alias for the keys that can be used to index a
DataCoordinate.
"""

# Pydantic will cast int to str if str is first in the Union.
DataIdValue = Union[int, str, None]
"""Type annotation alias for the values that can be present in a
DataCoordinate or other data ID.
"""
class SerializedDataCoordinate(BaseModel):
    """Simplified model for serializing a `DataCoordinate`."""

    dataId: Dict[str, DataIdValue]
    records: Optional[Dict[str, SerializedDimensionRecord]] = None

    @classmethod
    def direct(
        cls, *, dataId: Dict[str, DataIdValue], records: Optional[Dict[str, Dict]]
    ) -> SerializedDataCoordinate:
        """Construct a `SerializedDataCoordinate` directly without validators.

        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.

        Parameters
        ----------
        dataId : `dict` [ `str`, `int` or `str` or `None` ]
            Mapping from dimension name to primary key value.
        records : `dict` [ `str`, `dict` ] or `None`
            Mapping from dimension element name to the simple-form record
            dictionary, or `None` if no records are attached.

        Returns
        -------
        node : `SerializedDataCoordinate`
            The newly-constructed model instance.
        """
        # Bypass pydantic validation entirely; use ``cls`` so subclasses get
        # an instance of the correct type.
        node = cls.__new__(cls)
        setter = object.__setattr__
        setter(node, "dataId", dataId)
        setter(
            node,
            "records",
            records
            if records is None
            else {k: SerializedDimensionRecord.direct(**v) for k, v in records.items()},
        )
        # Record which fields were explicitly set so pydantic serialization
        # behaves as it would for a validated instance.
        setter(node, "__fields_set__", {"dataId", "records"})
        return node
103def _intersectRegions(*args: Region) -> Optional[Region]:
104 """Return the intersection of several regions.
106 For internal use by `ExpandedDataCoordinate` only.
108 If no regions are provided, returns `None`.
110 This is currently a placeholder; it actually returns `NotImplemented`
111 (it does *not* raise an exception) when multiple regions are given, which
112 propagates to `ExpandedDataCoordinate`. This reflects the fact that we
113 don't want to fail to construct an `ExpandedDataCoordinate` entirely when
114 we can't compute its region, and at present we don't have a high-level use
115 case for the regions of these particular data IDs.
116 """
117 if len(args) == 0:
118 return None
119 elif len(args) == 1:
120 return args[0]
121 else:
122 return NotImplemented
class DataCoordinate(NamedKeyMapping[Dimension, DataIdValue]):
    """Data ID dictionary.

    An immutable data ID dictionary that guarantees that its key-value pairs
    identify at least all required dimensions in a `DimensionGraph`.

    `DataCoordinate` itself is an ABC, but provides `staticmethod` factory
    functions for private concrete implementations that should be sufficient
    for most purposes.  `standardize` is the most flexible and safe of these;
    the others (`makeEmpty`, `fromRequiredValues`, and `fromFullValues`) are
    more specialized and perform little or no checking of inputs.

    Notes
    -----
    Like any data ID class, `DataCoordinate` behaves like a dictionary, but
    with some subtleties:

    - Both `Dimension` instances and `str` names thereof may be used as keys
      in lookup operations, but iteration (and `keys`) will yield `Dimension`
      instances.  The `names` property can be used to obtain the corresponding
      `str` names.

    - Lookups for implied dimensions (those in ``self.graph.implied``) are
      supported if and only if `hasFull` returns `True`, and are never
      included in iteration or `keys`.  The `full` property may be used to
      obtain a mapping whose keys do include implied dimensions.

    - Equality comparison with other mappings is supported, but it always
      considers only required dimensions (as well as requiring both operands
      to identify the same dimensions).  This is not quite consistent with the
      way mappings usually work - normally differing keys imply unequal
      mappings - but it makes sense in this context because data IDs with the
      same values for required dimensions but different values for implied
      dimensions represent a serious problem with the data that
      `DataCoordinate` cannot generally recognize on its own, and a data ID
      that knows implied dimension values should still be able to compare as
      equal to one that does not.  This is of course not the way comparisons
      between simple `dict` data IDs work, and hence using a `DataCoordinate`
      instance for at least one operand in any data ID comparison is strongly
      recommended.
    """

    # No instance state on the ABC itself; concrete subclasses declare their
    # own __slots__.
    __slots__ = ()

    # Pydantic model used by to_simple/from_simple (and the JSON helpers).
    _serializedType = SerializedDataCoordinate
    @staticmethod
    def standardize(
        mapping: Optional[NameLookupMapping[Dimension, DataIdValue]] = None,
        *,
        graph: Optional[DimensionGraph] = None,
        universe: Optional[DimensionUniverse] = None,
        defaults: Optional[DataCoordinate] = None,
        **kwargs: Any,
    ) -> DataCoordinate:
        """Standardize the supplied dataId.

        Adapts an arbitrary mapping and/or additional arguments into a true
        `DataCoordinate`, or augments an existing one.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping`, optional
            An informal data ID that maps dimensions or dimension names to
            their primary key values (may also be a true `DataCoordinate`).
        graph : `DimensionGraph`
            The dimensions to be identified by the new `DataCoordinate`.
            If not provided, will be inferred from the keys of ``mapping`` and
            ``**kwargs``, and ``universe`` must be provided unless ``mapping``
            is already a `DataCoordinate`.
        universe : `DimensionUniverse`
            All known dimensions and their relationships; used to expand
            and validate dependencies when ``graph`` is not provided.
        defaults : `DataCoordinate`, optional
            Default dimension key-value pairs to use when needed.  These are
            never used to infer ``graph``, and are ignored if a different
            value is provided for the same key in ``mapping`` or ``**kwargs``.
        **kwargs
            Additional keyword arguments are treated like additional key-value
            pairs in ``mapping``.

        Returns
        -------
        coordinate : `DataCoordinate`
            A validated `DataCoordinate` instance.

        Raises
        ------
        TypeError
            Raised if the set of optional arguments provided is not supported.
        KeyError
            Raised if a key-value pair for a required dimension is missing.
        """
        # Accumulate all known key-value pairs by dimension *name*.
        d: Dict[str, DataIdValue] = {}
        if isinstance(mapping, DataCoordinate):
            if graph is None:
                if not kwargs:
                    # Already standardized to exactly what we want.
                    return mapping
            elif kwargs.keys().isdisjoint(graph.dimensions.names):
                # User provided kwargs, but told us not to use them by
                # passing in dimensions that are disjoint from those kwargs.
                # This is not necessarily user error - it's a useful pattern
                # to pass in all of the key-value pairs you have and let the
                # code here pull out only what it needs.
                return mapping.subset(graph)
            assert universe is None or universe == mapping.universe
            universe = mapping.universe
            d.update((name, mapping[name]) for name in mapping.graph.required.names)
            if mapping.hasFull():
                # Implied values are only available when the input is "full".
                d.update((name, mapping[name]) for name in mapping.graph.implied.names)
        elif isinstance(mapping, NamedKeyMapping):
            d.update(mapping.byName())
        elif mapping is not None:
            d.update(mapping)
        # kwargs take precedence over values from ``mapping``.
        d.update(kwargs)
        if graph is None:
            if defaults is not None:
                universe = defaults.universe
            elif universe is None:
                raise TypeError("universe must be provided if graph is not.")
            graph = DimensionGraph(universe, names=d.keys())
        if not graph.dimensions:
            return DataCoordinate.makeEmpty(graph.universe)
        # Defaults fill gaps only; they never override explicit values.
        if defaults is not None:
            if defaults.hasFull():
                for k, v in defaults.full.items():
                    d.setdefault(k.name, v)
            else:
                for k, v in defaults.items():
                    d.setdefault(k.name, v)
        if d.keys() >= graph.dimensions.names:
            # We have values for every dimension (required and implied).
            values = tuple(d[name] for name in graph._dataCoordinateIndices.keys())
        else:
            try:
                values = tuple(d[name] for name in graph.required.names)
            except KeyError as err:
                raise KeyError(f"No value in data ID ({mapping}) for required dimension {err}.") from err
        # Some backends cannot handle numpy.int64 type which is a subclass of
        # numbers.Integral; convert that to int.
        values = tuple(
            int(val) if isinstance(val, numbers.Integral) else val for val in values  # type: ignore
        )
        return _BasicTupleDataCoordinate(graph, values)
    @staticmethod
    def makeEmpty(universe: DimensionUniverse) -> DataCoordinate:
        """Return an empty `DataCoordinate`.

        It identifies the null set of dimensions.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Universe to which this null dimension set belongs.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID object that identifies no dimensions.  `hasFull` and
            `hasRecords` are guaranteed to return `True`, because both `full`
            and `records` are just empty mappings.
        """
        # An expanded coordinate with no values and no records trivially
        # satisfies both hasFull() and hasRecords().
        return _ExpandedTupleDataCoordinate(universe.empty, (), {})
    @staticmethod
    def fromRequiredValues(graph: DimensionGraph, values: Tuple[DataIdValue, ...]) -> DataCoordinate:
        """Construct a `DataCoordinate` from required dimension values.

        This is a low-level interface with at most assertion-level checking of
        inputs.  Most callers should use `standardize` instead.

        Parameters
        ----------
        graph : `DimensionGraph`
            Dimensions this data ID will identify.
        values : `tuple` [ `int` or `str` ]
            Tuple of primary key values corresponding to ``graph.required``,
            in that order.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID object that identifies the given dimensions.
            ``dataId.hasFull()`` will return `True` if and only if
            ``graph.implied`` is empty, and ``dataId.hasRecords()`` will never
            return `True`.
        """
        assert len(graph.required) == len(
            values
        ), f"Inconsistency between dimensions {graph.required} and required values {values}."
        return _BasicTupleDataCoordinate(graph, values)
    @staticmethod
    def fromFullValues(graph: DimensionGraph, values: Tuple[DataIdValue, ...]) -> DataCoordinate:
        """Construct a `DataCoordinate` from all dimension values.

        This is a low-level interface with at most assertion-level checking of
        inputs.  Most callers should use `standardize` instead.

        Parameters
        ----------
        graph : `DimensionGraph`
            Dimensions this data ID will identify.
        values : `tuple` [ `int` or `str` ]
            Tuple of primary key values corresponding to
            ``itertools.chain(graph.required, graph.implied)``, in that order.
            Note that this is _not_ the same order as ``graph.dimensions``,
            though these contain the same elements.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID object that identifies the given dimensions.
            ``dataId.hasFull()`` will always return `True`, because values for
            all dimensions (required and implied) are provided here, and
            ``dataId.hasRecords()`` will never return `True`.
        """
        assert len(graph.dimensions) == len(
            values
        ), f"Inconsistency between dimensions {graph.dimensions} and full values {values}."
        return _BasicTupleDataCoordinate(graph, values)
348 def __hash__(self) -> int:
349 return hash((self.graph,) + tuple(self[d.name] for d in self.graph.required))
351 def __eq__(self, other: Any) -> bool:
352 if not isinstance(other, DataCoordinate):
353 other = DataCoordinate.standardize(other, universe=self.universe)
354 return self.graph == other.graph and all(self[d.name] == other[d.name] for d in self.graph.required)
356 def __repr__(self) -> str:
357 # We can't make repr yield something that could be exec'd here without
358 # printing out the whole DimensionUniverse the graph is derived from.
359 # So we print something that mostly looks like a dict, but doesn't
360 # quote its keys: that's both more compact and something that can't
361 # be mistaken for an actual dict or something that could be exec'd.
362 terms = [f"{d}: {self[d]!r}" for d in self.graph.required.names]
363 if self.hasFull() and self.graph.required != self.graph.dimensions:
364 terms.append("...")
365 return "{{{}}}".format(", ".join(terms))
367 def __lt__(self, other: Any) -> bool:
368 # Allow DataCoordinate to be sorted
369 if not isinstance(other, type(self)):
370 return NotImplemented
371 # Form tuple of tuples for each DataCoordinate:
372 # Unlike repr() we only use required keys here to ensure that
373 # __eq__ can not be true simultaneously with __lt__ being true.
374 self_kv = tuple(self.items())
375 other_kv = tuple(other.items())
377 return self_kv < other_kv
    def __iter__(self) -> Iterator[Dimension]:
        # Iterate over required dimensions only, consistent with keys().
        return iter(self.keys())
    def __len__(self) -> int:
        # Length counts required dimensions only, consistent with keys().
        return len(self.keys())
    def keys(self) -> NamedValueAbstractSet[Dimension]:  # type: ignore
        # Only required dimensions appear as keys; implied dimensions are
        # reachable via the `full` property when hasFull() is True.
        return self.graph.required
    @property
    def names(self) -> AbstractSet[str]:
        """Names of the required dimensions identified by this data ID.

        They are returned in the same order as `keys`
        (`collections.abc.Set` [ `str` ]).
        """
        return self.keys().names
    @abstractmethod
    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        """Return a `DataCoordinate` whose graph is a subset of ``self.graph``.

        Parameters
        ----------
        graph : `DimensionGraph`
            The dimensions identified by the returned `DataCoordinate`.

        Returns
        -------
        coordinate : `DataCoordinate`
            A `DataCoordinate` instance that identifies only the given
            dimensions.  May be ``self`` if ``graph == self.graph``.

        Raises
        ------
        KeyError
            Raised if the primary key value for one or more required
            dimensions is unknown.  This may happen if
            ``graph.issubset(self.graph)`` is `False`, or even if
            ``graph.issubset(self.graph)`` is `True`, if ``self.hasFull()`` is
            `False` and ``graph.required.issubset(self.graph.required)`` is
            `False`.  As an example of the latter case, consider trying to go
            from a data ID with dimensions {instrument, physical_filter, band}
            to just {instrument, band}; band is implied by physical_filter and
            hence would have no value in the original data ID if
            ``self.hasFull()`` is `False`.

        Notes
        -----
        If `hasFull` and `hasRecords` return `True` on ``self``, they will
        return `True` (respectively) on the returned `DataCoordinate` as well.
        The converse does not hold.
        """
        raise NotImplementedError()
    @abstractmethod
    def union(self, other: DataCoordinate) -> DataCoordinate:
        """Combine two data IDs.

        Yields a new one that identifies all dimensions that either of them
        identify.

        Parameters
        ----------
        other : `DataCoordinate`
            Data ID to combine with ``self``.

        Returns
        -------
        unioned : `DataCoordinate`
            A `DataCoordinate` instance that satisfies
            ``unioned.graph == self.graph.union(other.graph)``.  Will preserve
            ``hasFull`` and ``hasRecords`` whenever possible.

        Notes
        -----
        No checking for consistency is performed on values for keys that
        ``self`` and ``other`` have in common, and which value is included in
        the returned data ID is not specified.
        """
        raise NotImplementedError()
    @abstractmethod
    def expanded(
        self, records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]]
    ) -> DataCoordinate:
        """Return a `DataCoordinate` that holds the given records.

        Guarantees that `hasRecords` returns `True`.

        This is a low-level interface with at most assertion-level checking of
        inputs.  Most callers should use `Registry.expandDataId` instead.

        Parameters
        ----------
        records : `Mapping` [ `str`, `DimensionRecord` or `None` ]
            A `NamedKeyMapping` with `DimensionElement` keys or a regular
            `Mapping` with `str` (`DimensionElement` name) keys and
            `DimensionRecord` values.  Keys must cover all elements in
            ``self.graph.elements``.  Values may be `None`, but only to
            reflect actual NULL values in the database, not just records that
            have not been fetched.
        """
        raise NotImplementedError()
    @property
    def universe(self) -> DimensionUniverse:
        """Universe that defines all known compatible dimensions.

        The universe will be compatible with this coordinate
        (`DimensionUniverse`).
        """
        return self.graph.universe
    @property
    @abstractmethod
    def graph(self) -> DimensionGraph:
        """Dimensions identified by this data ID (`DimensionGraph`).

        Note that values are only required to be present for dimensions in
        ``self.graph.required``; all others may be retrieved (from a
        `Registry`) given these.
        """
        raise NotImplementedError()
    @abstractmethod
    def hasFull(self) -> bool:
        """Whether this data ID contains implied and required values.

        Returns
        -------
        state : `bool`
            If `True`, `__getitem__`, `get`, and `__contains__` (but not
            `keys`!) will act as though the mapping includes key-value pairs
            for implied dimensions, and the `full` property may be used.  If
            `False`, these operations only include key-value pairs for
            required dimensions, and accessing `full` is an error.  Always
            `True` if there are no implied dimensions.
        """
        raise NotImplementedError()
    @property
    def full(self) -> NamedKeyMapping[Dimension, DataIdValue]:
        """Return mapping for all dimensions in ``self.graph``.

        The mapping includes key-value pairs for all dimensions in
        ``self.graph``, including implied (`NamedKeyMapping`).

        Accessing this attribute if `hasFull` returns `False` is a logic error
        that may raise an exception of unspecified type either immediately or
        when implied keys are accessed via the returned mapping, depending on
        the implementation and whether assertions are enabled.
        """
        assert self.hasFull(), "full may only be accessed if hasFull() returns True."
        # Lazy view; no values are copied.
        return _DataCoordinateFullView(self)
    @abstractmethod
    def hasRecords(self) -> bool:
        """Whether this data ID contains records.

        These are the records for all of the dimension elements it identifies.

        Returns
        -------
        state : `bool`
            If `True`, the following attributes may be accessed:

            - `records`
            - `region`
            - `timespan`
            - `pack`

            If `False`, accessing any of these is considered a logic error.
        """
        raise NotImplementedError()
    @property
    def records(self) -> NamedKeyMapping[DimensionElement, Optional[DimensionRecord]]:
        """Return the records.

        Returns a mapping that contains `DimensionRecord` objects for all
        elements identified by this data ID (`NamedKeyMapping`).

        The values of this mapping may be `None` if and only if there is no
        record for that element with these dimensions in the database (which
        means some foreign key field must have a NULL value).

        Accessing this attribute if `hasRecords` returns `False` is a logic
        error that may raise an exception of unspecified type either
        immediately or when the returned mapping is used, depending on the
        implementation and whether assertions are enabled.
        """
        assert self.hasRecords(), "records may only be accessed if hasRecords() returns True."
        # Lazy view; records are looked up on demand via _record().
        return _DataCoordinateRecordsView(self)
    @abstractmethod
    def _record(self, name: str) -> Optional[DimensionRecord]:
        """Protected implementation hook that backs the ``records`` attribute.

        Parameters
        ----------
        name : `str`
            The name of a `DimensionElement`, guaranteed to be in
            ``self.graph.elements.names``.

        Returns
        -------
        record : `DimensionRecord` or `None`
            The dimension record for the given element identified by this
            data ID, or `None` if there is no such record.
        """
        raise NotImplementedError()
592 @property
593 def region(self) -> Optional[Region]:
594 """Spatial region associated with this data ID.
596 (`lsst.sphgeom.Region` or `None`).
598 This is `None` if and only if ``self.graph.spatial`` is empty.
600 Accessing this attribute if `hasRecords` returns `False` is a logic
601 error that may or may not raise an exception, depending on the
602 implementation and whether assertions are enabled.
603 """
604 assert self.hasRecords(), "region may only be accessed if hasRecords() returns True."
605 regions = []
606 for family in self.graph.spatial:
607 element = family.choose(self.graph.elements)
608 record = self._record(element.name)
609 if record is None or record.region is None:
610 return None
611 else:
612 regions.append(record.region)
613 return _intersectRegions(*regions)
615 @property
616 def timespan(self) -> Optional[Timespan]:
617 """Temporal interval associated with this data ID.
619 (`Timespan` or `None`).
621 This is `None` if and only if ``self.graph.timespan`` is empty.
623 Accessing this attribute if `hasRecords` returns `False` is a logic
624 error that may or may not raise an exception, depending on the
625 implementation and whether assertions are enabled.
626 """
627 assert self.hasRecords(), "timespan may only be accessed if hasRecords() returns True."
628 timespans = []
629 for family in self.graph.temporal:
630 element = family.choose(self.graph.elements)
631 record = self._record(element.name)
632 # DimensionRecord subclasses for temporal elements always have
633 # .timespan, but they're dynamic so this can't be type-checked.
634 if record is None or record.timespan is None:
635 return None
636 else:
637 timespans.append(record.timespan)
638 if not timespans:
639 return None
640 elif len(timespans) == 1:
641 return timespans[0]
642 else:
643 return Timespan.intersection(*timespans)
    @overload
    def pack(self, name: str, *, returnMaxBits: Literal[True]) -> Tuple[int, int]:
        ...

    @overload
    def pack(self, name: str, *, returnMaxBits: Literal[False]) -> int:
        ...

    def pack(self, name: str, *, returnMaxBits: bool = False) -> Union[Tuple[int, int], int]:
        """Pack this data ID into an integer.

        Parameters
        ----------
        name : `str`
            Name of the `DimensionPacker` algorithm (as defined in the
            dimension configuration).
        returnMaxBits : `bool`, optional
            If `True` (`False` is default), return the maximum number of
            nonzero bits in the returned integer across all data IDs.

        Returns
        -------
        packed : `int`
            Integer ID.  This ID is unique only across data IDs that have
            the same values for the packer's "fixed" dimensions.
        maxBits : `int`, optional
            Maximum number of nonzero bits in ``packed``.  Not returned unless
            ``returnMaxBits`` is `True`.

        Notes
        -----
        Calling this method if `hasRecords` returns `False` is a logic
        error that may or may not raise an exception, depending on the
        implementation and whether assertions are enabled.
        """
        assert self.hasRecords(), "pack() may only be called if hasRecords() returns True."
        # Delegate to the named packer defined by the dimension configuration.
        return self.universe.makePacker(name, self).pack(self, returnMaxBits=returnMaxBits)
683 def to_simple(self, minimal: bool = False) -> SerializedDataCoordinate:
684 """Convert this class to a simple python type.
686 This is suitable for serialization.
688 Parameters
689 ----------
690 minimal : `bool`, optional
691 Use minimal serialization. If set the records will not be attached.
693 Returns
694 -------
695 simple : `SerializedDataCoordinate`
696 The object converted to simple form.
697 """
698 # Convert to a dict form
699 if self.hasFull():
700 dataId = self.full.byName()
701 else:
702 dataId = self.byName()
703 records: Optional[Dict[str, SerializedDimensionRecord]]
704 if not minimal and self.hasRecords():
705 records = {k: v.to_simple() for k, v in self.records.byName().items() if v is not None}
706 else:
707 records = None
709 return SerializedDataCoordinate(dataId=dataId, records=records)
    @classmethod
    def from_simple(
        cls,
        simple: SerializedDataCoordinate,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
    ) -> DataCoordinate:
        """Construct a new object from the simplified form.

        The data is assumed to be of the form returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDataCoordinate`
            The model returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted.  Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        dataId : `DataCoordinate`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or if
            no usable universe can be determined from them.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert a dict to a DataCoordinate")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        dataId = cls.standardize(simple.dataId, universe=universe)
        if simple.records:
            # Re-attach records so the result satisfies hasRecords().
            dataId = dataId.expanded(
                {k: DimensionRecord.from_simple(v, universe=universe) for k, v in simple.records.items()}
            )
        return dataId
    # Bind the shared pydantic-based JSON (de)serialization helpers, which
    # delegate to to_simple/from_simple via _serializedType.
    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)
DataId = Union[DataCoordinate, Mapping[str, Any]]
"""A type-annotation alias for signatures that accept both informal data ID
dictionaries and validated `DataCoordinate` instances.
"""
class _DataCoordinateFullView(NamedKeyMapping[Dimension, DataIdValue]):
    """View class for `DataCoordinate.full`.

    Provides the default implementation for `DataCoordinate.full`: a lazy,
    read-only mapping over all dimensions (required and implied) of the
    target data ID.

    Parameters
    ----------
    target : `DataCoordinate`
        The `DataCoordinate` instance this object provides a view of.
    """

    __slots__ = ("_target",)

    def __init__(self, target: DataCoordinate):
        self._target = target

    def __repr__(self) -> str:
        parts = [f"{name}: {self[name]!r}" for name in self._target.graph.dimensions.names]
        return "{" + ", ".join(parts) + "}"

    def __getitem__(self, key: DataIdKey) -> DataIdValue:
        # Delegate straight to the target, which accepts implied keys because
        # this view is only constructed when hasFull() is True.
        return self._target[key]

    def __iter__(self) -> Iterator[Dimension]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self.keys())

    def keys(self) -> NamedValueAbstractSet[Dimension]:  # type: ignore
        # All dimensions, not just required ones.
        return self._target.graph.dimensions

    @property
    def names(self) -> AbstractSet[str]:
        # Docstring inherited from `NamedKeyMapping`.
        return self.keys().names
class _DataCoordinateRecordsView(NamedKeyMapping[DimensionElement, Optional[DimensionRecord]]):
    """View class for `DataCoordinate.records`.

    Provides the default implementation for `DataCoordinate.records`: a lazy,
    read-only mapping from dimension element to its record (or `None`) for
    the target data ID.

    Parameters
    ----------
    target : `DataCoordinate`
        The `DataCoordinate` instance this object provides a view of.
    """

    __slots__ = ("_target",)

    def __init__(self, target: DataCoordinate):
        self._target = target

    def __repr__(self) -> str:
        parts = [f"{name}: {self[name]!r}" for name in self._target.graph.elements.names]
        return "{" + ", ".join(parts) + "}"

    def __str__(self) -> str:
        return "\n".join(str(v) for v in self.values())

    def __getitem__(self, key: Union[DimensionElement, str]) -> Optional[DimensionRecord]:
        # Normalize element objects to names; _record() takes names only.
        if isinstance(key, DimensionElement):
            key = key.name
        return self._target._record(key)

    def __iter__(self) -> Iterator[DimensionElement]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self.keys())

    def keys(self) -> NamedValueAbstractSet[DimensionElement]:  # type: ignore
        return self._target.graph.elements

    @property
    def names(self) -> AbstractSet[str]:
        # Docstring inherited from `NamedKeyMapping`.
        return self.keys().names
846class _BasicTupleDataCoordinate(DataCoordinate):
847 """Standard implementation of `DataCoordinate`.
849 Backed by a tuple of values.
851 This class should only be accessed outside this module via the
852 `DataCoordinate` interface, and should only be constructed via the static
853 methods there.
855 Parameters
856 ----------
857 graph : `DimensionGraph`
858 The dimensions to be identified.
859 values : `tuple` [ `int` or `str` ]
860 Data ID values, ordered to match ``graph._dataCoordinateIndices``. May
861 include values for just required dimensions (which always come first)
862 or all dimensions.
863 """
865 def __init__(self, graph: DimensionGraph, values: Tuple[DataIdValue, ...]):
866 self._graph = graph
867 self._values = values
869 __slots__ = ("_graph", "_values")
871 @property
872 def graph(self) -> DimensionGraph:
873 # Docstring inherited from DataCoordinate.
874 return self._graph
876 def __getitem__(self, key: DataIdKey) -> DataIdValue:
877 # Docstring inherited from DataCoordinate.
878 if isinstance(key, Dimension):
879 key = key.name
880 index = self._graph._dataCoordinateIndices[key]
881 try:
882 return self._values[index]
883 except IndexError:
884 # Caller asked for an implied dimension, but this object only has
885 # values for the required ones.
886 raise KeyError(key) from None
888 def subset(self, graph: DimensionGraph) -> DataCoordinate:
889 # Docstring inherited from DataCoordinate.
890 if self._graph == graph:
891 return self
892 elif self.hasFull() or self._graph.required >= graph.dimensions:
893 return _BasicTupleDataCoordinate(
894 graph,
895 tuple(self[k] for k in graph._dataCoordinateIndices.keys()),
896 )
897 else:
898 return _BasicTupleDataCoordinate(graph, tuple(self[k] for k in graph.required.names))
    def union(self, other: DataCoordinate) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        graph = self.graph.union(other.graph)
        # See if one or both input data IDs is already what we want to return;
        # if so, return the most complete one we have.
        if other.graph == graph:
            if self.graph == graph:
                # Input data IDs have the same graph (which is also the result
                # graph), but may not have the same content.
                # other might have records; self does not, so try other first.
                # If it at least has full values, it's no worse than self.
                if other.hasFull():
                    return other
                else:
                    return self
            elif other.hasFull():
                return other
            # There's some chance that neither self nor other has full values,
            # but together they provide enough for the union.  Let the general
            # case below handle that.
        elif self.graph == graph:
            # No chance at returning records. If self has full values, it's
            # the best we can do.
            if self.hasFull():
                return self
        # General case with actual merging of dictionaries: use full values
        # from whichever side has them, then let standardize() sort out what
        # the merged mapping actually identifies.
        values = self.full.byName() if self.hasFull() else self.byName()
        values.update(other.full.byName() if other.hasFull() else other.byName())
        return DataCoordinate.standardize(values, graph=graph)
930 def expanded(
931 self, records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]]
932 ) -> DataCoordinate:
933 # Docstring inherited from DataCoordinate
934 values = self._values
935 if not self.hasFull():
936 # Extract a complete values tuple from the attributes of the given
937 # records. It's possible for these to be inconsistent with
938 # self._values (which is a serious problem, of course), but we've
939 # documented this as a no-checking API.
940 values += tuple(getattr(records[d.name], d.primaryKey.name) for d in self._graph.implied)
941 return _ExpandedTupleDataCoordinate(self._graph, values, records)
943 def hasFull(self) -> bool:
944 # Docstring inherited from DataCoordinate.
945 return len(self._values) == len(self._graph._dataCoordinateIndices)
    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinate.
        # This basic implementation never carries DimensionRecords; see
        # _ExpandedTupleDataCoordinate for the variant that does.
        return False
951 def _record(self, name: str) -> Optional[DimensionRecord]:
952 # Docstring inherited from DataCoordinate.
953 assert False
class _ExpandedTupleDataCoordinate(_BasicTupleDataCoordinate):
    """A `DataCoordinate` implementation that can hold `DimensionRecord`.

    This class should only be accessed outside this module via the
    `DataCoordinate` interface, and should only be constructed via calls to
    `DataCoordinate.expanded`.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions to be identified.
    values : `tuple` [ `int` or `str` ]
        Data ID values, ordered to match ``graph._dataCoordinateIndices``.
        May include values for just required dimensions (which always come
        first) or all dimensions.
    records : `Mapping` [ `str`, `DimensionRecord` or `None` ]
        A `NamedKeyMapping` with `DimensionElement` keys or a regular
        `Mapping` with `str` (`DimensionElement` name) keys and
        `DimensionRecord` values.  Keys must cover all elements in
        ``self.graph.elements``.  Values may be `None`, but only to reflect
        actual NULL values in the database, not just records that have not
        been fetched.
    """

    def __init__(
        self,
        graph: DimensionGraph,
        values: Tuple[DataIdValue, ...],
        records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]],
    ):
        super().__init__(graph, values)
        # Expanded data IDs must identify every dimension, not just the
        # required ones; the base-class check is an internal invariant.
        assert super().hasFull(), "This implementation requires full dimension records."
        self._records = records

    __slots__ = ("_records",)

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        if self._graph == graph:
            # Identical dimensions; immutable, so reuse self.
            return self
        subset_values = tuple(self[name] for name in graph._dataCoordinateIndices.keys())
        # self._records covers a superset of the new graph's elements, so
        # it can be passed along unchanged.
        return _ExpandedTupleDataCoordinate(graph, subset_values, records=self._records)

    def expanded(
        self, records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]]
    ) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        # Already expanded; the given records add nothing.
        return self

    def union(self, other: DataCoordinate) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        combined = self.graph.union(other.graph)
        # Fast paths: one of the inputs may already cover the union; if so,
        # return the most complete one we have.
        if self.graph == combined:
            # self has records, so even if other is also a valid result,
            # it's no better.
            return self
        if other.graph == combined and other.hasFull():
            # other covers the union and has full values (and possibly
            # records); that's the best we can do.
            return other
        # If other covers the union but lacks full values, self may supply
        # what's missing — e.g. self could be {band} while other is
        # {instrument, physical_filter, band} with band unknown.
        # General case with actual merging of dictionaries; self always has
        # full values.
        merged = self.full.byName()
        if other.hasFull():
            merged.update(other.full.byName())
        else:
            merged.update(other.byName())
        basic = DataCoordinate.standardize(merged, graph=combined)
        # See if we can add records.
        if self.hasRecords() and other.hasRecords():
            # Sometimes the elements of a union of graphs can contain
            # elements that weren't in either input graph (because graph
            # unions are only on dimensions), e.g. {visit} | {detector}
            # brings along visit_detector_region; we'd have no record for
            # such an element, so only attach records when none appear.
            uncovered = set(combined.elements.names)
            uncovered -= self.graph.elements.names
            uncovered -= other.graph.elements.names
            if not uncovered:
                records = NamedKeyDict[DimensionElement, Optional[DimensionRecord]](self.records)
                records.update(other.records)
                return basic.expanded(records.freeze())
        return basic

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinate.
        # Guaranteed by the assertion in __init__.
        return True

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinate.
        return True

    def _record(self, name: str) -> Optional[DimensionRecord]:
        # Docstring inherited from DataCoordinate.
        return self._records[name]