Coverage for python/lsst/daf/butler/core/dimensions/_coordinate.py: 35%
364 statements
coverage.py v7.3.0, created at 2023-09-02 09:34 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

#
# Design notes for this module are in
# doc/lsst.daf.butler/dev/dataCoordinate.py.
#

from __future__ import annotations

__all__ = ("DataCoordinate", "DataId", "DataIdKey", "DataIdValue", "SerializedDataCoordinate")

import numbers
from abc import abstractmethod
from collections.abc import Iterator, Mapping, Set
from typing import TYPE_CHECKING, Any, ClassVar, Literal, overload

from deprecated.sphinx import deprecated
from lsst.daf.butler._compat import _BaseModelCompat
from lsst.sphgeom import IntersectionRegion, Region

from ..json import from_json_pydantic, to_json_pydantic
from ..named import NamedKeyDict, NamedKeyMapping, NamedValueAbstractSet, NameLookupMapping
from ..persistenceContext import PersistenceContextVars
from ..timespan import Timespan
from ._elements import Dimension, DimensionElement
from ._graph import DimensionGraph
from ._records import DimensionRecord, SerializedDimensionRecord

if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
    from ...registry import Registry
    from ._universe import DimensionUniverse

DataIdKey = str | Dimension
"""Type annotation alias for the keys that can be used to index a
DataCoordinate.
"""

# Pydantic will cast int to str if str is first in the Union.
DataIdValue = int | str | None
"""Type annotation alias for the values that can be present in a
DataCoordinate or other data ID.
"""


class SerializedDataCoordinate(_BaseModelCompat):
    """Simplified model for serializing a `DataCoordinate`."""

    dataId: dict[str, DataIdValue]
    records: dict[str, SerializedDimensionRecord] | None = None

    @classmethod
    def direct(
        cls, *, dataId: dict[str, DataIdValue], records: dict[str, dict] | None
    ) -> SerializedDataCoordinate:
        """Construct a `SerializedDataCoordinate` directly without validators.

        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        This method should only be called when the inputs are trusted.
        """
        key = (frozenset(dataId.items()), records is not None)
        cache = PersistenceContextVars.serializedDataCoordinateMapping.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        if records is None:
            serialized_records = None
        else:
            serialized_records = {k: SerializedDimensionRecord.direct(**v) for k, v in records.items()}

        node = cls.model_construct(dataId=dataId, records=serialized_records)

        if cache is not None:
            cache[key] = node
        return node


def _intersectRegions(*args: Region) -> Region | None:
    """Return the intersection of several regions.

    For internal use by `_ExpandedTupleDataCoordinate` only.

    If no regions are provided, returns `None`.
    """
    if len(args) == 0:
        return None
    else:
        result = args[0]
        for n in range(1, len(args)):
            result = IntersectionRegion(result, args[n])
        return result


class DataCoordinate(NamedKeyMapping[Dimension, DataIdValue]):
    """Data ID dictionary.

    An immutable data ID dictionary that guarantees that its key-value pairs
    identify at least all required dimensions in a `DimensionGraph`.

    `DataCoordinate` itself is an ABC, but provides `staticmethod` factory
    functions for private concrete implementations that should be sufficient
    for most purposes. `standardize` is the most flexible and safe of these;
    the others (`makeEmpty`, `fromRequiredValues`, and `fromFullValues`) are
    more specialized and perform little or no checking of inputs.

    Notes
    -----
    Like any data ID class, `DataCoordinate` behaves like a dictionary, but
    with some subtleties:

    - Both `Dimension` instances and `str` names thereof may be used as keys
      in lookup operations, but iteration (and `keys`) will yield `Dimension`
      instances. The `names` property can be used to obtain the corresponding
      `str` names.

    - Lookups for implied dimensions (those in ``self.graph.implied``) are
      supported if and only if `hasFull` returns `True`, and are never
      included in iteration or `keys`. The `full` property may be used to
      obtain a mapping whose keys do include implied dimensions.

    - Equality comparison with other mappings is supported, but it always
      considers only required dimensions (as well as requiring both operands
      to identify the same dimensions). This is not quite consistent with the
      way mappings usually work - normally differing keys imply unequal
      mappings - but it makes sense in this context because data IDs with the
      same values for required dimensions but different values for implied
      dimensions represent a serious problem with the data that
      `DataCoordinate` cannot generally recognize on its own, and a data ID
      that knows implied dimension values should still be able to compare as
      equal to one that does not. This is of course not the way comparisons
      between simple `dict` data IDs work, and hence using a `DataCoordinate`
      instance for at least one operand in any data ID comparison is strongly
      recommended.

    See Also
    --------
    :ref:`lsst.daf.butler-dimensions_data_ids`
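
    Examples
    --------
    A minimal sketch of dictionary-style access. The dimension names and
    values here are hypothetical, and ``universe`` is assumed to be a
    `DimensionUniverse` that defines them::

        data_id = DataCoordinate.standardize(
            {"instrument": "HSC", "detector": 42}, universe=universe
        )
        data_id["detector"]   # `str` and `Dimension` keys both work
        list(data_id.names)   # names of the required dimensions only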
159 """

    __slots__ = ()

    _serializedType = SerializedDataCoordinate

    @staticmethod
    def standardize(
        mapping: NameLookupMapping[Dimension, DataIdValue] | None = None,
        *,
        graph: DimensionGraph | None = None,
        universe: DimensionUniverse | None = None,
        defaults: DataCoordinate | None = None,
        **kwargs: Any,
    ) -> DataCoordinate:
        """Standardize the supplied dataId.

        Adapts an arbitrary mapping and/or additional arguments into a true
        `DataCoordinate`, or augments an existing one.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping`, optional
            An informal data ID that maps dimensions or dimension names to
            their primary key values (may also be a true `DataCoordinate`).
        graph : `DimensionGraph`, optional
            The dimensions to be identified by the new `DataCoordinate`.
            If not provided, will be inferred from the keys of ``mapping`` and
            ``**kwargs``, and ``universe`` must be provided unless ``mapping``
            is already a `DataCoordinate`.
        universe : `DimensionUniverse`, optional
            All known dimensions and their relationships; used to expand
            and validate dependencies when ``graph`` is not provided.
        defaults : `DataCoordinate`, optional
            Default dimension key-value pairs to use when needed. These are
            never used to infer ``graph``, and are ignored if a different
            value is provided for the same key in ``mapping`` or ``**kwargs``.
        **kwargs
            Additional keyword arguments are treated like additional key-value
            pairs in ``mapping``.

        Returns
        -------
        coordinate : `DataCoordinate`
            A validated `DataCoordinate` instance.

        Raises
        ------
        TypeError
            Raised if the set of optional arguments provided is not supported.
        KeyError
            Raised if a key-value pair for a required dimension is missing.
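
        Examples
        --------
        A minimal sketch, with hypothetical dimension names and values;
        ``universe`` is assumed to be a `DimensionUniverse` that defines
        ``instrument`` and ``detector``::

            data_id = DataCoordinate.standardize(
                {"instrument": "HSC"}, detector=42, universe=universe
            )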
211 """
        d: dict[str, DataIdValue] = {}
        if isinstance(mapping, DataCoordinate):
            if graph is None:
                if not kwargs:
                    # Already standardized to exactly what we want.
                    return mapping
            elif kwargs.keys().isdisjoint(graph.dimensions.names):
                # User provided kwargs, but told us not to use them by
                # passing in dimensions that are disjoint from those kwargs.
                # This is not necessarily user error - it's a useful pattern
                # to pass in all of the key-value pairs you have and let the
                # code here pull out only what it needs.
                return mapping.subset(graph)
            assert universe is None or universe == mapping.universe
            universe = mapping.universe
            d.update((name, mapping[name]) for name in mapping.graph.required.names)
            if mapping.hasFull():
                d.update((name, mapping[name]) for name in mapping.graph.implied.names)
        elif isinstance(mapping, NamedKeyMapping):
            d.update(mapping.byName())
        elif mapping is not None:
            d.update(mapping)
        d.update(kwargs)
        if graph is None:
            if defaults is not None:
                universe = defaults.universe
            elif universe is None:
                raise TypeError("universe must be provided if graph is not.")
            graph = DimensionGraph(universe, names=d.keys())
        if not graph.dimensions:
            return DataCoordinate.makeEmpty(graph.universe)
        if defaults is not None:
            if defaults.hasFull():
                for k, v in defaults.full.items():
                    d.setdefault(k.name, v)
            else:
                for k, v in defaults.items():
                    d.setdefault(k.name, v)
        if d.keys() >= graph.dimensions.names:
            values = tuple(d[name] for name in graph._dataCoordinateIndices)
        else:
            try:
                values = tuple(d[name] for name in graph.required.names)
            except KeyError as err:
                raise KeyError(f"No value in data ID ({mapping}) for required dimension {err}.") from err
        # Some backends cannot handle numpy.int64 type which is a subclass of
        # numbers.Integral; convert that to int.
        values = tuple(
            int(val) if isinstance(val, numbers.Integral) else val for val in values  # type: ignore
        )
        return _BasicTupleDataCoordinate(graph, values)

    @staticmethod
    def makeEmpty(universe: DimensionUniverse) -> DataCoordinate:
        """Return an empty `DataCoordinate`.

        It identifies the null set of dimensions.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Universe to which this null dimension set belongs.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID object that identifies no dimensions. `hasFull` and
            `hasRecords` are guaranteed to return `True`, because both `full`
            and `records` are just empty mappings.
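
        Examples
        --------
        A minimal sketch, assuming ``universe`` is an existing
        `DimensionUniverse`::

            empty = DataCoordinate.makeEmpty(universe)
            assert empty.hasFull() and empty.hasRecords()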
281 """
282 return _ExpandedTupleDataCoordinate(universe.empty, (), {})

    @staticmethod
    def fromRequiredValues(graph: DimensionGraph, values: tuple[DataIdValue, ...]) -> DataCoordinate:
        """Construct a `DataCoordinate` from required dimension values.

        This is a low-level interface with at most assertion-level checking of
        inputs. Most callers should use `standardize` instead.

        Parameters
        ----------
        graph : `DimensionGraph`
            Dimensions this data ID will identify.
        values : `tuple` [ `int` or `str` ]
            Tuple of primary key values corresponding to ``graph.required``,
            in that order.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID object that identifies the given dimensions.
            ``dataId.hasFull()`` will return `True` if and only if
            ``graph.implied`` is empty, and ``dataId.hasRecords()`` will never
            return `True`.
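
        Examples
        --------
        A minimal sketch, assuming ``graph.required`` is, in order,
        ``{instrument, detector}`` (hypothetical values)::

            data_id = DataCoordinate.fromRequiredValues(graph, ("HSC", 42))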
306 """
307 assert len(graph.required) == len(
308 values
309 ), f"Inconsistency between dimensions {graph.required} and required values {values}."
310 return _BasicTupleDataCoordinate(graph, values)

    @staticmethod
    def fromFullValues(graph: DimensionGraph, values: tuple[DataIdValue, ...]) -> DataCoordinate:
        """Construct a `DataCoordinate` from all dimension values.

        This is a low-level interface with at most assertion-level checking of
        inputs. Most callers should use `standardize` instead.

        Parameters
        ----------
        graph : `DimensionGraph`
            Dimensions this data ID will identify.
        values : `tuple` [ `int` or `str` ]
            Tuple of primary key values corresponding to
            ``itertools.chain(graph.required, graph.implied)``, in that order.
            Note that this is _not_ the same order as ``graph.dimensions``,
            though these contain the same elements.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID object that identifies the given dimensions.
            ``dataId.hasFull()`` will always return `True`, because values for
            all dimensions are provided, and ``dataId.hasRecords()`` will
            never return `True`.
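
        Examples
        --------
        A minimal sketch, assuming ``graph.required`` is, in order,
        ``{instrument, physical_filter}`` and ``graph.implied`` is ``{band}``
        (hypothetical values)::

            data_id = DataCoordinate.fromFullValues(graph, ("HSC", "HSC-R", "r"))
            assert data_id.hasFull()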
336 """
337 assert len(graph.dimensions) == len(
338 values
339 ), f"Inconsistency between dimensions {graph.dimensions} and full values {values}."
340 return _BasicTupleDataCoordinate(graph, values)

    def __hash__(self) -> int:
        return hash((self.graph,) + self.values_tuple())

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, DataCoordinate):
            other = DataCoordinate.standardize(other, universe=self.universe)
        return self.graph == other.graph and self.values_tuple() == other.values_tuple()

    def __repr__(self) -> str:
        # We can't make repr yield something that could be exec'd here without
        # printing out the whole DimensionUniverse the graph is derived from.
        # So we print something that mostly looks like a dict, but doesn't
        # quote its keys: that's both more compact and something that can't
        # be mistaken for an actual dict or something that could be exec'd.
        terms = [f"{d}: {self[d]!r}" for d in self.graph.required.names]
        if self.hasFull() and self.graph.required != self.graph.dimensions:
            terms.append("...")
        return "{{{}}}".format(", ".join(terms))

    def __lt__(self, other: Any) -> bool:
        # Allow DataCoordinate to be sorted.
        if not isinstance(other, type(self)):
            return NotImplemented
        # Form tuple of tuples for each DataCoordinate:
        # Unlike repr() we only use required keys here to ensure that
        # __eq__ can not be true simultaneously with __lt__ being true.
        self_kv = tuple(self.items())
        other_kv = tuple(other.items())

        return self_kv < other_kv

    def __iter__(self) -> Iterator[Dimension]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self.keys())

    def keys(self) -> NamedValueAbstractSet[Dimension]:  # type: ignore
        return self.graph.required

    @property
    def names(self) -> Set[str]:
        """Names of the required dimensions identified by this data ID.

        They are returned in the same order as `keys`
        (`collections.abc.Set` [ `str` ]).
        """
        return self.keys().names

    @abstractmethod
    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        """Return a `DataCoordinate` whose graph is a subset of ``self.graph``.

        Parameters
        ----------
        graph : `DimensionGraph`
            The dimensions identified by the returned `DataCoordinate`.

        Returns
        -------
        coordinate : `DataCoordinate`
            A `DataCoordinate` instance that identifies only the given
            dimensions. May be ``self`` if ``graph == self.graph``.

        Raises
        ------
        KeyError
            Raised if the primary key value for one or more required
            dimensions is unknown. This may happen if
            ``graph.issubset(self.graph)`` is `False`, or even if
            ``graph.issubset(self.graph)`` is `True`, if ``self.hasFull()``
            is `False` and ``graph.required.issubset(self.graph.required)``
            is `False`. As an example of the latter case, consider trying to
            go from a data ID with dimensions {instrument, physical_filter,
            band} to just {instrument, band}; band is implied by
            physical_filter and hence would have no value in the original
            data ID if ``self.hasFull()`` is `False`.

        Notes
        -----
        If `hasFull` and `hasRecords` return `True` on ``self``, they will
        return `True` (respectively) on the returned `DataCoordinate` as well.
        The converse does not hold.
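
        Examples
        --------
        A minimal sketch, assuming ``data_id`` identifies ``instrument`` and
        ``detector`` and has full values (hypothetical dimension names)::

            smaller = data_id.subset(
                DimensionGraph(data_id.universe, names=["instrument"])
            )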
425 """
426 raise NotImplementedError()

    @abstractmethod
    def union(self, other: DataCoordinate) -> DataCoordinate:
        """Combine two data IDs.

        Returns a new data ID that identifies all dimensions that either of
        the operands identifies.

        Parameters
        ----------
        other : `DataCoordinate`
            Data ID to combine with ``self``.

        Returns
        -------
        unioned : `DataCoordinate`
            A `DataCoordinate` instance that satisfies
            ``unioned.graph == self.graph.union(other.graph)``. Will preserve
            ``hasFull`` and ``hasRecords`` whenever possible.

        Notes
        -----
        No checking for consistency is performed on values for keys that
        ``self`` and ``other`` have in common, and which value is included in
        the returned data ID is not specified.
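
        Examples
        --------
        A minimal sketch, assuming ``id_a`` and ``id_b`` are existing
        `DataCoordinate` instances from the same `DimensionUniverse`::

            combined = id_a.union(id_b)
            assert combined.graph == id_a.graph.union(id_b.graph)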
452 """
453 raise NotImplementedError()

    @abstractmethod
    def expanded(
        self, records: NameLookupMapping[DimensionElement, DimensionRecord | None]
    ) -> DataCoordinate:
        """Return a `DataCoordinate` that holds the given records.

        Guarantees that `hasRecords` returns `True`.

        This is a low-level interface with at most assertion-level checking of
        inputs. Most callers should use `Registry.expandDataId` instead.

        Parameters
        ----------
        records : `~collections.abc.Mapping` [ `str`, `DimensionRecord` or \
                `None` ]
            A `NamedKeyMapping` with `DimensionElement` keys or a regular
            `~collections.abc.Mapping` with `str` (`DimensionElement` name)
            keys and `DimensionRecord` values. Keys must cover all elements in
            ``self.graph.elements``. Values may be `None`, but only to reflect
            actual NULL values in the database, not just records that have not
            been fetched.
        """
        raise NotImplementedError()

    @property
    def universe(self) -> DimensionUniverse:
        """Universe that defines all known compatible dimensions.

        The universe will be compatible with this coordinate
        (`DimensionUniverse`).
        """
        return self.graph.universe

    @property
    @abstractmethod
    def graph(self) -> DimensionGraph:
        """Dimensions identified by this data ID (`DimensionGraph`).

        Note that values are only required to be present for dimensions in
        ``self.graph.required``; all others may be retrieved (from a
        `Registry`) given these.
        """
        raise NotImplementedError()

    @abstractmethod
    def hasFull(self) -> bool:
        """Whether this data ID contains values for implied dimensions as
        well as required ones.

        Returns
        -------
        state : `bool`
            If `True`, `__getitem__`, `get`, and `__contains__` (but not
            `keys`!) will act as though the mapping includes key-value pairs
            for implied dimensions, and the `full` property may be used. If
            `False`, these operations only include key-value pairs for
            required dimensions, and accessing `full` is an error. Always
            `True` if there are no implied dimensions.
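
        Examples
        --------
        A minimal sketch, assuming ``data_id`` identifies
        ``physical_filter``, which implies ``band``::

            if data_id.hasFull():
                band = data_id["band"]  # implied values are available
            else:
                band = None  # looking up "band" would raise KeyError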
512 """
513 raise NotImplementedError()

    @property
    @abstractmethod
    def full(self) -> NamedKeyMapping[Dimension, DataIdValue]:
        """Return a mapping for all dimensions in ``self.graph``.

        The mapping includes key-value pairs for all dimensions in
        ``self.graph``, including implied (`NamedKeyMapping`).

        Accessing this attribute if `hasFull` returns `False` is a logic error
        that may raise an exception of unspecified type either immediately or
        when implied keys are accessed via the returned mapping, depending on
        the implementation and whether assertions are enabled.
        """
        raise NotImplementedError()

    @abstractmethod
    def values_tuple(self) -> tuple[DataIdValue, ...]:
        """Return the required values (only) of this data ID as a tuple.

        In contexts where all data IDs have the same dimensions, comparing and
        hashing these tuples can be *much* faster than comparing the original
        `DataCoordinate` instances.
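
        Examples
        --------
        A minimal sketch of using these tuples as fast grouping keys,
        assuming ``data_ids`` is an iterable of `DataCoordinate` instances
        that all have the same dimensions::

            from collections import defaultdict

            groups = defaultdict(list)  # values_tuple() -> list of data IDs
            for data_id in data_ids:
                groups[data_id.values_tuple()].append(data_id)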
537 """
538 raise NotImplementedError()

    @abstractmethod
    def hasRecords(self) -> bool:
        """Whether this data ID contains records.

        These are the records for all of the dimension elements it identifies.

        Returns
        -------
        state : `bool`
            If `True`, the following attributes may be accessed:

            - `records`
            - `region`
            - `timespan`
            - `pack`

            If `False`, accessing any of these is considered a logic error.
        """
        raise NotImplementedError()

    @property
    def records(self) -> NamedKeyMapping[DimensionElement, DimensionRecord | None]:
        """Return the records.

        Returns a mapping that contains `DimensionRecord` objects for all
        elements identified by this data ID (`NamedKeyMapping`).

        The values of this mapping may be `None` if and only if there is no
        record for that element with these dimensions in the database (which
        means some foreign key field must have a NULL value).

        Accessing this attribute if `hasRecords` returns `False` is a logic
        error that may raise an exception of unspecified type either
        immediately or when the returned mapping is used, depending on the
        implementation and whether assertions are enabled.
        """
        assert self.hasRecords(), "records may only be accessed if hasRecords() returns True."
        return _DataCoordinateRecordsView(self)

    @abstractmethod
    def _record(self, name: str) -> DimensionRecord | None:
        """Protected implementation hook that backs the ``records`` attribute.

        Parameters
        ----------
        name : `str`
            The name of a `DimensionElement`, guaranteed to be in
            ``self.graph.elements.names``.

        Returns
        -------
        record : `DimensionRecord` or `None`
            The dimension record for the given element identified by this
            data ID, or `None` if there is no such record.
        """
        raise NotImplementedError()

    @property
    def region(self) -> Region | None:
        """Spatial region associated with this data ID
        (`lsst.sphgeom.Region` or `None`).

        This is `None` if and only if ``self.graph.spatial`` is empty.

        Accessing this attribute if `hasRecords` returns `False` is a logic
        error that may or may not raise an exception, depending on the
        implementation and whether assertions are enabled.
        """
        assert self.hasRecords(), "region may only be accessed if hasRecords() returns True."
        regions = []
        for family in self.graph.spatial:
            element = family.choose(self.graph.elements)
            record = self._record(element.name)
            if record is None or record.region is None:
                return None
            else:
                regions.append(record.region)
        return _intersectRegions(*regions)

    @property
    def timespan(self) -> Timespan | None:
        """Temporal interval associated with this data ID
        (`Timespan` or `None`).

        This is `None` if and only if ``self.graph.temporal`` is empty.

        Accessing this attribute if `hasRecords` returns `False` is a logic
        error that may or may not raise an exception, depending on the
        implementation and whether assertions are enabled.
        """
        assert self.hasRecords(), "timespan may only be accessed if hasRecords() returns True."
        timespans = []
        for family in self.graph.temporal:
            element = family.choose(self.graph.elements)
            record = self._record(element.name)
            # DimensionRecord subclasses for temporal elements always have
            # .timespan, but they're dynamic so this can't be type-checked.
            if record is None or record.timespan is None:
                return None
            else:
                timespans.append(record.timespan)
        if not timespans:
            return None
        elif len(timespans) == 1:
            return timespans[0]
        else:
            return Timespan.intersection(*timespans)

    @overload
    def pack(self, name: str, *, returnMaxBits: Literal[True]) -> tuple[int, int]:
        ...

    @overload
    def pack(self, name: str, *, returnMaxBits: Literal[False]) -> int:
        ...

    # TODO: Remove this method and its overloads above on DM-38687.
    @deprecated(
        "Deprecated in favor of configurable dimension packers. Will be removed after v26.",
        version="v26",
        category=FutureWarning,
    )
    def pack(self, name: str, *, returnMaxBits: bool = False) -> tuple[int, int] | int:
        """Pack this data ID into an integer.

        Parameters
        ----------
        name : `str`
            Name of the `DimensionPacker` algorithm (as defined in the
            dimension configuration).
        returnMaxBits : `bool`, optional
            If `True` (`False` is default), return the maximum number of
            nonzero bits in the returned integer across all data IDs.

        Returns
        -------
        packed : `int`
            Integer ID. This ID is unique only across data IDs that have
            the same values for the packer's "fixed" dimensions.
        maxBits : `int`, optional
            Maximum number of nonzero bits in ``packed``. Not returned unless
            ``returnMaxBits`` is `True`.

        Notes
        -----
        Calling this method if `hasRecords` returns `False` is a logic error
        that may or may not raise an exception, depending on the
        implementation and whether assertions are enabled.
        """
        assert self.hasRecords(), "pack() may only be called if hasRecords() returns True."
        return self.universe.makePacker(name, self).pack(self, returnMaxBits=returnMaxBits)

    def to_simple(self, minimal: bool = False) -> SerializedDataCoordinate:
        """Convert this class to a simple python type.

        This is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. If set, the records will not be
            attached.

        Returns
        -------
        simple : `SerializedDataCoordinate`
            The object converted to simple form.
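
        Examples
        --------
        A minimal sketch of a serialization round trip, assuming ``data_id``
        is an existing `DataCoordinate` and ``universe`` is its
        `DimensionUniverse`::

            simple = data_id.to_simple(minimal=True)
            restored = DataCoordinate.from_simple(simple, universe=universe)
            assert restored == data_id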
708 """
709 # Convert to a dict form
710 if self.hasFull():
711 dataId = self.full.byName()
712 else:
713 dataId = self.byName()
714 records: dict[str, SerializedDimensionRecord] | None
715 if not minimal and self.hasRecords():
716 records = {k: v.to_simple() for k, v in self.records.byName().items() if v is not None}
717 else:
718 records = None
720 return SerializedDataCoordinate(dataId=dataId, records=records)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDataCoordinate,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
    ) -> DataCoordinate:
        """Construct a new object from the simplified form.

        The data is assumed to be of the form returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDataCoordinate`
            The serialized object returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The universe of all known dimensions.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if ``universe`` is provided explicitly.

        Returns
        -------
        dataId : `DataCoordinate`
            Newly-constructed object.
        """
        key = (frozenset(simple.dataId.items()), simple.records is not None)
        cache = PersistenceContextVars.dataCoordinates.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert a dict to a DataCoordinate")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # This is for mypy.
            raise ValueError("Unable to determine a usable universe")

        dataId = cls.standardize(simple.dataId, universe=universe)
        if simple.records:
            dataId = dataId.expanded(
                {k: DimensionRecord.from_simple(v, universe=universe) for k, v in simple.records.items()}
            )
        if cache is not None:
            cache[key] = dataId
        return dataId

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)


DataId = DataCoordinate | Mapping[str, Any]
"""A type-annotation alias for signatures that accept both informal data ID
dictionaries and validated `DataCoordinate` instances.
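
For example, a hypothetical helper that accepts either form::

    def standardized(data_id: DataId, universe: DimensionUniverse) -> DataCoordinate:
        return DataCoordinate.standardize(data_id, universe=universe)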
777"""


class _DataCoordinateFullView(NamedKeyMapping[Dimension, DataIdValue]):
    """View class for `DataCoordinate.full`.

    Provides the default implementation for `DataCoordinate.full`.

    Parameters
    ----------
    target : `DataCoordinate`
        The `DataCoordinate` instance this object provides a view of.
    """

    def __init__(self, target: _BasicTupleDataCoordinate):
        self._target = target

    __slots__ = ("_target",)

    def __repr__(self) -> str:
        terms = [f"{d}: {self[d]!r}" for d in self._target.graph.dimensions.names]
        return "{{{}}}".format(", ".join(terms))

    def __getitem__(self, key: DataIdKey) -> DataIdValue:
        return self._target[key]

    def __iter__(self) -> Iterator[Dimension]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self.keys())

    def keys(self) -> NamedValueAbstractSet[Dimension]:  # type: ignore
        return self._target.graph.dimensions

    @property
    def names(self) -> Set[str]:
        # Docstring inherited from `NamedKeyMapping`.
        return self.keys().names


class _DataCoordinateRecordsView(NamedKeyMapping[DimensionElement, DimensionRecord | None]):
    """View class for `DataCoordinate.records`.

    Provides the default implementation for `DataCoordinate.records`.

    Parameters
    ----------
    target : `DataCoordinate`
        The `DataCoordinate` instance this object provides a view of.
    """

    def __init__(self, target: DataCoordinate):
        self._target = target

    __slots__ = ("_target",)

    def __repr__(self) -> str:
        terms = [f"{d}: {self[d]!r}" for d in self._target.graph.elements.names]
        return "{{{}}}".format(", ".join(terms))

    def __str__(self) -> str:
        return "\n".join(str(v) for v in self.values())

    def __getitem__(self, key: DimensionElement | str) -> DimensionRecord | None:
        if isinstance(key, DimensionElement):
            key = key.name
        return self._target._record(key)

    def __iter__(self) -> Iterator[DimensionElement]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self.keys())

    def keys(self) -> NamedValueAbstractSet[DimensionElement]:  # type: ignore
        return self._target.graph.elements

    @property
    def names(self) -> Set[str]:
        # Docstring inherited from `NamedKeyMapping`.
        return self.keys().names


class _BasicTupleDataCoordinate(DataCoordinate):
    """Standard implementation of `DataCoordinate`.

    Backed by a tuple of values.

    This class should only be accessed outside this module via the
    `DataCoordinate` interface, and should only be constructed via the static
    methods there.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions to be identified.
    values : `tuple` [ `int` or `str` ]
        Data ID values, ordered to match ``graph._dataCoordinateIndices``. May
        include values for just required dimensions (which always come first)
        or all dimensions.
    """

    def __init__(self, graph: DimensionGraph, values: tuple[DataIdValue, ...]):
        self._graph = graph
        self._values = values

    __slots__ = ("_graph", "_values")

    @property
    def graph(self) -> DimensionGraph:
        # Docstring inherited from DataCoordinate.
        return self._graph

    def __getitem__(self, key: DataIdKey) -> DataIdValue:
        # Docstring inherited from DataCoordinate.
        if isinstance(key, Dimension):
            key = key.name
        index = self._graph._dataCoordinateIndices[key]
        try:
            return self._values[index]
        except IndexError:
            # Caller asked for an implied dimension, but this object only has
            # values for the required ones.
            raise KeyError(key) from None

    def byName(self) -> dict[str, DataIdValue]:
        # Docstring inheritance.
        # Reimplementation is for optimization; `values_tuple()` is much
        # faster to iterate over than values() because it doesn't go through
        # `__getitem__`.
        return dict(zip(self.names, self.values_tuple(), strict=True))

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        if self._graph == graph:
            return self
        elif self.hasFull() or self._graph.required >= graph.dimensions:
            return _BasicTupleDataCoordinate(
                graph,
                tuple(self[k] for k in graph._dataCoordinateIndices),
            )
        else:
            return _BasicTupleDataCoordinate(graph, tuple(self[k] for k in graph.required.names))

    def union(self, other: DataCoordinate) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        graph = self.graph.union(other.graph)
        # See if one or both input data IDs is already what we want to return;
        # if so, return the most complete one we have.
        if other.graph == graph:
            if self.graph == graph:
                # Input data IDs have the same graph (which is also the result
                # graph), but may not have the same content.
                # other might have records; self does not, so try other first.
                # If it at least has full values, it's no worse than self.
                if other.hasFull():
                    return other
                else:
                    return self
            elif other.hasFull():
                return other
            # There's some chance that neither self nor other has full values,
            # but that together they provide enough for the union to. Let the
            # general case below handle that.
        elif self.graph == graph and self.hasFull():
            # No chance at returning records. If self has full values, it's
            # the best we can do.
            return self
        # General case with actual merging of dictionaries.
        values = self.full.byName() if self.hasFull() else self.byName()
        values.update(other.full.byName() if other.hasFull() else other.byName())
        return DataCoordinate.standardize(values, graph=graph)

    @property
    def full(self) -> NamedKeyMapping[Dimension, DataIdValue]:
        # Docstring inherited.
        assert self.hasFull(), "full may only be accessed if hasFull() returns True."
        return _DataCoordinateFullView(self)

    def expanded(
        self, records: NameLookupMapping[DimensionElement, DimensionRecord | None]
    ) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        values = self._values
        if not self.hasFull():
            # Extract a complete values tuple from the attributes of the given
            # records. It's possible for these to be inconsistent with
            # self._values (which is a serious problem, of course), but we've
            # documented this as a no-checking API.
            values += tuple(getattr(records[d.name], d.primaryKey.name) for d in self._graph.implied)
        return _ExpandedTupleDataCoordinate(self._graph, values, records)

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinate.
        return len(self._values) == len(self._graph._dataCoordinateIndices)

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinate.
        return False

    def values_tuple(self) -> tuple[DataIdValue, ...]:
        # Docstring inherited from DataCoordinate.
        return self._values[: len(self._graph.required)]

    def _record(self, name: str) -> DimensionRecord | None:
        # Docstring inherited from DataCoordinate.
        raise AssertionError()

    def __reduce__(self) -> tuple[Any, ...]:
        return (_BasicTupleDataCoordinate, (self._graph, self._values))

    def __getattr__(self, name: str) -> Any:
        if name in self.graph.elements.names:
            raise AttributeError(
                f"Dimension record attribute {name!r} is only available on expanded DataCoordinates."
            )
        raise AttributeError(name)


class _ExpandedTupleDataCoordinate(_BasicTupleDataCoordinate):
    """A `DataCoordinate` implementation that can hold `DimensionRecord`
    objects.

    This class should only be accessed outside this module via the
    `DataCoordinate` interface, and should only be constructed via calls to
    `DataCoordinate.expanded`.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions to be identified.
    values : `tuple` [ `int` or `str` ]
        Data ID values, ordered to match ``graph._dataCoordinateIndices``.
        Must include values for all dimensions (required and implied).
    records : `~collections.abc.Mapping` [ `str`, `DimensionRecord` or `None` ]
        A `NamedKeyMapping` with `DimensionElement` keys or a regular
        `~collections.abc.Mapping` with `str` (`DimensionElement` name) keys
        and `DimensionRecord` values. Keys must cover all elements in
        ``self.graph.elements``. Values may be `None`, but only to reflect
        actual NULL values in the database, not just records that have not
        been fetched.
    """

    def __init__(
        self,
        graph: DimensionGraph,
        values: tuple[DataIdValue, ...],
        records: NameLookupMapping[DimensionElement, DimensionRecord | None],
    ):
        super().__init__(graph, values)
        assert super().hasFull(), "This implementation requires full dimension values."
        self._records = records

    __slots__ = ("_records",)

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        if self._graph == graph:
            return self
        return _ExpandedTupleDataCoordinate(
            graph, tuple(self[k] for k in graph._dataCoordinateIndices), records=self._records
        )

    def expanded(
        self, records: NameLookupMapping[DimensionElement, DimensionRecord | None]
    ) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        return self

    def union(self, other: DataCoordinate) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        graph = self.graph.union(other.graph)
        # See if one or both input data IDs is already what we want to return;
        # if so, return the most complete one we have.
        if self.graph == graph:
            # self has records, so even if other is also a valid result, it's
            # no better.
            return self
        if other.graph == graph and other.hasFull():
            # If other has full values, and self does not identify some of
            # those, it's the best we can do. It may have records, too.
            return other
        # If other does not have full values, there's a chance self may
        # provide the values needed to complete it. For example, self
        # could be {band} while other could be
        # {instrument, physical_filter, band}, with band unknown.
        # General case with actual merging of dictionaries.
        values = self.full.byName()
        values.update(other.full.byName() if other.hasFull() else other.byName())
        basic = DataCoordinate.standardize(values, graph=graph)
        # See if we can add records.
        if self.hasRecords() and other.hasRecords():
            # Sometimes the elements of a union of graphs can contain elements
            # that weren't in either input graph (because graph unions are
            # only on dimensions), e.g. {visit} | {detector} brings along
            # visit_detector_region.
            elements = set(graph.elements.names)
            elements -= self.graph.elements.names
            elements -= other.graph.elements.names
            if not elements:
                records = NamedKeyDict[DimensionElement, DimensionRecord | None](self.records)
                records.update(other.records)
                return basic.expanded(records.freeze())
        return basic

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinate.
        return True

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinate.
        return True

    def _record(self, name: str) -> DimensionRecord | None:
        # Docstring inherited from DataCoordinate.
        return self._records[name]

    def __reduce__(self) -> tuple[Any, ...]:
        return (_ExpandedTupleDataCoordinate, (self._graph, self._values, self._records))

    def __getattr__(self, name: str) -> Any:
        try:
            return self._record(name)
        except KeyError:
            raise AttributeError(name) from None

    def __dir__(self) -> list[str]:
        result = list(super().__dir__())
        result.extend(self.graph.elements.names)
        return result