Coverage for python/lsst/daf/butler/core/dimensions/_coordinate.py: 26%
343 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-31 02:41 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-31 02:41 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
23# Design notes for this module are in
24# doc/lsst.daf.butler/dev/dataCoordinate.py.
25#
27from __future__ import annotations
29__all__ = ("DataCoordinate", "DataId", "DataIdKey", "DataIdValue", "SerializedDataCoordinate")
31import numbers
32from abc import abstractmethod
33from typing import (
34 TYPE_CHECKING,
35 AbstractSet,
36 Any,
37 Dict,
38 Iterator,
39 Literal,
40 Mapping,
41 Optional,
42 Tuple,
43 Union,
44 overload,
45)
47from lsst.sphgeom import IntersectionRegion, Region
48from pydantic import BaseModel
50from ..json import from_json_pydantic, to_json_pydantic
51from ..named import NamedKeyDict, NamedKeyMapping, NamedValueAbstractSet, NameLookupMapping
52from ..timespan import Timespan
53from ._elements import Dimension, DimensionElement
54from ._graph import DimensionGraph
55from ._records import DimensionRecord, SerializedDimensionRecord
57if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
58 from ...registry import Registry
59 from ._universe import DimensionUniverse
DataIdKey = Union[str, Dimension]
"""Type annotation alias for the keys that can be used to index a
DataCoordinate.
"""

# Pydantic will cast int to str if str is first in the Union.
DataIdValue = Union[int, str, None]
"""Type annotation alias for the values that can be present in a
DataCoordinate or other data ID.
"""
class SerializedDataCoordinate(BaseModel):
    """Simplified model for serializing a `DataCoordinate`."""

    # Mapping from dimension name to its primary key value.
    dataId: Dict[str, DataIdValue]
    # Mapping from dimension element name to its serialized record, or `None`
    # when records were not attached (minimal serialization).
    records: Optional[Dict[str, SerializedDimensionRecord]] = None

    @classmethod
    def direct(cls, *, dataId: Dict[str, DataIdValue], records: Dict[str, Dict]) -> SerializedDataCoordinate:
        """Construct a `SerializedDataCoordinate` directly without validators.

        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.
        """
        # Bypass __init__ (and therefore pydantic validation) entirely;
        # attributes are assigned with object.__setattr__ because pydantic
        # models may otherwise intercept attribute assignment.
        node = SerializedDataCoordinate.__new__(cls)
        setter = object.__setattr__
        setter(node, "dataId", dataId)
        setter(
            node,
            "records",
            records
            if records is None
            else {k: SerializedDimensionRecord.direct(**v) for k, v in records.items()},
        )
        # Pydantic tracks which fields were explicitly set; mark both so the
        # model behaves as if it had been constructed normally.
        setter(node, "__fields_set__", {"dataId", "records"})
        return node
103def _intersectRegions(*args: Region) -> Optional[Region]:
104 """Return the intersection of several regions.
106 For internal use by `ExpandedDataCoordinate` only.
108 If no regions are provided, returns `None`.
109 """
110 if len(args) == 0:
111 return None
112 else:
113 result = args[0]
114 for n in range(1, len(args)):
115 result = IntersectionRegion(result, args[n])
116 return result
class DataCoordinate(NamedKeyMapping[Dimension, DataIdValue]):
    """Data ID dictionary.

    An immutable data ID dictionary that guarantees that its key-value pairs
    identify at least all required dimensions in a `DimensionGraph`.

    `DataCoordinate` itself is an ABC, but provides `staticmethod` factory
    functions for private concrete implementations that should be sufficient
    for most purposes. `standardize` is the most flexible and safe of these;
    the others (`makeEmpty`, `fromRequiredValues`, and `fromFullValues`) are
    more specialized and perform little or no checking of inputs.

    Notes
    -----
    Like any data ID class, `DataCoordinate` behaves like a dictionary, but
    with some subtleties:

    - Both `Dimension` instances and `str` names thereof may be used as keys
      in lookup operations, but iteration (and `keys`) will yield `Dimension`
      instances. The `names` property can be used to obtain the corresponding
      `str` names.

    - Lookups for implied dimensions (those in ``self.graph.implied``) are
      supported if and only if `hasFull` returns `True`, and are never
      included in iteration or `keys`. The `full` property may be used to
      obtain a mapping whose keys do include implied dimensions.

    - Equality comparison with other mappings is supported, but it always
      considers only required dimensions (as well as requiring both operands
      to identify the same dimensions). This is not quite consistent with the
      way mappings usually work - normally differing keys imply unequal
      mappings - but it makes sense in this context because data IDs with the
      same values for required dimensions but different values for implied
      dimensions represent a serious problem with the data that
      `DataCoordinate` cannot generally recognize on its own, and a data ID
      that knows implied dimension values should still be able to compare as
      equal to one that does not. This is of course not the way comparisons
      between simple `dict` data IDs work, and hence using a `DataCoordinate`
      instance for at least one operand in any data ID comparison is strongly
      recommended.

    See also
    --------
    :ref:`lsst.daf.butler-dimensions_data_ids`
    """

    __slots__ = ()

    # Model class used by to_simple/from_simple serialization.
    _serializedType = SerializedDataCoordinate

    @staticmethod
    def standardize(
        mapping: Optional[NameLookupMapping[Dimension, DataIdValue]] = None,
        *,
        graph: Optional[DimensionGraph] = None,
        universe: Optional[DimensionUniverse] = None,
        defaults: Optional[DataCoordinate] = None,
        **kwargs: Any,
    ) -> DataCoordinate:
        """Standardize the supplied dataId.

        Adapts an arbitrary mapping and/or additional arguments into a true
        `DataCoordinate`, or augment an existing one.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping`, optional
            An informal data ID that maps dimensions or dimension names to
            their primary key values (may also be a true `DataCoordinate`).
        graph : `DimensionGraph`
            The dimensions to be identified by the new `DataCoordinate`.
            If not provided, will be inferred from the keys of ``mapping`` and
            ``**kwargs``, and ``universe`` must be provided unless ``mapping``
            is already a `DataCoordinate`.
        universe : `DimensionUniverse`
            All known dimensions and their relationships; used to expand
            and validate dependencies when ``graph`` is not provided.
        defaults : `DataCoordinate`, optional
            Default dimension key-value pairs to use when needed. These are
            never used to infer ``graph``, and are ignored if a different
            value is provided for the same key in ``mapping`` or ``**kwargs``.
        **kwargs
            Additional keyword arguments are treated like additional key-value
            pairs in ``mapping``.

        Returns
        -------
        coordinate : `DataCoordinate`
            A validated `DataCoordinate` instance.

        Raises
        ------
        TypeError
            Raised if the set of optional arguments provided is not supported.
        KeyError
            Raised if a key-value pair for a required dimension is missing.
        """
        d: Dict[str, DataIdValue] = {}
        if isinstance(mapping, DataCoordinate):
            if graph is None:
                if not kwargs:
                    # Already standardized to exactly what we want.
                    return mapping
            elif kwargs.keys().isdisjoint(graph.dimensions.names):
                # User provided kwargs, but told us not to use them by
                # passing in dimensions that are disjoint from those kwargs.
                # This is not necessarily user error - it's a useful pattern
                # to pass in all of the key-value pairs you have and let the
                # code here pull out only what it needs.
                return mapping.subset(graph)
            assert universe is None or universe == mapping.universe
            universe = mapping.universe
            d.update((name, mapping[name]) for name in mapping.graph.required.names)
            if mapping.hasFull():
                # Implied values are available too; carry them along.
                d.update((name, mapping[name]) for name in mapping.graph.implied.names)
        elif isinstance(mapping, NamedKeyMapping):
            d.update(mapping.byName())
        elif mapping is not None:
            d.update(mapping)
        d.update(kwargs)
        if graph is None:
            if defaults is not None:
                universe = defaults.universe
            elif universe is None:
                raise TypeError("universe must be provided if graph is not.")
            graph = DimensionGraph(universe, names=d.keys())
        if not graph.dimensions:
            return DataCoordinate.makeEmpty(graph.universe)
        if defaults is not None:
            # setdefault: explicit values in d always win over defaults.
            if defaults.hasFull():
                for k, v in defaults.full.items():
                    d.setdefault(k.name, v)
            else:
                for k, v in defaults.items():
                    d.setdefault(k.name, v)
        if d.keys() >= graph.dimensions.names:
            # We have values for all dimensions (required and implied).
            values = tuple(d[name] for name in graph._dataCoordinateIndices.keys())
        else:
            try:
                values = tuple(d[name] for name in graph.required.names)
            except KeyError as err:
                raise KeyError(f"No value in data ID ({mapping}) for required dimension {err}.") from err
        # Some backends cannot handle numpy.int64 type which is a subclass of
        # numbers.Integral; convert that to int.
        values = tuple(
            int(val) if isinstance(val, numbers.Integral) else val for val in values  # type: ignore
        )
        return _BasicTupleDataCoordinate(graph, values)

    @staticmethod
    def makeEmpty(universe: DimensionUniverse) -> DataCoordinate:
        """Return an empty `DataCoordinate`.

        It identifies the null set of dimensions.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Universe to which this null dimension set belongs.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID object that identifies no dimensions. `hasFull` and
            `hasRecords` are guaranteed to return `True`, because both `full`
            and `records` are just empty mappings.
        """
        return _ExpandedTupleDataCoordinate(universe.empty, (), {})

    @staticmethod
    def fromRequiredValues(graph: DimensionGraph, values: Tuple[DataIdValue, ...]) -> DataCoordinate:
        """Construct a `DataCoordinate` from required dimension values.

        This is a low-level interface with at most assertion-level checking of
        inputs. Most callers should use `standardize` instead.

        Parameters
        ----------
        graph : `DimensionGraph`
            Dimensions this data ID will identify.
        values : `tuple` [ `int` or `str` ]
            Tuple of primary key values corresponding to ``graph.required``,
            in that order.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID object that identifies the given dimensions.
            ``dataId.hasFull()`` will return `True` if and only if
            ``graph.implied`` is empty, and ``dataId.hasRecords()`` will never
            return `True`.
        """
        assert len(graph.required) == len(
            values
        ), f"Inconsistency between dimensions {graph.required} and required values {values}."
        return _BasicTupleDataCoordinate(graph, values)

    @staticmethod
    def fromFullValues(graph: DimensionGraph, values: Tuple[DataIdValue, ...]) -> DataCoordinate:
        """Construct a `DataCoordinate` from all dimension values.

        This is a low-level interface with at most assertion-level checking of
        inputs. Most callers should use `standardize` instead.

        Parameters
        ----------
        graph : `DimensionGraph`
            Dimensions this data ID will identify.
        values : `tuple` [ `int` or `str` ]
            Tuple of primary key values corresponding to
            ``itertools.chain(graph.required, graph.implied)``, in that order.
            Note that this is _not_ the same order as ``graph.dimensions``,
            though these contain the same elements.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID object that identifies the given dimensions.
            ``dataId.hasFull()`` will return `True` if and only if
            ``graph.implied`` is empty, and ``dataId.hasRecords()`` will never
            return `True`.
        """
        assert len(graph.dimensions) == len(
            values
        ), f"Inconsistency between dimensions {graph.dimensions} and full values {values}."
        return _BasicTupleDataCoordinate(graph, values)

    def __hash__(self) -> int:
        # Hash on graph plus required values only, consistent with __eq__.
        return hash((self.graph,) + tuple(self[d.name] for d in self.graph.required))

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, DataCoordinate):
            other = DataCoordinate.standardize(other, universe=self.universe)
        # Only required dimensions participate; see class Notes for rationale.
        return self.graph == other.graph and all(self[d.name] == other[d.name] for d in self.graph.required)

    def __repr__(self) -> str:
        # We can't make repr yield something that could be exec'd here without
        # printing out the whole DimensionUniverse the graph is derived from.
        # So we print something that mostly looks like a dict, but doesn't
        # quote its keys: that's both more compact and something that can't
        # be mistaken for an actual dict or something that could be exec'd.
        terms = [f"{d}: {self[d]!r}" for d in self.graph.required.names]
        if self.hasFull() and self.graph.required != self.graph.dimensions:
            terms.append("...")
        return "{{{}}}".format(", ".join(terms))

    def __lt__(self, other: Any) -> bool:
        # Allow DataCoordinate to be sorted
        if not isinstance(other, type(self)):
            return NotImplemented
        # Form tuple of tuples for each DataCoordinate:
        # Unlike repr() we only use required keys here to ensure that
        # __eq__ can not be true simultaneously with __lt__ being true.
        self_kv = tuple(self.items())
        other_kv = tuple(other.items())

        return self_kv < other_kv

    def __iter__(self) -> Iterator[Dimension]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self.keys())

    def keys(self) -> NamedValueAbstractSet[Dimension]:  # type: ignore
        # Only required dimensions are exposed as keys; see class Notes.
        return self.graph.required

    @property
    def names(self) -> AbstractSet[str]:
        """Names of the required dimensions identified by this data ID.

        They are returned in the same order as `keys`
        (`collections.abc.Set` [ `str` ]).
        """
        return self.keys().names

    @abstractmethod
    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        """Return a `DataCoordinate` whose graph is a subset of ``self.graph``.

        Parameters
        ----------
        graph : `DimensionGraph`
            The dimensions identified by the returned `DataCoordinate`.

        Returns
        -------
        coordinate : `DataCoordinate`
            A `DataCoordinate` instance that identifies only the given
            dimensions. May be ``self`` if ``graph == self.graph``.

        Raises
        ------
        KeyError
            Raised if the primary key value for one or more required
            dimensions is unknown. This may happen if
            ``graph.issubset(self.graph)`` is `False`, or even if
            ``graph.issubset(self.graph)`` is `True`, if ``self.hasFull()`` is
            `False` and ``graph.required.issubset(self.graph.required)`` is
            `False`. As an example of the latter case, consider trying to go
            from a data ID with dimensions {instrument, physical_filter, band}
            to just {instrument, band}; band is implied by physical_filter and
            hence would have no value in the original data ID if
            ``self.hasFull()`` is `False`.

        Notes
        -----
        If `hasFull` and `hasRecords` return `True` on ``self``, they will
        return `True` (respectively) on the returned `DataCoordinate` as well.
        The converse does not hold.
        """
        raise NotImplementedError()

    @abstractmethod
    def union(self, other: DataCoordinate) -> DataCoordinate:
        """Combine two data IDs.

        Yields a new one that identifies all dimensions that either of them
        identify.

        Parameters
        ----------
        other : `DataCoordinate`
            Data ID to combine with ``self``.

        Returns
        -------
        unioned : `DataCoordinate`
            A `DataCoordinate` instance that satisfies
            ``unioned.graph == self.graph.union(other.graph)``. Will preserve
            ``hasFull`` and ``hasRecords`` whenever possible.

        Notes
        -----
        No checking for consistency is performed on values for keys that
        ``self`` and ``other`` have in common, and which value is included in
        the returned data ID is not specified.
        """
        raise NotImplementedError()

    @abstractmethod
    def expanded(
        self, records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]]
    ) -> DataCoordinate:
        """Return a `DataCoordinate` that holds the given records.

        Guarantees that `hasRecords` returns `True`.

        This is a low-level interface with at most assertion-level checking of
        inputs. Most callers should use `Registry.expandDataId` instead.

        Parameters
        ----------
        records : `Mapping` [ `str`, `DimensionRecord` or `None` ]
            A `NamedKeyMapping` with `DimensionElement` keys or a regular
            `Mapping` with `str` (`DimensionElement` name) keys and
            `DimensionRecord` values. Keys must cover all elements in
            ``self.graph.elements``. Values may be `None`, but only to reflect
            actual NULL values in the database, not just records that have not
            been fetched.
        """
        raise NotImplementedError()

    @property
    def universe(self) -> DimensionUniverse:
        """Universe that defines all known compatible dimensions.

        The universe will be compatible with this coordinate
        (`DimensionUniverse`).
        """
        return self.graph.universe

    @property
    @abstractmethod
    def graph(self) -> DimensionGraph:
        """Dimensions identified by this data ID (`DimensionGraph`).

        Note that values are only required to be present for dimensions in
        ``self.graph.required``; all others may be retrieved (from a
        `Registry`) given these.
        """
        raise NotImplementedError()

    @abstractmethod
    def hasFull(self) -> bool:
        """Whether this data ID contains implied and required values.

        Returns
        -------
        state : `bool`
            If `True`, `__getitem__`, `get`, and `__contains__` (but not
            `keys`!) will act as though the mapping includes key-value pairs
            for implied dimensions, and the `full` property may be used. If
            `False`, these operations only include key-value pairs for
            required dimensions, and accessing `full` is an error. Always
            `True` if there are no implied dimensions.
        """
        raise NotImplementedError()

    @property
    def full(self) -> NamedKeyMapping[Dimension, DataIdValue]:
        """Return mapping for all dimensions in ``self.graph``.

        The mapping includes key-value pairs for all dimensions in
        ``self.graph``, including implied (`NamedKeyMapping`).

        Accessing this attribute if `hasFull` returns `False` is a logic error
        that may raise an exception of unspecified type either immediately or
        when implied keys are accessed via the returned mapping, depending on
        the implementation and whether assertions are enabled.
        """
        assert self.hasFull(), "full may only be accessed if hasFull() returns True."
        return _DataCoordinateFullView(self)

    @abstractmethod
    def hasRecords(self) -> bool:
        """Whether this data ID contains records.

        These are the records for all of the dimension elements it identifies.

        Returns
        -------
        state : `bool`
            If `True`, the following attributes may be accessed:

            - `records`
            - `region`
            - `timespan`
            - `pack`

            If `False`, accessing any of these is considered a logic error.
        """
        raise NotImplementedError()

    @property
    def records(self) -> NamedKeyMapping[DimensionElement, Optional[DimensionRecord]]:
        """Return the records.

        Returns a mapping that contains `DimensionRecord` objects for all
        elements identified by this data ID (`NamedKeyMapping`).

        The values of this mapping may be `None` if and only if there is no
        record for that element with these dimensions in the database (which
        means some foreign key field must have a NULL value).

        Accessing this attribute if `hasRecords` returns `False` is a logic
        error that may raise an exception of unspecified type either
        immediately or when the returned mapping is used, depending on the
        implementation and whether assertions are enabled.
        """
        assert self.hasRecords(), "records may only be accessed if hasRecords() returns True."
        return _DataCoordinateRecordsView(self)

    @abstractmethod
    def _record(self, name: str) -> Optional[DimensionRecord]:
        """Protected implementation hook that backs the ``records`` attribute.

        Parameters
        ----------
        name : `str`
            The name of a `DimensionElement`, guaranteed to be in
            ``self.graph.elements.names``.

        Returns
        -------
        record : `DimensionRecord` or `None`
            The dimension record for the given element identified by this
            data ID, or `None` if there is no such record.
        """
        raise NotImplementedError()

    @property
    def region(self) -> Optional[Region]:
        """Spatial region associated with this data ID.

        (`lsst.sphgeom.Region` or `None`).

        This is `None` if and only if ``self.graph.spatial`` is empty.

        Accessing this attribute if `hasRecords` returns `False` is a logic
        error that may or may not raise an exception, depending on the
        implementation and whether assertions are enabled.
        """
        assert self.hasRecords(), "region may only be accessed if hasRecords() returns True."
        regions = []
        for family in self.graph.spatial:
            element = family.choose(self.graph.elements)
            record = self._record(element.name)
            # A missing record or NULL region makes the overall region
            # undefined, not just smaller.
            if record is None or record.region is None:
                return None
            else:
                regions.append(record.region)
        return _intersectRegions(*regions)

    @property
    def timespan(self) -> Optional[Timespan]:
        """Temporal interval associated with this data ID.

        (`Timespan` or `None`).

        This is `None` if and only if ``self.graph.timespan`` is empty.

        Accessing this attribute if `hasRecords` returns `False` is a logic
        error that may or may not raise an exception, depending on the
        implementation and whether assertions are enabled.
        """
        assert self.hasRecords(), "timespan may only be accessed if hasRecords() returns True."
        timespans = []
        for family in self.graph.temporal:
            element = family.choose(self.graph.elements)
            record = self._record(element.name)
            # DimensionRecord subclasses for temporal elements always have
            # .timespan, but they're dynamic so this can't be type-checked.
            if record is None or record.timespan is None:
                return None
            else:
                timespans.append(record.timespan)
        if not timespans:
            return None
        elif len(timespans) == 1:
            return timespans[0]
        else:
            return Timespan.intersection(*timespans)

    @overload
    def pack(self, name: str, *, returnMaxBits: Literal[True]) -> Tuple[int, int]:
        ...

    @overload
    def pack(self, name: str, *, returnMaxBits: Literal[False]) -> int:
        ...

    def pack(self, name: str, *, returnMaxBits: bool = False) -> Union[Tuple[int, int], int]:
        """Pack this data ID into an integer.

        Parameters
        ----------
        name : `str`
            Name of the `DimensionPacker` algorithm (as defined in the
            dimension configuration).
        returnMaxBits : `bool`, optional
            If `True` (`False` is default), return the maximum number of
            nonzero bits in the returned integer across all data IDs.

        Returns
        -------
        packed : `int`
            Integer ID. This ID is unique only across data IDs that have
            the same values for the packer's "fixed" dimensions.
        maxBits : `int`, optional
            Maximum number of nonzero bits in ``packed``. Not returned unless
            ``returnMaxBits`` is `True`.

        Notes
        -----
        Accessing this attribute if `hasRecords` returns `False` is a logic
        error that may or may not raise an exception, depending on the
        implementation and whether assertions are enabled.
        """
        assert self.hasRecords(), "pack() may only be called if hasRecords() returns True."
        return self.universe.makePacker(name, self).pack(self, returnMaxBits=returnMaxBits)

    def to_simple(self, minimal: bool = False) -> SerializedDataCoordinate:
        """Convert this class to a simple python type.

        This is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. If set the records will not be
            attached.

        Returns
        -------
        simple : `SerializedDataCoordinate`
            The object converted to simple form.
        """
        # Convert to a dict form
        if self.hasFull():
            dataId = self.full.byName()
        else:
            dataId = self.byName()
        records: Optional[Dict[str, SerializedDimensionRecord]]
        if not minimal and self.hasRecords():
            records = {k: v.to_simple() for k, v in self.records.byName().items() if v is not None}
        else:
            records = None

        return SerializedDataCoordinate(dataId=dataId, records=records)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDataCoordinate,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
    ) -> DataCoordinate:
        """Construct a new object from the simplified form.

        The data is assumed to be of the form returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `dict` of [`str`, `Any`]
            The `dict` returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        dataId : `DataCoordinate`
            Newly-constructed object.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert a dict to a DataCoordinate")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        dataId = cls.standardize(simple.dataId, universe=universe)
        if simple.records:
            dataId = dataId.expanded(
                {k: DimensionRecord.from_simple(v, universe=universe) for k, v in simple.records.items()}
            )
        return dataId

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)
DataId = Union[DataCoordinate, Mapping[str, Any]]
"""A type-annotation alias for signatures that accept both informal data ID
dictionaries and validated `DataCoordinate` instances.
"""
class _DataCoordinateFullView(NamedKeyMapping[Dimension, DataIdValue]):
    """View class for `DataCoordinate.full`.

    Provides the default implementation for
    `DataCoordinate.full`.

    Parameters
    ----------
    target : `DataCoordinate`
        The `DataCoordinate` instance this object provides a view of.
    """

    def __init__(self, target: DataCoordinate):
        self._target = target

    __slots__ = ("_target",)

    def __repr__(self) -> str:
        pairs = (f"{name}: {self[name]!r}" for name in self._target.graph.dimensions.names)
        return "{" + ", ".join(pairs) + "}"

    def __getitem__(self, key: DataIdKey) -> DataIdValue:
        # Delegate directly to the target, which can resolve implied
        # dimensions because this view is only created when hasFull() is True.
        return self._target[key]

    def __iter__(self) -> Iterator[Dimension]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self.keys())

    def keys(self) -> NamedValueAbstractSet[Dimension]:  # type: ignore
        # Unlike the target itself, this view exposes implied dimensions too.
        return self._target.graph.dimensions

    @property
    def names(self) -> AbstractSet[str]:
        # Docstring inherited from `NamedKeyMapping`.
        return self.keys().names
class _DataCoordinateRecordsView(NamedKeyMapping[DimensionElement, Optional[DimensionRecord]]):
    """View class for `DataCoordinate.records`.

    Provides the default implementation for
    `DataCoordinate.records`.

    Parameters
    ----------
    target : `DataCoordinate`
        The `DataCoordinate` instance this object provides a view of.
    """

    def __init__(self, target: DataCoordinate):
        self._target = target

    __slots__ = ("_target",)

    def __repr__(self) -> str:
        pairs = (f"{name}: {self[name]!r}" for name in self._target.graph.elements.names)
        return "{" + ", ".join(pairs) + "}"

    def __str__(self) -> str:
        return "\n".join(str(record) for record in self.values())

    def __getitem__(self, key: Union[DimensionElement, str]) -> Optional[DimensionRecord]:
        # Normalize `DimensionElement` keys to their string names before
        # delegating to the target's protected record lookup.
        name = key.name if isinstance(key, DimensionElement) else key
        return self._target._record(name)

    def __iter__(self) -> Iterator[DimensionElement]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self.keys())

    def keys(self) -> NamedValueAbstractSet[DimensionElement]:  # type: ignore
        return self._target.graph.elements

    @property
    def names(self) -> AbstractSet[str]:
        # Docstring inherited from `NamedKeyMapping`.
        return self.keys().names
844class _BasicTupleDataCoordinate(DataCoordinate):
845 """Standard implementation of `DataCoordinate`.
847 Backed by a tuple of values.
849 This class should only be accessed outside this module via the
850 `DataCoordinate` interface, and should only be constructed via the static
851 methods there.
853 Parameters
854 ----------
855 graph : `DimensionGraph`
856 The dimensions to be identified.
857 values : `tuple` [ `int` or `str` ]
858 Data ID values, ordered to match ``graph._dataCoordinateIndices``. May
859 include values for just required dimensions (which always come first)
860 or all dimensions.
861 """
863 def __init__(self, graph: DimensionGraph, values: Tuple[DataIdValue, ...]):
864 self._graph = graph
865 self._values = values
867 __slots__ = ("_graph", "_values")
869 @property
870 def graph(self) -> DimensionGraph:
871 # Docstring inherited from DataCoordinate.
872 return self._graph
874 def __getitem__(self, key: DataIdKey) -> DataIdValue:
875 # Docstring inherited from DataCoordinate.
876 if isinstance(key, Dimension):
877 key = key.name
878 index = self._graph._dataCoordinateIndices[key]
879 try:
880 return self._values[index]
881 except IndexError:
882 # Caller asked for an implied dimension, but this object only has
883 # values for the required ones.
884 raise KeyError(key) from None
886 def subset(self, graph: DimensionGraph) -> DataCoordinate:
887 # Docstring inherited from DataCoordinate.
888 if self._graph == graph:
889 return self
890 elif self.hasFull() or self._graph.required >= graph.dimensions:
891 return _BasicTupleDataCoordinate(
892 graph,
893 tuple(self[k] for k in graph._dataCoordinateIndices.keys()),
894 )
895 else:
896 return _BasicTupleDataCoordinate(graph, tuple(self[k] for k in graph.required.names))
    def union(self, other: DataCoordinate) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        graph = self.graph.union(other.graph)
        # See if one or both input data IDs is already what we want to return;
        # if so, return the most complete one we have.
        if other.graph == graph:
            if self.graph == graph:
                # Input data IDs have the same graph (which is also the result
                # graph), but may not have the same content.
                # other might have records; self does not, so try other first.
                # If it at least has full values, it's no worse than self.
                if other.hasFull():
                    return other
                else:
                    return self
            elif other.hasFull():
                return other
            # There's some chance that neither self nor other has full values,
            # but that together they provide enough for the union to have
            # them.  Let the general case below handle that.
        elif self.graph == graph:
            # No chance at returning records.  If self has full values, it's
            # the best we can do.
            if self.hasFull():
                return self
        # General case with actual merging of dictionaries.  Where both data
        # IDs provide a value for the same key, other's value wins.
        values = self.full.byName() if self.hasFull() else self.byName()
        values.update(other.full.byName() if other.hasFull() else other.byName())
        return DataCoordinate.standardize(values, graph=graph)
928 def expanded(
929 self, records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]]
930 ) -> DataCoordinate:
931 # Docstring inherited from DataCoordinate
932 values = self._values
933 if not self.hasFull():
934 # Extract a complete values tuple from the attributes of the given
935 # records. It's possible for these to be inconsistent with
936 # self._values (which is a serious problem, of course), but we've
937 # documented this as a no-checking API.
938 values += tuple(getattr(records[d.name], d.primaryKey.name) for d in self._graph.implied)
939 return _ExpandedTupleDataCoordinate(self._graph, values, records)
941 def hasFull(self) -> bool:
942 # Docstring inherited from DataCoordinate.
943 return len(self._values) == len(self._graph._dataCoordinateIndices)
    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinate.
        # Plain tuple-backed data IDs never carry DimensionRecords; see
        # _ExpandedTupleDataCoordinate for the variant that does.
        return False
949 def _record(self, name: str) -> Optional[DimensionRecord]:
950 # Docstring inherited from DataCoordinate.
951 assert False
    def __reduce__(self) -> tuple[Any, ...]:
        # Pickle support: reconstruct by calling the class with the same
        # (graph, values) arguments; needed because __slots__ removes the
        # default __dict__-based pickling.
        return (_BasicTupleDataCoordinate, (self._graph, self._values))
956 def __getattr__(self, name: str) -> Any:
957 if name in self.graph.elements.names:
958 raise AttributeError(
959 f"Dimension record attribute {name!r} is only available on expanded DataCoordinates."
960 )
961 raise AttributeError(name)
class _ExpandedTupleDataCoordinate(_BasicTupleDataCoordinate):
    """A `DataCoordinate` implementation that can hold `DimensionRecord`.

    This class should only be accessed outside this module via the
    `DataCoordinate` interface, and should only be constructed via calls to
    `DataCoordinate.expanded`.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions to be identified.
    values : `tuple` [ `int` or `str` ]
        Data ID values, ordered to match ``graph._dataCoordinateIndices``.
        May include values for just required dimensions (which always come
        first) or all dimensions.
    records : `Mapping` [ `str`, `DimensionRecord` or `None` ]
        A `NamedKeyMapping` with `DimensionElement` keys or a regular
        `Mapping` with `str` (`DimensionElement` name) keys and
        `DimensionRecord` values. Keys must cover all elements in
        ``self.graph.elements``. Values may be `None`, but only to reflect
        actual NULL values in the database, not just records that have not
        been fetched.
    """

    def __init__(
        self,
        graph: DimensionGraph,
        values: Tuple[DataIdValue, ...],
        records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]],
    ):
        super().__init__(graph, values)
        # Debug-only consistency check; ``assert`` is stripped under
        # ``python -O``, consistent with this being a no-checking API.
        assert super().hasFull(), "This implementation requires full dimension records."
        self._records = records

    __slots__ = ("_records",)

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        if self._graph == graph:
            return self
        # The records mapping may cover more elements than the new graph
        # needs; that is harmless because lookups are done by name.
        return _ExpandedTupleDataCoordinate(
            graph, tuple(self[k] for k in graph._dataCoordinateIndices.keys()), records=self._records
        )

    def expanded(
        self, records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]]
    ) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        # Already expanded; the given records are ignored.
        return self

    def union(self, other: DataCoordinate) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        graph = self.graph.union(other.graph)
        # See if one or both input data IDs is already what we want to return;
        # if so, return the most complete one we have.
        if self.graph == graph:
            # self has records, so even if other is also a valid result, it's
            # no better.
            return self
        if other.graph == graph:
            # If other has full values, and self does not identify some of
            # those, it's the best we can do.  It may have records, too.
            if other.hasFull():
                return other
            # If other does not have full values, there's a chance self may
            # provide the values needed to complete it.  For example, self
            # could be {band} while other could be
            # {instrument, physical_filter, band}, with band unknown.
        # General case with actual merging of dictionaries.  Where both data
        # IDs provide a value for the same key, other's value wins.
        values = self.full.byName()
        values.update(other.full.byName() if other.hasFull() else other.byName())
        basic = DataCoordinate.standardize(values, graph=graph)
        # See if we can add records.
        if self.hasRecords() and other.hasRecords():
            # Sometimes the elements of a union of graphs can contain elements
            # that weren't in either input graph (because graph unions are only
            # on dimensions).  e.g. {visit} | {detector} brings along
            # visit_detector_region.  We have no records for those extra
            # elements, so in that case we must return an unexpanded result.
            elements = set(graph.elements.names)
            elements -= self.graph.elements.names
            elements -= other.graph.elements.names
            if not elements:
                records = NamedKeyDict[DimensionElement, Optional[DimensionRecord]](self.records)
                records.update(other.records)
                return basic.expanded(records.freeze())
        return basic

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinate.
        # Guaranteed by the assertion in __init__.
        return True

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinate.
        return True

    def _record(self, name: str) -> Optional[DimensionRecord]:
        # Docstring inherited from DataCoordinate.
        return self._records[name]

    def __reduce__(self) -> tuple[Any, ...]:
        # Pickle support: reconstruct from (graph, values, records); needed
        # because __slots__ removes the default __dict__-based pickling.
        return (_ExpandedTupleDataCoordinate, (self._graph, self._values, self._records))

    def __getattr__(self, name: str) -> Any:
        # Expose dimension records as attributes, e.g. ``dataId.visit``.
        try:
            return self._record(name)
        except KeyError:
            raise AttributeError(name) from None

    def __dir__(self) -> list[str]:
        # Advertise the record attribute names so interactive tab-completion
        # sees them alongside the regular attributes.
        result = list(super().__dir__())
        result.extend(self.graph.elements.names)
        return result