Coverage for python/lsst/daf/butler/core/dimensions/_coordinate.py: 26%
336 statements
coverage.py v6.5.0, created at 2023-01-26 02:04 -0800
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Design notes for this module are in
# doc/lsst.daf.butler/dev/dataCoordinate.py.
#

from __future__ import annotations
__all__ = ("DataCoordinate", "DataId", "DataIdKey", "DataIdValue", "SerializedDataCoordinate")

import numbers
from abc import abstractmethod
from typing import (
    TYPE_CHECKING,
    AbstractSet,
    Any,
    Dict,
    Iterator,
    Literal,
    Mapping,
    Optional,
    Tuple,
    Union,
    overload,
)

from lsst.sphgeom import IntersectionRegion, Region
from pydantic import BaseModel

from ..json import from_json_pydantic, to_json_pydantic
from ..named import NamedKeyDict, NamedKeyMapping, NamedValueAbstractSet, NameLookupMapping
from ..timespan import Timespan
from ._elements import Dimension, DimensionElement
from ._graph import DimensionGraph
from ._records import DimensionRecord, SerializedDimensionRecord
if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
    from ...registry import Registry
    from ._universe import DimensionUniverse

DataIdKey = Union[str, Dimension]
"""Type annotation alias for the keys that can be used to index a
DataCoordinate.
"""

# Pydantic will cast int to str if str is first in the Union.
DataIdValue = Union[int, str, None]
"""Type annotation alias for the values that can be present in a
DataCoordinate or other data ID.
"""
class SerializedDataCoordinate(BaseModel):
    """Simplified model for serializing a `DataCoordinate`."""

    dataId: Dict[str, DataIdValue]
    records: Optional[Dict[str, SerializedDimensionRecord]] = None

    @classmethod
    def direct(cls, *, dataId: Dict[str, DataIdValue], records: Dict[str, Dict]) -> SerializedDataCoordinate:
        """Construct a `SerializedDataCoordinate` directly without validators.

        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        This method should only be called when the inputs are trusted.
        """
        node = SerializedDataCoordinate.__new__(cls)
        setter = object.__setattr__
        setter(node, "dataId", dataId)
        setter(
            node,
            "records",
            records
            if records is None
            else {k: SerializedDimensionRecord.direct(**v) for k, v in records.items()},
        )
        setter(node, "__fields_set__", {"dataId", "records"})
        return node
def _intersectRegions(*args: Region) -> Optional[Region]:
    """Return the intersection of several regions.

    For internal use by `ExpandedDataCoordinate` only.

    If no regions are provided, returns `None`.
    """
    if len(args) == 0:
        return None
    else:
        result = args[0]
        for n in range(1, len(args)):
            result = IntersectionRegion(result, args[n])
        return result
class DataCoordinate(NamedKeyMapping[Dimension, DataIdValue]):
    """Data ID dictionary.

    An immutable data ID dictionary that guarantees that its key-value pairs
    identify at least all required dimensions in a `DimensionGraph`.

    `DataCoordinate` itself is an ABC, but provides `staticmethod` factory
    functions for private concrete implementations that should be sufficient
    for most purposes.  `standardize` is the most flexible and safe of these;
    the others (`makeEmpty`, `fromRequiredValues`, and `fromFullValues`) are
    more specialized and perform little or no checking of inputs.

    Notes
    -----
    Like any data ID class, `DataCoordinate` behaves like a dictionary, but
    with some subtleties:

    - Both `Dimension` instances and `str` names thereof may be used as keys
      in lookup operations, but iteration (and `keys`) will yield `Dimension`
      instances.  The `names` property can be used to obtain the
      corresponding `str` names.

    - Lookups for implied dimensions (those in ``self.graph.implied``) are
      supported if and only if `hasFull` returns `True`, and are never
      included in iteration or `keys`.  The `full` property may be used to
      obtain a mapping whose keys do include implied dimensions.

    - Equality comparison with other mappings is supported, but it always
      considers only required dimensions (as well as requiring both operands
      to identify the same dimensions).  This is not quite consistent with
      the way mappings usually work - normally differing keys imply unequal
      mappings - but it makes sense in this context because data IDs with the
      same values for required dimensions but different values for implied
      dimensions represent a serious problem with the data that
      `DataCoordinate` cannot generally recognize on its own, and a data ID
      that knows implied dimension values should still be able to compare as
      equal to one that does not.  This is of course not the way comparisons
      between simple `dict` data IDs work, and hence using a `DataCoordinate`
      instance for at least one operand in any data ID comparison is strongly
      recommended.
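
    Examples
    --------
    A minimal sketch of the dictionary behavior described above, assuming a
    universe in which ``physical_filter`` implies ``band`` (the concrete
    dimension names and values are illustrative, not guaranteed by this
    module)::

        dataId = DataCoordinate.standardize(
            {"instrument": "HSC", "physical_filter": "HSC-G"},
            universe=universe,
        )
        dataId["instrument"]        # lookup by str name or Dimension
        list(dataId.names)          # required dimensions only
        if dataId.hasFull():
            dataId.full["band"]     # implied values via the ``full`` view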
159 """
161 __slots__ = ()
163 _serializedType = SerializedDataCoordinate
165 @staticmethod
166 def standardize(
167 mapping: Optional[NameLookupMapping[Dimension, DataIdValue]] = None,
168 *,
169 graph: Optional[DimensionGraph] = None,
170 universe: Optional[DimensionUniverse] = None,
171 defaults: Optional[DataCoordinate] = None,
172 **kwargs: Any,
173 ) -> DataCoordinate:
174 """Standardize the supplied dataId.
176 Adapts an arbitrary mapping and/or additional arguments into a true
177 `DataCoordinate`, or augment an existing one.
179 Parameters
180 ----------
181 mapping : `~collections.abc.Mapping`, optional
182 An informal data ID that maps dimensions or dimension names to
183 their primary key values (may also be a true `DataCoordinate`).
184 graph : `DimensionGraph`
185 The dimensions to be identified by the new `DataCoordinate`.
186 If not provided, will be inferred from the keys of ``mapping`` and
187 ``**kwargs``, and ``universe`` must be provided unless ``mapping``
188 is already a `DataCoordinate`.
189 universe : `DimensionUniverse`
190 All known dimensions and their relationships; used to expand
191 and validate dependencies when ``graph`` is not provided.
192 defaults : `DataCoordinate`, optional
193 Default dimension key-value pairs to use when needed. These are
194 never used to infer ``graph``, and are ignored if a different value
195 is provided for the same key in ``mapping`` or `**kwargs``.
196 **kwargs
197 Additional keyword arguments are treated like additional key-value
198 pairs in ``mapping``.
200 Returns
201 -------
202 coordinate : `DataCoordinate`
203 A validated `DataCoordinate` instance.
205 Raises
206 ------
207 TypeError
208 Raised if the set of optional arguments provided is not supported.
209 KeyError
210 Raised if a key-value pair for a required dimension is missing.
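
        Examples
        --------
        A hedged sketch of common calling patterns; the dimension names and
        the ``big_mapping`` and ``default_data_id`` values are illustrative
        assumptions::

            # Infer the graph from the keys (universe required).
            dataId = DataCoordinate.standardize(
                {"instrument": "HSC"}, universe=universe
            )
            # Augment an existing data ID with keyword arguments.
            dataId = DataCoordinate.standardize(dataId, detector=42)
            # Extract only the dimensions in ``graph`` from a larger
            # mapping, filling any gaps from ``default_data_id``.
            dataId = DataCoordinate.standardize(
                big_mapping, graph=graph, defaults=default_data_id
            )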
211 """
212 d: Dict[str, DataIdValue] = {}
213 if isinstance(mapping, DataCoordinate):
214 if graph is None:
215 if not kwargs:
216 # Already standardized to exactly what we want.
217 return mapping
218 elif kwargs.keys().isdisjoint(graph.dimensions.names):
219 # User provided kwargs, but told us not to use them by
220 # passing in dimensions that are disjoint from those kwargs.
221 # This is not necessarily user error - it's a useful pattern
222 # to pass in all of the key-value pairs you have and let the
223 # code here pull out only what it needs.
224 return mapping.subset(graph)
225 assert universe is None or universe == mapping.universe
226 universe = mapping.universe
227 d.update((name, mapping[name]) for name in mapping.graph.required.names)
228 if mapping.hasFull():
229 d.update((name, mapping[name]) for name in mapping.graph.implied.names)
230 elif isinstance(mapping, NamedKeyMapping):
231 d.update(mapping.byName())
232 elif mapping is not None:
233 d.update(mapping)
234 d.update(kwargs)
235 if graph is None:
236 if defaults is not None:
237 universe = defaults.universe
238 elif universe is None:
239 raise TypeError("universe must be provided if graph is not.")
240 graph = DimensionGraph(universe, names=d.keys())
241 if not graph.dimensions:
242 return DataCoordinate.makeEmpty(graph.universe)
243 if defaults is not None:
244 if defaults.hasFull():
245 for k, v in defaults.full.items():
246 d.setdefault(k.name, v)
247 else:
248 for k, v in defaults.items():
249 d.setdefault(k.name, v)
250 if d.keys() >= graph.dimensions.names:
251 values = tuple(d[name] for name in graph._dataCoordinateIndices.keys())
252 else:
253 try:
254 values = tuple(d[name] for name in graph.required.names)
255 except KeyError as err:
256 raise KeyError(f"No value in data ID ({mapping}) for required dimension {err}.") from err
257 # Some backends cannot handle numpy.int64 type which is a subclass of
258 # numbers.Integral; convert that to int.
259 values = tuple(
260 int(val) if isinstance(val, numbers.Integral) else val for val in values # type: ignore
261 )
262 return _BasicTupleDataCoordinate(graph, values)
    @staticmethod
    def makeEmpty(universe: DimensionUniverse) -> DataCoordinate:
        """Return an empty `DataCoordinate`.

        It identifies the null set of dimensions.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Universe to which this null dimension set belongs.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID object that identifies no dimensions.  `hasFull` and
            `hasRecords` are guaranteed to return `True`, because both `full`
            and `records` are just empty mappings.
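
        Examples
        --------
        A trivial sketch::

            empty = DataCoordinate.makeEmpty(universe)
            assert empty.hasFull() and empty.hasRecords()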
281 """
282 return _ExpandedTupleDataCoordinate(universe.empty, (), {})
    @staticmethod
    def fromRequiredValues(graph: DimensionGraph, values: Tuple[DataIdValue, ...]) -> DataCoordinate:
        """Construct a `DataCoordinate` from required dimension values.

        This is a low-level interface with at most assertion-level checking
        of inputs.  Most callers should use `standardize` instead.

        Parameters
        ----------
        graph : `DimensionGraph`
            Dimensions this data ID will identify.
        values : `tuple` [ `int` or `str` ]
            Tuple of primary key values corresponding to ``graph.required``,
            in that order.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID object that identifies the given dimensions.
            ``dataId.hasFull()`` will return `True` if and only if
            ``graph.implied`` is empty, and ``dataId.hasRecords()`` will
            never return `True`.
        """
        assert len(graph.required) == len(
            values
        ), f"Inconsistency between dimensions {graph.required} and required values {values}."
        return _BasicTupleDataCoordinate(graph, values)
    @staticmethod
    def fromFullValues(graph: DimensionGraph, values: Tuple[DataIdValue, ...]) -> DataCoordinate:
        """Construct a `DataCoordinate` from all dimension values.

        This is a low-level interface with at most assertion-level checking
        of inputs.  Most callers should use `standardize` instead.

        Parameters
        ----------
        graph : `DimensionGraph`
            Dimensions this data ID will identify.
        values : `tuple` [ `int` or `str` ]
            Tuple of primary key values corresponding to
            ``itertools.chain(graph.required, graph.implied)``, in that
            order.  Note that this is _not_ the same order as
            ``graph.dimensions``, though these contain the same elements.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID object that identifies the given dimensions.
            ``dataId.hasFull()`` will always return `True`, because values
            for all dimensions (required and implied) are provided, but
            ``dataId.hasRecords()`` will never return `True`.
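
        Examples
        --------
        A sketch of the ordering caveat above, assuming a graph with
        required dimensions ``(instrument, physical_filter)`` and implied
        dimension ``band`` (illustrative names only)::

            # Required values first, then implied values:
            dataId = DataCoordinate.fromFullValues(graph, ("HSC", "HSC-G", "g"))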
336 """
337 assert len(graph.dimensions) == len(
338 values
339 ), f"Inconsistency between dimensions {graph.dimensions} and full values {values}."
340 return _BasicTupleDataCoordinate(graph, values)
    def __hash__(self) -> int:
        return hash((self.graph,) + tuple(self[d.name] for d in self.graph.required))

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, DataCoordinate):
            other = DataCoordinate.standardize(other, universe=self.universe)
        return self.graph == other.graph and all(self[d.name] == other[d.name] for d in self.graph.required)

    def __repr__(self) -> str:
        # We can't make repr yield something that could be exec'd here
        # without printing out the whole DimensionUniverse the graph is
        # derived from.  So we print something that mostly looks like a
        # dict, but doesn't quote its keys: that's both more compact and
        # something that can't be mistaken for an actual dict or something
        # that could be exec'd.
        terms = [f"{d}: {self[d]!r}" for d in self.graph.required.names]
        if self.hasFull() and self.graph.required != self.graph.dimensions:
            terms.append("...")
        return "{{{}}}".format(", ".join(terms))

    def __lt__(self, other: Any) -> bool:
        # Allow DataCoordinate to be sorted.
        if not isinstance(other, type(self)):
            return NotImplemented
        # Form a tuple of key-value tuples for each DataCoordinate.
        # Unlike repr() we only use required keys here to ensure that
        # __eq__ cannot be true simultaneously with __lt__ being true.
        self_kv = tuple(self.items())
        other_kv = tuple(other.items())

        return self_kv < other_kv

    def __iter__(self) -> Iterator[Dimension]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self.keys())

    def keys(self) -> NamedValueAbstractSet[Dimension]:  # type: ignore
        return self.graph.required

    @property
    def names(self) -> AbstractSet[str]:
        """Names of the required dimensions identified by this data ID.

        They are returned in the same order as `keys`
        (`collections.abc.Set` [ `str` ]).
        """
        return self.keys().names
    @abstractmethod
    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        """Return a `DataCoordinate` whose graph is a subset of
        ``self.graph``.

        Parameters
        ----------
        graph : `DimensionGraph`
            The dimensions identified by the returned `DataCoordinate`.

        Returns
        -------
        coordinate : `DataCoordinate`
            A `DataCoordinate` instance that identifies only the given
            dimensions.  May be ``self`` if ``graph == self.graph``.

        Raises
        ------
        KeyError
            Raised if the primary key value for one or more required
            dimensions is unknown.  This may happen if
            ``graph.issubset(self.graph)`` is `False`, or even if
            ``graph.issubset(self.graph)`` is `True`, if ``self.hasFull()``
            is `False` and ``graph.required.issubset(self.graph.required)``
            is `False`.  As an example of the latter case, consider trying
            to go from a data ID with dimensions {instrument,
            physical_filter, band} to just {instrument, band}; band is
            implied by physical_filter and hence would have no value in the
            original data ID if ``self.hasFull()`` is `False`.

        Notes
        -----
        If `hasFull` and `hasRecords` return `True` on ``self``, they will
        return `True` (respectively) on the returned `DataCoordinate` as
        well.  The converse does not hold.
        """
        raise NotImplementedError()
    @abstractmethod
    def union(self, other: DataCoordinate) -> DataCoordinate:
        """Combine two data IDs.

        Yields a new one that identifies all dimensions that either of them
        identify.

        Parameters
        ----------
        other : `DataCoordinate`
            Data ID to combine with ``self``.

        Returns
        -------
        unioned : `DataCoordinate`
            A `DataCoordinate` instance that satisfies
            ``unioned.graph == self.graph.union(other.graph)``.  Will
            preserve ``hasFull`` and ``hasRecords`` whenever possible.

        Notes
        -----
        No checking for consistency is performed on values for keys that
        ``self`` and ``other`` have in common, and which value is included
        in the returned data ID is not specified.
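
        Examples
        --------
        A hedged sketch (the dimension names and values are illustrative)::

            visit_id = DataCoordinate.standardize(
                {"instrument": "HSC", "visit": 903342}, universe=universe
            )
            detector_id = DataCoordinate.standardize(
                {"instrument": "HSC", "detector": 42}, universe=universe
            )
            both = visit_id.union(detector_id)
            # both.graph == visit_id.graph.union(detector_id.graph)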
452 """
453 raise NotImplementedError()
    @abstractmethod
    def expanded(
        self, records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]]
    ) -> DataCoordinate:
        """Return a `DataCoordinate` that holds the given records.

        Guarantees that `hasRecords` returns `True`.

        This is a low-level interface with at most assertion-level checking
        of inputs.  Most callers should use `Registry.expandDataId` instead.

        Parameters
        ----------
        records : `Mapping` [ `str`, `DimensionRecord` or `None` ]
            A `NamedKeyMapping` with `DimensionElement` keys or a regular
            `Mapping` with `str` (`DimensionElement` name) keys and
            `DimensionRecord` values.  Keys must cover all elements in
            ``self.graph.elements``.  Values may be `None`, but only to
            reflect actual NULL values in the database, not just records
            that have not been fetched.
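
        Examples
        --------
        Most callers should rely on `Registry.expandDataId`, which fetches
        the records and calls this method; a hedged sketch::

            expanded = registry.expandDataId(dataId)
            assert expanded.hasRecords()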
475 """
476 raise NotImplementedError()
    @property
    def universe(self) -> DimensionUniverse:
        """Universe that defines all known compatible dimensions.

        The universe will be compatible with this coordinate
        (`DimensionUniverse`).
        """
        return self.graph.universe
    @property
    @abstractmethod
    def graph(self) -> DimensionGraph:
        """Dimensions identified by this data ID (`DimensionGraph`).

        Note that values are only required to be present for dimensions in
        ``self.graph.required``; all others may be retrieved (from a
        `Registry`) given these.
        """
        raise NotImplementedError()

    @abstractmethod
    def hasFull(self) -> bool:
        """Whether this data ID contains implied and required values.

        Returns
        -------
        state : `bool`
            If `True`, `__getitem__`, `get`, and `__contains__` (but not
            `keys`!) will act as though the mapping includes key-value pairs
            for implied dimensions, and the `full` property may be used.  If
            `False`, these operations only include key-value pairs for
            required dimensions, and accessing `full` is an error.  Always
            `True` if there are no implied dimensions.
        """
        raise NotImplementedError()
    @property
    def full(self) -> NamedKeyMapping[Dimension, DataIdValue]:
        """Return mapping for all dimensions in ``self.graph``.

        The mapping includes key-value pairs for all dimensions in
        ``self.graph``, including implied (`NamedKeyMapping`).

        Accessing this attribute if `hasFull` returns `False` is a logic
        error that may raise an exception of unspecified type either
        immediately or when implied keys are accessed via the returned
        mapping, depending on the implementation and whether assertions are
        enabled.
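
        Examples
        --------
        A hedged sketch (the ``band`` dimension name is illustrative)::

            if dataId.hasFull():
                band = dataId.full["band"]  # implied dimensions included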
525 """
526 assert self.hasFull(), "full may only be accessed if hasFull() returns True."
527 return _DataCoordinateFullView(self)
    @abstractmethod
    def hasRecords(self) -> bool:
        """Whether this data ID contains records.

        These are the records for all of the dimension elements it
        identifies.

        Returns
        -------
        state : `bool`
            If `True`, the following attributes may be accessed:

            - `records`
            - `region`
            - `timespan`
            - `pack`

            If `False`, accessing any of these is considered a logic error.
        """
        raise NotImplementedError()
    @property
    def records(self) -> NamedKeyMapping[DimensionElement, Optional[DimensionRecord]]:
        """Return the records.

        Returns a mapping that contains `DimensionRecord` objects for all
        elements identified by this data ID (`NamedKeyMapping`).

        The values of this mapping may be `None` if and only if there is no
        record for that element with these dimensions in the database (which
        means some foreign key field must have a NULL value).

        Accessing this attribute if `hasRecords` returns `False` is a logic
        error that may raise an exception of unspecified type either
        immediately or when the returned mapping is used, depending on the
        implementation and whether assertions are enabled.
        """
        assert self.hasRecords(), "records may only be accessed if hasRecords() returns True."
        return _DataCoordinateRecordsView(self)
    @abstractmethod
    def _record(self, name: str) -> Optional[DimensionRecord]:
        """Protected implementation hook that backs the ``records``
        attribute.

        Parameters
        ----------
        name : `str`
            The name of a `DimensionElement`, guaranteed to be in
            ``self.graph.elements.names``.

        Returns
        -------
        record : `DimensionRecord` or `None`
            The dimension record for the given element identified by this
            data ID, or `None` if there is no such record.
        """
        raise NotImplementedError()
    @property
    def region(self) -> Optional[Region]:
        """Spatial region associated with this data ID.

        (`lsst.sphgeom.Region` or `None`).

        This is `None` if and only if ``self.graph.spatial`` is empty.

        Accessing this attribute if `hasRecords` returns `False` is a logic
        error that may or may not raise an exception, depending on the
        implementation and whether assertions are enabled.
        """
        assert self.hasRecords(), "region may only be accessed if hasRecords() returns True."
        regions = []
        for family in self.graph.spatial:
            element = family.choose(self.graph.elements)
            record = self._record(element.name)
            if record is None or record.region is None:
                return None
            else:
                regions.append(record.region)
        return _intersectRegions(*regions)
    @property
    def timespan(self) -> Optional[Timespan]:
        """Temporal interval associated with this data ID.

        (`Timespan` or `None`).

        This is `None` if and only if ``self.graph.temporal`` is empty.

        Accessing this attribute if `hasRecords` returns `False` is a logic
        error that may or may not raise an exception, depending on the
        implementation and whether assertions are enabled.
        """
        assert self.hasRecords(), "timespan may only be accessed if hasRecords() returns True."
        timespans = []
        for family in self.graph.temporal:
            element = family.choose(self.graph.elements)
            record = self._record(element.name)
            # DimensionRecord subclasses for temporal elements always have
            # .timespan, but they're dynamic so this can't be type-checked.
            if record is None or record.timespan is None:
                return None
            else:
                timespans.append(record.timespan)
        if not timespans:
            return None
        elif len(timespans) == 1:
            return timespans[0]
        else:
            return Timespan.intersection(*timespans)
    @overload
    def pack(self, name: str, *, returnMaxBits: Literal[True]) -> Tuple[int, int]:
        ...

    @overload
    def pack(self, name: str, *, returnMaxBits: Literal[False]) -> int:
        ...

    def pack(self, name: str, *, returnMaxBits: bool = False) -> Union[Tuple[int, int], int]:
        """Pack this data ID into an integer.

        Parameters
        ----------
        name : `str`
            Name of the `DimensionPacker` algorithm (as defined in the
            dimension configuration).
        returnMaxBits : `bool`, optional
            If `True` (`False` is default), return the maximum number of
            nonzero bits in the returned integer across all data IDs.

        Returns
        -------
        packed : `int`
            Integer ID.  This ID is unique only across data IDs that have
            the same values for the packer's "fixed" dimensions.
        maxBits : `int`, optional
            Maximum number of nonzero bits in ``packed``.  Not returned
            unless ``returnMaxBits`` is `True`.

        Notes
        -----
        Calling this method if `hasRecords` returns `False` is a logic error
        that may or may not raise an exception, depending on the
        implementation and whether assertions are enabled.
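
        Examples
        --------
        A hedged sketch; the packer name ``"visit_detector"`` is an
        assumption about the dimension configuration, not something this
        module defines::

            packed = dataId.pack("visit_detector")
            packed, maxBits = dataId.pack("visit_detector", returnMaxBits=True)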
673 """
674 assert self.hasRecords(), "pack() may only be called if hasRecords() returns True."
675 return self.universe.makePacker(name, self).pack(self, returnMaxBits=returnMaxBits)
    def to_simple(self, minimal: bool = False) -> SerializedDataCoordinate:
        """Convert this object to a simple python type.

        This is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization.  If set, the records will not be
            attached.

        Returns
        -------
        simple : `SerializedDataCoordinate`
            The object converted to simple form.
        """
        # Convert to a dict form.
        if self.hasFull():
            dataId = self.full.byName()
        else:
            dataId = self.byName()
        records: Optional[Dict[str, SerializedDimensionRecord]]
        if not minimal and self.hasRecords():
            records = {k: v.to_simple() for k, v in self.records.byName().items() if v is not None}
        else:
            records = None

        return SerializedDataCoordinate(dataId=dataId, records=records)
    @classmethod
    def from_simple(
        cls,
        simple: SerializedDataCoordinate,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
    ) -> DataCoordinate:
        """Construct a new object from the simplified form.

        The data is assumed to be of the form returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDataCoordinate`
            The serialized form returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The universe of all known dimensions.  Can be `None` if
            ``registry`` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted.  Can be `None`
            if ``universe`` is provided explicitly.

        Returns
        -------
        dataId : `DataCoordinate`
            Newly-constructed object.
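
        Examples
        --------
        A hedged round-trip sketch::

            simple = dataId.to_simple()
            restored = DataCoordinate.from_simple(simple, universe=universe)
            assert restored == dataId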
731 """
732 if universe is None and registry is None:
733 raise ValueError("One of universe or registry is required to convert a dict to a DataCoordinate")
734 if universe is None and registry is not None:
735 universe = registry.dimensions
736 if universe is None:
737 # this is for mypy
738 raise ValueError("Unable to determine a usable universe")
740 dataId = cls.standardize(simple.dataId, universe=universe)
741 if simple.records:
742 dataId = dataId.expanded(
743 {k: DimensionRecord.from_simple(v, universe=universe) for k, v in simple.records.items()}
744 )
745 return dataId
747 to_json = to_json_pydantic
748 from_json = classmethod(from_json_pydantic)
DataId = Union[DataCoordinate, Mapping[str, Any]]
"""A type-annotation alias for signatures that accept both informal data ID
dictionaries and validated `DataCoordinate` instances.
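
A hedged sketch of a signature built on this alias (the ``expand`` function
name is illustrative, not part of this module)::

    def expand(data_id: DataId, *, universe: DimensionUniverse) -> DataCoordinate:
        return DataCoordinate.standardize(data_id, universe=universe)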
754"""
class _DataCoordinateFullView(NamedKeyMapping[Dimension, DataIdValue]):
    """View class for `DataCoordinate.full`.

    Provides the default implementation for
    `DataCoordinate.full`.

    Parameters
    ----------
    target : `DataCoordinate`
        The `DataCoordinate` instance this object provides a view of.
    """

    def __init__(self, target: DataCoordinate):
        self._target = target

    __slots__ = ("_target",)

    def __repr__(self) -> str:
        terms = [f"{d}: {self[d]!r}" for d in self._target.graph.dimensions.names]
        return "{{{}}}".format(", ".join(terms))

    def __getitem__(self, key: DataIdKey) -> DataIdValue:
        return self._target[key]

    def __iter__(self) -> Iterator[Dimension]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self.keys())

    def keys(self) -> NamedValueAbstractSet[Dimension]:  # type: ignore
        return self._target.graph.dimensions

    @property
    def names(self) -> AbstractSet[str]:
        # Docstring inherited from `NamedKeyMapping`.
        return self.keys().names
class _DataCoordinateRecordsView(NamedKeyMapping[DimensionElement, Optional[DimensionRecord]]):
    """View class for `DataCoordinate.records`.

    Provides the default implementation for
    `DataCoordinate.records`.

    Parameters
    ----------
    target : `DataCoordinate`
        The `DataCoordinate` instance this object provides a view of.
    """

    def __init__(self, target: DataCoordinate):
        self._target = target

    __slots__ = ("_target",)

    def __repr__(self) -> str:
        terms = [f"{d}: {self[d]!r}" for d in self._target.graph.elements.names]
        return "{{{}}}".format(", ".join(terms))

    def __str__(self) -> str:
        return "\n".join(str(v) for v in self.values())

    def __getitem__(self, key: Union[DimensionElement, str]) -> Optional[DimensionRecord]:
        if isinstance(key, DimensionElement):
            key = key.name
        return self._target._record(key)

    def __iter__(self) -> Iterator[DimensionElement]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self.keys())

    def keys(self) -> NamedValueAbstractSet[DimensionElement]:  # type: ignore
        return self._target.graph.elements

    @property
    def names(self) -> AbstractSet[str]:
        # Docstring inherited from `NamedKeyMapping`.
        return self.keys().names
class _BasicTupleDataCoordinate(DataCoordinate):
    """Standard implementation of `DataCoordinate`.

    Backed by a tuple of values.

    This class should only be accessed outside this module via the
    `DataCoordinate` interface, and should only be constructed via the
    static methods there.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions to be identified.
    values : `tuple` [ `int` or `str` ]
        Data ID values, ordered to match ``graph._dataCoordinateIndices``.
        May include values for just required dimensions (which always come
        first) or all dimensions.
    """

    def __init__(self, graph: DimensionGraph, values: Tuple[DataIdValue, ...]):
        self._graph = graph
        self._values = values

    __slots__ = ("_graph", "_values")

    @property
    def graph(self) -> DimensionGraph:
        # Docstring inherited from DataCoordinate.
        return self._graph

    def __getitem__(self, key: DataIdKey) -> DataIdValue:
        # Docstring inherited from DataCoordinate.
        if isinstance(key, Dimension):
            key = key.name
        index = self._graph._dataCoordinateIndices[key]
        try:
            return self._values[index]
        except IndexError:
            # Caller asked for an implied dimension, but this object only
            # has values for the required ones.
            raise KeyError(key) from None
    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        if self._graph == graph:
            return self
        elif self.hasFull() or self._graph.required >= graph.dimensions:
            return _BasicTupleDataCoordinate(
                graph,
                tuple(self[k] for k in graph._dataCoordinateIndices.keys()),
            )
        else:
            return _BasicTupleDataCoordinate(graph, tuple(self[k] for k in graph.required.names))
    def union(self, other: DataCoordinate) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        graph = self.graph.union(other.graph)
        # See if one or both input data IDs is already what we want to
        # return; if so, return the most complete one we have.
        if other.graph == graph:
            if self.graph == graph:
                # Input data IDs have the same graph (which is also the
                # result graph), but may not have the same content.
                # other might have records; self does not, so try other
                # first.  If it at least has full values, it's no worse
                # than self.
                if other.hasFull():
                    return other
                else:
                    return self
            elif other.hasFull():
                return other
            # There's some chance that neither self nor other has full
            # values, but together they provide enough to cover the union.
            # Let the general case below handle that.
        elif self.graph == graph:
            # No chance at returning records.  If self has full values,
            # it's the best we can do.
            if self.hasFull():
                return self
        # General case with actual merging of dictionaries.
        values = self.full.byName() if self.hasFull() else self.byName()
        values.update(other.full.byName() if other.hasFull() else other.byName())
        return DataCoordinate.standardize(values, graph=graph)
    def expanded(
        self, records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]]
    ) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        values = self._values
        if not self.hasFull():
            # Extract a complete values tuple from the attributes of the
            # given records.  It's possible for these to be inconsistent
            # with self._values (which is a serious problem, of course),
            # but we've documented this as a no-checking API.
            values += tuple(getattr(records[d.name], d.primaryKey.name) for d in self._graph.implied)
        return _ExpandedTupleDataCoordinate(self._graph, values, records)

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinate.
        return len(self._values) == len(self._graph._dataCoordinateIndices)

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinate.
        return False

    def _record(self, name: str) -> Optional[DimensionRecord]:
        # Docstring inherited from DataCoordinate.
        assert False
class _ExpandedTupleDataCoordinate(_BasicTupleDataCoordinate):
    """A `DataCoordinate` implementation that can hold `DimensionRecord`.

    This class should only be accessed outside this module via the
    `DataCoordinate` interface, and should only be constructed via calls to
    `DataCoordinate.expanded`.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions to be identified.
    values : `tuple` [ `int` or `str` ]
        Data ID values, ordered to match ``graph._dataCoordinateIndices``.
        May include values for just required dimensions (which always come
        first) or all dimensions.
    records : `Mapping` [ `str`, `DimensionRecord` or `None` ]
        A `NamedKeyMapping` with `DimensionElement` keys or a regular
        `Mapping` with `str` (`DimensionElement` name) keys and
        `DimensionRecord` values.  Keys must cover all elements in
        ``self.graph.elements``.  Values may be `None`, but only to reflect
        actual NULL values in the database, not just records that have not
        been fetched.
    """

    def __init__(
        self,
        graph: DimensionGraph,
        values: Tuple[DataIdValue, ...],
        records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]],
    ):
        super().__init__(graph, values)
        assert super().hasFull(), "This implementation requires full dimension values."
        self._records = records

    __slots__ = ("_records",)
    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        if self._graph == graph:
            return self
        return _ExpandedTupleDataCoordinate(
            graph, tuple(self[k] for k in graph._dataCoordinateIndices.keys()), records=self._records
        )

    def expanded(
        self, records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]]
    ) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        return self
    def union(self, other: DataCoordinate) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        graph = self.graph.union(other.graph)
        # See if one or both input data IDs is already what we want to
        # return; if so, return the most complete one we have.
        if self.graph == graph:
            # self has records, so even if other is also a valid result,
            # it's no better.
            return self
        if other.graph == graph:
            # If other has full values, and self does not identify some of
            # those, it's the best we can do.  It may have records, too.
            if other.hasFull():
                return other
            # If other does not have full values, there's a chance self may
            # provide the values needed to complete it.  For example, self
            # could be {band} while other could be
            # {instrument, physical_filter, band}, with band unknown.
        # General case with actual merging of dictionaries.
        values = self.full.byName()
        values.update(other.full.byName() if other.hasFull() else other.byName())
        basic = DataCoordinate.standardize(values, graph=graph)
        # See if we can add records.
        if self.hasRecords() and other.hasRecords():
            # Sometimes the elements of a union of graphs can contain
            # elements that weren't in either input graph (because graph
            # unions are only on dimensions).  e.g. {visit} | {detector}
            # brings along visit_detector_region.
            elements = set(graph.elements.names)
            elements -= self.graph.elements.names
            elements -= other.graph.elements.names
            if not elements:
                records = NamedKeyDict[DimensionElement, Optional[DimensionRecord]](self.records)
                records.update(other.records)
                return basic.expanded(records.freeze())
        return basic
    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinate.
        return True

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinate.
        return True

    def _record(self, name: str) -> Optional[DimensionRecord]:
        # Docstring inherited from DataCoordinate.
        return self._records[name]