Coverage for python/lsst/daf/butler/core/dimensions/_coordinate.py: 28%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
23# Design notes for this module are in
24# doc/lsst.daf.butler/dev/dataCoordinate.py.
25#
27from __future__ import annotations
29__all__ = ("DataCoordinate", "DataId", "DataIdKey", "DataIdValue", "SerializedDataCoordinate")
31from abc import abstractmethod
32import numbers
33from typing import (
34 AbstractSet,
35 Any,
36 Dict,
37 Iterator,
38 Mapping,
39 Optional,
40 Tuple,
41 TYPE_CHECKING,
42 Union,
43)
44from pydantic import BaseModel
46from lsst.sphgeom import Region
47from ..named import NamedKeyDict, NamedKeyMapping, NameLookupMapping, NamedValueAbstractSet
48from ..timespan import Timespan
49from ._elements import Dimension, DimensionElement
50from ._graph import DimensionGraph
51from ._records import DimensionRecord, SerializedDimensionRecord
52from ..json import from_json_pydantic, to_json_pydantic
54if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 54 ↛ 55line 54 didn't jump to line 55, because the condition on line 54 was never true
55 from ._universe import DimensionUniverse
56 from ...registry import Registry
58DataIdKey = Union[str, Dimension]
59"""Type annotation alias for the keys that can be used to index a
60DataCoordinate.
61"""
63# Pydantic will cast int to str if str is first in the Union.
64DataIdValue = Union[int, str, None]
65"""Type annotation alias for the values that can be present in a
66DataCoordinate or other data ID.
67"""
70class SerializedDataCoordinate(BaseModel):
71 """Simplified model for serializing a `DataCoordinate`."""
73 dataId: Dict[str, DataIdValue]
74 records: Optional[Dict[str, SerializedDimensionRecord]] = None
77def _intersectRegions(*args: Region) -> Optional[Region]:
78 """Return the intersection of several regions.
80 For internal use by `ExpandedDataCoordinate` only.
82 If no regions are provided, returns `None`.
84 This is currently a placeholder; it actually returns `NotImplemented`
85 (it does *not* raise an exception) when multiple regions are given, which
86 propagates to `ExpandedDataCoordinate`. This reflects the fact that we
87 don't want to fail to construct an `ExpandedDataCoordinate` entirely when
88 we can't compute its region, and at present we don't have a high-level use
89 case for the regions of these particular data IDs.
90 """
91 if len(args) == 0:
92 return None
93 elif len(args) == 1:
94 return args[0]
95 else:
96 return NotImplemented
99class DataCoordinate(NamedKeyMapping[Dimension, DataIdValue]):
100 """Data ID dictionary.
102 An immutable data ID dictionary that guarantees that its key-value pairs
103 identify at least all required dimensions in a `DimensionGraph`.
105 `DataCoordinateSet` itself is an ABC, but provides `staticmethod` factory
106 functions for private concrete implementations that should be sufficient
107 for most purposes. `standardize` is the most flexible and safe of these;
108 the others (`makeEmpty`, `fromRequiredValues`, and `fromFullValues`) are
109 more specialized and perform little or no checking of inputs.
111 Notes
112 -----
113 Like any data ID class, `DataCoordinate` behaves like a dictionary, but
114 with some subtleties:
116 - Both `Dimension` instances and `str` names thereof may be used as keys
117 in lookup operations, but iteration (and `keys`) will yield `Dimension`
118 instances. The `names` property can be used to obtain the corresponding
119 `str` names.
121 - Lookups for implied dimensions (those in ``self.graph.implied``) are
122 supported if and only if `hasFull` returns `True`, and are never
123 included in iteration or `keys`. The `full` property may be used to
124 obtain a mapping whose keys do include implied dimensions.
126 - Equality comparison with other mappings is supported, but it always
127 considers only required dimensions (as well as requiring both operands
128 to identify the same dimensions). This is not quite consistent with the
129 way mappings usually work - normally differing keys imply unequal
130 mappings - but it makes sense in this context because data IDs with the
131 same values for required dimensions but different values for implied
132 dimensions represent a serious problem with the data that
133 `DataCoordinate` cannot generally recognize on its own, and a data ID
134 that knows implied dimension values should still be able to compare as
135 equal to one that does not. This is of course not the way comparisons
136 between simple `dict` data IDs work, and hence using a `DataCoordinate`
137 instance for at least one operand in any data ID comparison is strongly
138 recommended.
139 """
141 __slots__ = ()
143 _serializedType = SerializedDataCoordinate
145 @staticmethod
146 def standardize(
147 mapping: Optional[NameLookupMapping[Dimension, DataIdValue]] = None,
148 *,
149 graph: Optional[DimensionGraph] = None,
150 universe: Optional[DimensionUniverse] = None,
151 defaults: Optional[DataCoordinate] = None,
152 **kwargs: Any
153 ) -> DataCoordinate:
154 """Standardize the supplied dataId.
156 Adapts an arbitrary mapping and/or additional arguments into a true
157 `DataCoordinate`, or augment an existing one.
159 Parameters
160 ----------
161 mapping : `~collections.abc.Mapping`, optional
162 An informal data ID that maps dimensions or dimension names to
163 their primary key values (may also be a true `DataCoordinate`).
164 graph : `DimensionGraph`
165 The dimensions to be identified by the new `DataCoordinate`.
166 If not provided, will be inferred from the keys of ``mapping`` and
167 ``**kwargs``, and ``universe`` must be provided unless ``mapping``
168 is already a `DataCoordinate`.
169 universe : `DimensionUniverse`
170 All known dimensions and their relationships; used to expand
171 and validate dependencies when ``graph`` is not provided.
172 defaults : `DataCoordinate`, optional
173 Default dimension key-value pairs to use when needed. These are
174 never used to infer ``graph``, and are ignored if a different value
175 is provided for the same key in ``mapping`` or `**kwargs``.
176 **kwargs
177 Additional keyword arguments are treated like additional key-value
178 pairs in ``mapping``.
180 Returns
181 -------
182 coordinate : `DataCoordinate`
183 A validated `DataCoordinate` instance.
185 Raises
186 ------
187 TypeError
188 Raised if the set of optional arguments provided is not supported.
189 KeyError
190 Raised if a key-value pair for a required dimension is missing.
191 """
192 d: Dict[str, DataIdValue] = {}
193 if isinstance(mapping, DataCoordinate):
194 if graph is None:
195 if not kwargs:
196 # Already standardized to exactly what we want.
197 return mapping
198 elif kwargs.keys().isdisjoint(graph.dimensions.names):
199 # User provided kwargs, but told us not to use them by
200 # passing in dimensions that are disjoint from those kwargs.
201 # This is not necessarily user error - it's a useful pattern
202 # to pass in all of the key-value pairs you have and let the
203 # code here pull out only what it needs.
204 return mapping.subset(graph)
205 assert universe is None or universe == mapping.universe
206 universe = mapping.universe
207 d.update((name, mapping[name]) for name in mapping.graph.required.names)
208 if mapping.hasFull():
209 d.update((name, mapping[name]) for name in mapping.graph.implied.names)
210 elif isinstance(mapping, NamedKeyMapping):
211 d.update(mapping.byName())
212 elif mapping is not None:
213 d.update(mapping)
214 d.update(kwargs)
215 if graph is None:
216 if defaults is not None:
217 universe = defaults.universe
218 elif universe is None:
219 raise TypeError("universe must be provided if graph is not.")
220 graph = DimensionGraph(universe, names=d.keys())
221 if not graph.dimensions:
222 return DataCoordinate.makeEmpty(graph.universe)
223 if defaults is not None:
224 if defaults.hasFull():
225 for k, v in defaults.full.items():
226 d.setdefault(k.name, v)
227 else:
228 for k, v in defaults.items():
229 d.setdefault(k.name, v)
230 if d.keys() >= graph.dimensions.names:
231 values = tuple(d[name] for name in graph._dataCoordinateIndices.keys())
232 else:
233 try:
234 values = tuple(d[name] for name in graph.required.names)
235 except KeyError as err:
236 raise KeyError(f"No value in data ID ({mapping}) for required dimension {err}.") from err
237 # Some backends cannot handle numpy.int64 type which is a subclass of
238 # numbers.Integral; convert that to int.
239 values = tuple(int(val) if isinstance(val, numbers.Integral) # type: ignore
240 else val for val in values)
241 return _BasicTupleDataCoordinate(graph, values)
243 @staticmethod
244 def makeEmpty(universe: DimensionUniverse) -> DataCoordinate:
245 """Return an empty `DataCoordinate`.
247 It identifies the null set of dimensions.
249 Parameters
250 ----------
251 universe : `DimensionUniverse`
252 Universe to which this null dimension set belongs.
254 Returns
255 -------
256 dataId : `DataCoordinate`
257 A data ID object that identifies no dimensions. `hasFull` and
258 `hasRecords` are guaranteed to return `True`, because both `full`
259 and `records` are just empty mappings.
260 """
261 return _ExpandedTupleDataCoordinate(universe.empty, (), {})
263 @staticmethod
264 def fromRequiredValues(graph: DimensionGraph, values: Tuple[DataIdValue, ...]) -> DataCoordinate:
265 """Construct a `DataCoordinate` from required dimension values.
267 This is a low-level interface with at most assertion-level checking of
268 inputs. Most callers should use `standardize` instead.
270 Parameters
271 ----------
272 graph : `DimensionGraph`
273 Dimensions this data ID will identify.
274 values : `tuple` [ `int` or `str` ]
275 Tuple of primary key values corresponding to ``graph.required``,
276 in that order.
278 Returns
279 -------
280 dataId : `DataCoordinate`
281 A data ID object that identifies the given dimensions.
282 ``dataId.hasFull()`` will return `True` if and only if
283 ``graph.implied`` is empty, and ``dataId.hasRecords()`` will never
284 return `True`.
285 """
286 assert len(graph.required) == len(values), \
287 f"Inconsistency between dimensions {graph.required} and required values {values}."
288 return _BasicTupleDataCoordinate(graph, values)
290 @staticmethod
291 def fromFullValues(graph: DimensionGraph, values: Tuple[DataIdValue, ...]) -> DataCoordinate:
292 """Construct a `DataCoordinate` from all dimension values.
294 This is a low-level interface with at most assertion-level checking of
295 inputs. Most callers should use `standardize` instead.
297 Parameters
298 ----------
299 graph : `DimensionGraph`
300 Dimensions this data ID will identify.
301 values : `tuple` [ `int` or `str` ]
302 Tuple of primary key values corresponding to
303 ``itertools.chain(graph.required, graph.implied)``, in that order.
304 Note that this is _not_ the same order as ``graph.dimensions``,
305 though these contain the same elements.
307 Returns
308 -------
309 dataId : `DataCoordinate`
310 A data ID object that identifies the given dimensions.
311 ``dataId.hasFull()`` will return `True` if and only if
312 ``graph.implied`` is empty, and ``dataId.hasRecords()`` will never
313 return `True`.
314 """
315 assert len(graph.dimensions) == len(values), \
316 f"Inconsistency between dimensions {graph.dimensions} and full values {values}."
317 return _BasicTupleDataCoordinate(graph, values)
319 def __hash__(self) -> int:
320 return hash((self.graph,) + tuple(self[d.name] for d in self.graph.required))
322 def __eq__(self, other: Any) -> bool:
323 if not isinstance(other, DataCoordinate):
324 other = DataCoordinate.standardize(other, universe=self.universe)
325 return self.graph == other.graph and all(self[d.name] == other[d.name] for d in self.graph.required)
327 def __repr__(self) -> str:
328 # We can't make repr yield something that could be exec'd here without
329 # printing out the whole DimensionUniverse the graph is derived from.
330 # So we print something that mostly looks like a dict, but doesn't
331 # quote its keys: that's both more compact and something that can't
332 # be mistaken for an actual dict or something that could be exec'd.
333 terms = [f"{d}: {self[d]!r}" for d in self.graph.required.names]
334 if self.hasFull() and self.graph.required != self.graph.dimensions:
335 terms.append("...")
336 return "{{{}}}".format(', '.join(terms))
338 def __lt__(self, other: Any) -> bool:
339 # Allow DataCoordinate to be sorted
340 if not isinstance(other, type(self)):
341 return NotImplemented
342 # Form tuple of tuples for each DataCoordinate:
343 # Unlike repr() we only use required keys here to ensure that
344 # __eq__ can not be true simultaneously with __lt__ being true.
345 self_kv = tuple(self.items())
346 other_kv = tuple(other.items())
348 return self_kv < other_kv
350 def __iter__(self) -> Iterator[Dimension]:
351 return iter(self.keys())
353 def __len__(self) -> int:
354 return len(self.keys())
356 def keys(self) -> NamedValueAbstractSet[Dimension]:
357 return self.graph.required
359 @property
360 def names(self) -> AbstractSet[str]:
361 """Names of the required dimensions identified by this data ID.
363 They are returned in the same order as `keys`
364 (`collections.abc.Set` [ `str` ]).
365 """
366 return self.keys().names
368 @abstractmethod
369 def subset(self, graph: DimensionGraph) -> DataCoordinate:
370 """Return a `DataCoordinate` whose graph is a subset of ``self.graph``.
372 Parameters
373 ----------
374 graph : `DimensionGraph`
375 The dimensions identified by the returned `DataCoordinate`.
377 Returns
378 -------
379 coordinate : `DataCoordinate`
380 A `DataCoordinate` instance that identifies only the given
381 dimensions. May be ``self`` if ``graph == self.graph``.
383 Raises
384 ------
385 KeyError
386 Raised if the primary key value for one or more required dimensions
387 is unknown. This may happen if ``graph.issubset(self.graph)`` is
388 `False`, or even if ``graph.issubset(self.graph)`` is `True`, if
389 ``self.hasFull()`` is `False` and
390 ``graph.required.issubset(self.graph.required)`` is `False`. As
391 an example of the latter case, consider trying to go from a data ID
392 with dimensions {instrument, physical_filter, band} to
393 just {instrument, band}; band is implied by
394 physical_filter and hence would have no value in the original data
395 ID if ``self.hasFull()`` is `False`.
397 Notes
398 -----
399 If `hasFull` and `hasRecords` return `True` on ``self``, they will
400 return `True` (respectively) on the returned `DataCoordinate` as well.
401 The converse does not hold.
402 """
403 raise NotImplementedError()
405 @abstractmethod
406 def union(self, other: DataCoordinate) -> DataCoordinate:
407 """Combine two data IDs.
409 Yields a new one that identifies all dimensions that either of them
410 identify.
412 Parameters
413 ----------
414 other : `DataCoordinate`
415 Data ID to combine with ``self``.
417 Returns
418 -------
419 unioned : `DataCoordinate`
420 A `DataCoordinate` instance that satisfies
421 ``unioned.graph == self.graph.union(other.graph)``. Will preserve
422 ``hasFull`` and ``hasRecords`` whenever possible.
424 Notes
425 -----
426 No checking for consistency is performed on values for keys that
427 ``self`` and ``other`` have in common, and which value is included in
428 the returned data ID is not specified.
429 """
430 raise NotImplementedError()
432 @abstractmethod
433 def expanded(self, records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]]
434 ) -> DataCoordinate:
435 """Return a `DataCoordinate` that holds the given records.
437 Guarantees that `hasRecords` returns `True`.
439 This is a low-level interface with at most assertion-level checking of
440 inputs. Most callers should use `Registry.expandDataId` instead.
442 Parameters
443 ----------
444 records : `Mapping` [ `str`, `DimensionRecord` or `None` ]
445 A `NamedKeyMapping` with `DimensionElement` keys or a regular
446 `Mapping` with `str` (`DimensionElement` name) keys and
447 `DimensionRecord` values. Keys must cover all elements in
448 ``self.graph.elements``. Values may be `None`, but only to reflect
449 actual NULL values in the database, not just records that have not
450 been fetched.
451 """
452 raise NotImplementedError()
454 @property
455 def universe(self) -> DimensionUniverse:
456 """Universe that defines all known compatible dimensions.
458 The univers will be compatible with this coordinate
459 (`DimensionUniverse`).
460 """
461 return self.graph.universe
463 @property
464 @abstractmethod
465 def graph(self) -> DimensionGraph:
466 """Dimensions identified by this data ID (`DimensionGraph`).
468 Note that values are only required to be present for dimensions in
469 ``self.graph.required``; all others may be retrieved (from a
470 `Registry`) given these.
471 """
472 raise NotImplementedError()
474 @abstractmethod
475 def hasFull(self) -> bool:
476 """Whether this data ID contains implied and required values.
478 Returns
479 -------
480 state : `bool`
481 If `True`, `__getitem__`, `get`, and `__contains__` (but not
482 `keys`!) will act as though the mapping includes key-value pairs
483 for implied dimensions, and the `full` property may be used. If
484 `False`, these operations only include key-value pairs for required
485 dimensions, and accessing `full` is an error. Always `True` if
486 there are no implied dimensions.
487 """
488 raise NotImplementedError()
490 @property
491 def full(self) -> NamedKeyMapping[Dimension, DataIdValue]:
492 """Return mapping for all dimensions in ``self.graph``.
494 The mapping includes key-value pairs for all dimensions in
495 ``self.graph``, including implied (`NamedKeyMapping`).
497 Accessing this attribute if `hasFull` returns `False` is a logic error
498 that may raise an exception of unspecified type either immediately or
499 when implied keys are accessed via the returned mapping, depending on
500 the implementation and whether assertions are enabled.
501 """
502 assert self.hasFull(), "full may only be accessed if hasFull() returns True."
503 return _DataCoordinateFullView(self)
505 @abstractmethod
506 def hasRecords(self) -> bool:
507 """Whether this data ID contains records.
509 These are the records for all of the dimension elements it identifies.
511 Returns
512 -------
513 state : `bool`
514 If `True`, the following attributes may be accessed:
516 - `records`
517 - `region`
518 - `timespan`
519 - `pack`
521 If `False`, accessing any of these is considered a logic error.
522 """
523 raise NotImplementedError()
525 @property
526 def records(self) -> NamedKeyMapping[DimensionElement, Optional[DimensionRecord]]:
527 """Return the records.
529 Returns a mapping that contains `DimensionRecord` objects for all
530 elements identified by this data ID (`NamedKeyMapping`).
532 The values of this mapping may be `None` if and only if there is no
533 record for that element with these dimensions in the database (which
534 means some foreign key field must have a NULL value).
536 Accessing this attribute if `hasRecords` returns `False` is a logic
537 error that may raise an exception of unspecified type either
538 immediately or when the returned mapping is used, depending on the
539 implementation and whether assertions are enabled.
540 """
541 assert self.hasRecords(), "records may only be accessed if hasRecords() returns True."
542 return _DataCoordinateRecordsView(self)
544 @abstractmethod
545 def _record(self, name: str) -> Optional[DimensionRecord]:
546 """Protected implementation hook that backs the ``records`` attribute.
548 Parameters
549 ----------
550 name : `str`
551 The name of a `DimensionElement`, guaranteed to be in
552 ``self.graph.elements.names``.
554 Returns
555 -------
556 record : `DimensionRecord` or `None`
557 The dimension record for the given element identified by this
558 data ID, or `None` if there is no such record.
559 """
560 raise NotImplementedError()
562 @property
563 def region(self) -> Optional[Region]:
564 """Spatial region associated with this data ID.
566 (`lsst.sphgeom.Region` or `None`).
568 This is `None` if and only if ``self.graph.spatial`` is empty.
570 Accessing this attribute if `hasRecords` returns `False` is a logic
571 error that may or may not raise an exception, depending on the
572 implementation and whether assertions are enabled.
573 """
574 assert self.hasRecords(), "region may only be accessed if hasRecords() returns True."
575 regions = []
576 for family in self.graph.spatial:
577 element = family.choose(self.graph.elements)
578 record = self._record(element.name)
579 if record is None or record.region is None:
580 return None
581 else:
582 regions.append(record.region)
583 return _intersectRegions(*regions)
585 @property
586 def timespan(self) -> Optional[Timespan]:
587 """Temporal interval associated with this data ID.
589 (`Timespan` or `None`).
591 This is `None` if and only if ``self.graph.timespan`` is empty.
593 Accessing this attribute if `hasRecords` returns `False` is a logic
594 error that may or may not raise an exception, depending on the
595 implementation and whether assertions are enabled.
596 """
597 assert self.hasRecords(), "timespan may only be accessed if hasRecords() returns True."
598 timespans = []
599 for family in self.graph.temporal:
600 element = family.choose(self.graph.elements)
601 record = self._record(element.name)
602 # DimensionRecord subclasses for temporal elements always have
603 # .timespan, but they're dynamic so this can't be type-checked.
604 if record is None or record.timespan is None:
605 return None
606 else:
607 timespans.append(record.timespan)
608 return Timespan.intersection(*timespans)
610 def pack(self, name: str, *, returnMaxBits: bool = False) -> Union[Tuple[int, int], int]:
611 """Pack this data ID into an integer.
613 Parameters
614 ----------
615 name : `str`
616 Name of the `DimensionPacker` algorithm (as defined in the
617 dimension configuration).
618 returnMaxBits : `bool`, optional
619 If `True` (`False` is default), return the maximum number of
620 nonzero bits in the returned integer across all data IDs.
622 Returns
623 -------
624 packed : `int`
625 Integer ID. This ID is unique only across data IDs that have
626 the same values for the packer's "fixed" dimensions.
627 maxBits : `int`, optional
628 Maximum number of nonzero bits in ``packed``. Not returned unless
629 ``returnMaxBits`` is `True`.
631 Notes
632 -----
633 Accessing this attribute if `hasRecords` returns `False` is a logic
634 error that may or may not raise an exception, depending on the
635 implementation and whether assertions are enabled.
636 """
637 assert self.hasRecords(), "pack() may only be called if hasRecords() returns True."
638 return self.universe.makePacker(name, self).pack(self, returnMaxBits=returnMaxBits)
640 def to_simple(self, minimal: bool = False) -> SerializedDataCoordinate:
641 """Convert this class to a simple python type.
643 This is suitable for serialization.
645 Parameters
646 ----------
647 minimal : `bool`, optional
648 Use minimal serialization. If set the records will not be attached.
650 Returns
651 -------
652 simple : `SerializedDataCoordinate`
653 The object converted to simple form.
654 """
655 # Convert to a dict form
656 if self.hasFull():
657 dataId = self.full.byName()
658 else:
659 dataId = self.byName()
660 records: Optional[Dict[str, SerializedDimensionRecord]]
661 if not minimal and self.hasRecords():
662 records = {k: v.to_simple() for k, v in self.records.byName().items() if v is not None}
663 else:
664 records = None
666 return SerializedDataCoordinate(dataId=dataId, records=records)
668 @classmethod
669 def from_simple(cls, simple: SerializedDataCoordinate,
670 universe: Optional[DimensionUniverse] = None,
671 registry: Optional[Registry] = None) -> DataCoordinate:
672 """Construct a new object from the simplified form.
674 The data is assumed to be of the form returned from the `to_simple`
675 method.
677 Parameters
678 ----------
679 simple : `dict` of [`str`, `Any`]
680 The `dict` returned by `to_simple()`.
681 universe : `DimensionUniverse`
682 The special graph of all known dimensions.
683 registry : `lsst.daf.butler.Registry`, optional
684 Registry from which a universe can be extracted. Can be `None`
685 if universe is provided explicitly.
687 Returns
688 -------
689 dataId : `DataCoordinate`
690 Newly-constructed object.
691 """
692 if universe is None and registry is None:
693 raise ValueError("One of universe or registry is required to convert a dict to a DataCoordinate")
694 if universe is None and registry is not None:
695 universe = registry.dimensions
696 if universe is None:
697 # this is for mypy
698 raise ValueError("Unable to determine a usable universe")
700 dataId = cls.standardize(simple.dataId, universe=universe)
701 if simple.records:
702 dataId = dataId.expanded({k: DimensionRecord.from_simple(v, universe=universe)
703 for k, v in simple.records.items()})
704 return dataId
706 to_json = to_json_pydantic
707 from_json = classmethod(from_json_pydantic)
710DataId = Union[DataCoordinate, Mapping[str, Any]]
711"""A type-annotation alias for signatures that accept both informal data ID
712dictionaries and validated `DataCoordinate` instances.
713"""
716class _DataCoordinateFullView(NamedKeyMapping[Dimension, DataIdValue]):
717 """View class for `DataCoordinate.full`.
719 Provides the default implementation for
720 `DataCoordinate.full`.
722 Parameters
723 ----------
724 target : `DataCoordinate`
725 The `DataCoordinate` instance this object provides a view of.
726 """
728 def __init__(self, target: DataCoordinate):
729 self._target = target
731 __slots__ = ("_target",)
733 def __repr__(self) -> str:
734 terms = [f"{d}: {self[d]!r}" for d in self._target.graph.dimensions.names]
735 return "{{{}}}".format(', '.join(terms))
737 def __getitem__(self, key: DataIdKey) -> DataIdValue:
738 return self._target[key]
740 def __iter__(self) -> Iterator[Dimension]:
741 return iter(self.keys())
743 def __len__(self) -> int:
744 return len(self.keys())
746 def keys(self) -> NamedValueAbstractSet[Dimension]:
747 return self._target.graph.dimensions
749 @property
750 def names(self) -> AbstractSet[str]:
751 # Docstring inherited from `NamedKeyMapping`.
752 return self.keys().names
755class _DataCoordinateRecordsView(NamedKeyMapping[DimensionElement, Optional[DimensionRecord]]):
756 """View class for `DataCoordinate.records`.
758 Provides the default implementation for
759 `DataCoordinate.records`.
761 Parameters
762 ----------
763 target : `DataCoordinate`
764 The `DataCoordinate` instance this object provides a view of.
765 """
767 def __init__(self, target: DataCoordinate):
768 self._target = target
770 __slots__ = ("_target",)
772 def __repr__(self) -> str:
773 terms = [f"{d}: {self[d]!r}" for d in self._target.graph.elements.names]
774 return "{{{}}}".format(', '.join(terms))
776 def __str__(self) -> str:
777 return "\n".join(str(v) for v in self.values())
779 def __getitem__(self, key: Union[DimensionElement, str]) -> Optional[DimensionRecord]:
780 if isinstance(key, DimensionElement):
781 key = key.name
782 return self._target._record(key)
784 def __iter__(self) -> Iterator[DimensionElement]:
785 return iter(self.keys())
787 def __len__(self) -> int:
788 return len(self.keys())
790 def keys(self) -> NamedValueAbstractSet[DimensionElement]:
791 return self._target.graph.elements
793 @property
794 def names(self) -> AbstractSet[str]:
795 # Docstring inherited from `NamedKeyMapping`.
796 return self.keys().names
799class _BasicTupleDataCoordinate(DataCoordinate):
800 """Standard implementation of `DataCoordinate`.
802 Backed by a tuple of values.
804 This class should only be accessed outside this module via the
805 `DataCoordinate` interface, and should only be constructed via the static
806 methods there.
808 Parameters
809 ----------
810 graph : `DimensionGraph`
811 The dimensions to be identified.
812 values : `tuple` [ `int` or `str` ]
813 Data ID values, ordered to match ``graph._dataCoordinateIndices``. May
814 include values for just required dimensions (which always come first)
815 or all dimensions.
816 """
818 def __init__(self, graph: DimensionGraph, values: Tuple[DataIdValue, ...]):
819 self._graph = graph
820 self._values = values
822 __slots__ = ("_graph", "_values")
824 @property
825 def graph(self) -> DimensionGraph:
826 # Docstring inherited from DataCoordinate.
827 return self._graph
829 def __getitem__(self, key: DataIdKey) -> DataIdValue:
830 # Docstring inherited from DataCoordinate.
831 if isinstance(key, Dimension):
832 key = key.name
833 index = self._graph._dataCoordinateIndices[key]
834 try:
835 return self._values[index]
836 except IndexError:
837 # Caller asked for an implied dimension, but this object only has
838 # values for the required ones.
839 raise KeyError(key) from None
841 def subset(self, graph: DimensionGraph) -> DataCoordinate:
842 # Docstring inherited from DataCoordinate.
843 if self._graph == graph:
844 return self
845 elif self.hasFull() or self._graph.required >= graph.dimensions:
846 return _BasicTupleDataCoordinate(
847 graph,
848 tuple(self[k] for k in graph._dataCoordinateIndices.keys()),
849 )
850 else:
851 return _BasicTupleDataCoordinate(graph, tuple(self[k] for k in graph.required.names))
853 def union(self, other: DataCoordinate) -> DataCoordinate:
854 # Docstring inherited from DataCoordinate.
855 graph = self.graph.union(other.graph)
856 # See if one or both input data IDs is already what we want to return;
857 # if so, return the most complete one we have.
858 if other.graph == graph:
859 if self.graph == graph:
860 # Input data IDs have the same graph (which is also the result
861 # graph), but may not have the same content.
862 # other might have records; self does not, so try other first.
863 # If it at least has full values, it's no worse than self.
864 if other.hasFull():
865 return other
866 else:
867 return self
868 elif other.hasFull():
869 return other
870 # There's some chance that neither self nor other has full values,
871 # but together provide enough to the union to. Let the general
872 # case below handle that.
873 elif self.graph == graph:
874 # No chance at returning records. If self has full values, it's
875 # the best we can do.
876 if self.hasFull():
877 return self
878 # General case with actual merging of dictionaries.
879 values = self.full.byName() if self.hasFull() else self.byName()
880 values.update(other.full.byName() if other.hasFull() else other.byName())
881 return DataCoordinate.standardize(values, graph=graph)
883 def expanded(self, records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]]
884 ) -> DataCoordinate:
885 # Docstring inherited from DataCoordinate
886 values = self._values
887 if not self.hasFull():
888 # Extract a complete values tuple from the attributes of the given
889 # records. It's possible for these to be inconsistent with
890 # self._values (which is a serious problem, of course), but we've
891 # documented this as a no-checking API.
892 values += tuple(getattr(records[d.name], d.primaryKey.name) for d in self._graph.implied)
893 return _ExpandedTupleDataCoordinate(self._graph, values, records)
895 def hasFull(self) -> bool:
896 # Docstring inherited from DataCoordinate.
897 return len(self._values) == len(self._graph._dataCoordinateIndices)
899 def hasRecords(self) -> bool:
900 # Docstring inherited from DataCoordinate.
901 return False
903 def _record(self, name: str) -> Optional[DimensionRecord]:
904 # Docstring inherited from DataCoordinate.
905 assert False
908class _ExpandedTupleDataCoordinate(_BasicTupleDataCoordinate):
909 """A `DataCoordinate` implementation that can hold `DimensionRecord`.
911 This class should only be accessed outside this module via the
912 `DataCoordinate` interface, and should only be constructed via calls to
913 `DataCoordinate.expanded`.
915 Parameters
916 ----------
917 graph : `DimensionGraph`
918 The dimensions to be identified.
919 values : `tuple` [ `int` or `str` ]
920 Data ID values, ordered to match ``graph._dataCoordinateIndices``.
921 May include values for just required dimensions (which always come
922 first) or all dimensions.
923 records : `Mapping` [ `str`, `DimensionRecord` or `None` ]
924 A `NamedKeyMapping` with `DimensionElement` keys or a regular
925 `Mapping` with `str` (`DimensionElement` name) keys and
926 `DimensionRecord` values. Keys must cover all elements in
927 ``self.graph.elements``. Values may be `None`, but only to reflect
928 actual NULL values in the database, not just records that have not
929 been fetched.
930 """
932 def __init__(self, graph: DimensionGraph, values: Tuple[DataIdValue, ...],
933 records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]]):
934 super().__init__(graph, values)
935 assert super().hasFull(), "This implementation requires full dimension records."
936 self._records = records
938 __slots__ = ("_records",)
940 def subset(self, graph: DimensionGraph) -> DataCoordinate:
941 # Docstring inherited from DataCoordinate.
942 if self._graph == graph:
943 return self
944 return _ExpandedTupleDataCoordinate(graph,
945 tuple(self[k] for k in graph._dataCoordinateIndices.keys()),
946 records=self._records)
948 def expanded(self, records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]]
949 ) -> DataCoordinate:
950 # Docstring inherited from DataCoordinate.
951 return self
953 def union(self, other: DataCoordinate) -> DataCoordinate:
954 # Docstring inherited from DataCoordinate.
955 graph = self.graph.union(other.graph)
956 # See if one or both input data IDs is already what we want to return;
957 # if so, return the most complete one we have.
958 if self.graph == graph:
959 # self has records, so even if other is also a valid result, it's
960 # no better.
961 return self
962 if other.graph == graph:
963 # If other has full values, and self does not identify some of
964 # those, it's the base we can do. It may have records, too.
965 if other.hasFull():
966 return other
967 # If other does not have full values, there's a chance self may
968 # provide the values needed to complete it. For example, self
969 # could be {band} while other could be
970 # {instrument, physical_filter, band}, with band unknown.
971 # General case with actual merging of dictionaries.
972 values = self.full.byName()
973 values.update(other.full.byName() if other.hasFull() else other.byName())
974 basic = DataCoordinate.standardize(values, graph=graph)
975 # See if we can add records.
976 if self.hasRecords() and other.hasRecords():
977 # Sometimes the elements of a union of graphs can contain elements
978 # that weren't in either input graph (because graph unions are only
979 # on dimensions). e.g. {visit} | {detector} brings along
980 # visit_detector_region.
981 elements = set(graph.elements.names)
982 elements -= self.graph.elements.names
983 elements -= other.graph.elements.names
984 if not elements:
985 records = NamedKeyDict[DimensionElement, Optional[DimensionRecord]](self.records)
986 records.update(other.records)
987 return basic.expanded(records.freeze())
988 return basic
990 def hasFull(self) -> bool:
991 # Docstring inherited from DataCoordinate.
992 return True
994 def hasRecords(self) -> bool:
995 # Docstring inherited from DataCoordinate.
996 return True
998 def _record(self, name: str) -> Optional[DimensionRecord]:
999 # Docstring inherited from DataCoordinate.
1000 return self._records[name]