Coverage for python / lsst / daf / butler / dimensions / _coordinate.py: 34%
389 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-22 08:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-22 08:55 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28#
29# Design notes for this module are in
30# doc/lsst.daf.butler/dev/dataCoordinate.py.
31#
33from __future__ import annotations
35__all__ = (
36 "DataCoordinate",
37 "DataId",
38 "SerializedDataCoordinate",
39 "SerializedDataId",
40)
42import numbers
43from abc import abstractmethod
44from collections.abc import Callable, Iterable, Iterator, Mapping
45from typing import TYPE_CHECKING, Any, ClassVar, Self, TypeAlias, cast, overload
47import pydantic
49from lsst.sphgeom import IntersectionRegion, Region
51from .._exceptions import DimensionNameError
52from .._timespan import Timespan
53from ..json import from_json_pydantic, to_json_pydantic
54from ..persistence_context import PersistenceContextVars
55from ._group import DimensionGroup
56from ._records import DataIdValue, DimensionRecord, SerializedDimensionRecord
58if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
59 from ..registry import Registry
60 from ._universe import DimensionUniverse
62SerializedDataId: TypeAlias = dict[str, DataIdValue]
63"""Simplified model for serializing the ``mapping`` property of
64`DataCoordinate`.
65"""
class SerializedDataCoordinate(pydantic.BaseModel):
    """Simplified model for serializing a `DataCoordinate`."""

    dataId: SerializedDataId
    records: dict[str, SerializedDimensionRecord] | None = None

    @classmethod
    def direct(cls, *, dataId: SerializedDataId, records: dict[str, dict] | None) -> SerializedDataCoordinate:
        """Construct a `SerializedDataCoordinate` directly without validators.

        Parameters
        ----------
        dataId : `SerializedDataId`
            The data ID.
        records : `dict` or `None`
            The dimension records.

        Notes
        -----
        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.
        """
        # Cache on (data ID contents, whether records were attached) so that
        # repeated deserialization of the same data ID reuses one instance.
        cache = PersistenceContextVars.serializedDataCoordinateMapping.get()
        key = (frozenset(dataId.items()), records is not None)
        if cache is not None:
            cached = cache.get(key)
            if cached is not None:
                return cached

        serialized_records = (
            None
            if records is None
            else {name: SerializedDimensionRecord.direct(**record) for name, record in records.items()}
        )

        node = cls.model_construct(dataId=dataId, records=serialized_records)

        if cache is not None:
            cache[key] = node
        return node
110def _intersectRegions(*args: Region) -> Region | None:
111 """Return the intersection of several regions.
113 For internal use by `ExpandedDataCoordinate` only.
115 If no regions are provided, returns `None`.
116 """
117 if len(args) == 0:
118 return None
119 else:
120 result = args[0]
121 for n in range(1, len(args)):
122 result = IntersectionRegion(result, args[n])
123 return result
class DataCoordinate:
    """A validated data ID.

    DataCoordinate guarantees that its key-value pairs identify at least all
    required dimensions in a `DimensionGroup`.

    Notes
    -----
    `DataCoordinate` is an ABC, but it provides `staticmethod` factory
    functions for private concrete implementations that should be sufficient
    for most purposes.  `standardize` is the most flexible and safe of these;
    the others (`make_empty`, `from_required_values`, and `from_full_values`)
    are more specialized and perform little or no checking of inputs.

    Lookups for implied dimensions (those in ``self.dimensions.implied``) are
    supported if and only if `has_full_values` is `True`.  This also sets the
    keys of the ``mapping`` attribute.  This means that `DataCoordinate`
    equality is not the same as testing for equality on the ``mapping``
    attribute (instead, it is the same as testing for equality on the
    ``required`` attribute).

    See also :ref:`lsst.daf.butler-dimensions_data_ids`
    """

    __slots__ = ()

    _serializedType: ClassVar[type[pydantic.BaseModel]] = SerializedDataCoordinate

    @staticmethod
    def standardize(
        mapping: Mapping[str, DataIdValue] | DataCoordinate | None = None,
        *,
        dimensions: Iterable[str] | DimensionGroup | None = None,
        universe: DimensionUniverse | None = None,
        defaults: DataCoordinate | None = None,
        **kwargs: Any,
    ) -> DataCoordinate:
        """Standardize the supplied dataId.

        Adapts an arbitrary mapping and/or additional arguments into a true
        `DataCoordinate`, or augment an existing one.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping` or `DataCoordinate`, optional
            An informal data ID that maps dimensions or dimension names to
            their primary key values (may also be a true `DataCoordinate`).
        dimensions : `~collections.abc.Iterable` [ `str` ], `DimensionGroup`, \
                optional
            The dimensions to be identified by the new `DataCoordinate`. If
            not provided, will be inferred from the keys of ``mapping`` and
            ``**kwargs``, and ``universe`` must be provided unless ``mapping``
            is already a `DataCoordinate`.
        universe : `DimensionUniverse`
            All known dimensions and their relationships; used to expand and
            validate dependencies when ``dimensions`` is not provided.
        defaults : `DataCoordinate`, optional
            Default dimension key-value pairs to use when needed.  These are
            never used to infer ``group``, and are ignored if a different
            value is provided for the same key in ``mapping`` or ``**kwargs``.
        **kwargs
            Additional keyword arguments are treated like additional key-value
            pairs in ``mapping``.

        Returns
        -------
        coordinate : `DataCoordinate`
            A validated `DataCoordinate` instance.

        Raises
        ------
        TypeError
            Raised if the set of optional arguments provided is not supported.
        DimensionNameError
            Raised if a key-value pair for a required dimension is missing.
        """
        universe = universe or getattr(dimensions, "universe", None) or getattr(mapping, "universe", None)
        if universe is None:
            raise TypeError("universe must be provided, either directly or via dimensions or mapping.")
        if dimensions is not None:
            dimensions = universe.conform(dimensions)
        new_mapping: dict[str, DataIdValue] = {}
        if isinstance(mapping, DataCoordinate):
            if dimensions is None:
                if not kwargs:
                    # Already standardized to exactly what we want.
                    return mapping
            elif kwargs.keys().isdisjoint(dimensions.names):
                # User provided kwargs, but told us not to use them by
                # passing in dimensions that are disjoint from those kwargs.
                # This is not necessarily user error - it's a useful pattern
                # to pass in all of the key-value pairs you have and let the
                # code here pull out only what it needs.
                return mapping.subset(dimensions.names)
            new_mapping.update((name, mapping[name]) for name in mapping.dimensions.required)
            if mapping.hasFull():
                new_mapping.update((name, mapping[name]) for name in mapping.dimensions.implied)
        elif mapping is not None:
            new_mapping.update(mapping)
        new_mapping.update(kwargs)
        if dimensions is None:
            if defaults is not None:
                universe = defaults.universe
            # Note: ``universe`` is guaranteed non-None here; it was validated
            # at the top of this method.
            dimensions = DimensionGroup(universe, new_mapping.keys())
        if not dimensions:
            return DataCoordinate.make_empty(universe)
        # Some backends cannot handle numpy.int64 type which is a subclass of
        # numbers.Integral; convert that to int.
        for k, v in new_mapping.items():
            if isinstance(v, numbers.Integral):  # type: ignore
                new_mapping[k] = int(v)  # type: ignore
        if defaults is not None:
            for k, v in defaults.mapping.items():
                new_mapping.setdefault(k, v)
        if new_mapping.keys() >= dimensions.names:
            return DataCoordinate.from_full_values(
                dimensions, tuple(new_mapping[name] for name in dimensions.data_coordinate_keys)
            )
        else:
            try:
                values = tuple(new_mapping[name] for name in dimensions.required)
            except KeyError as err:
                raise DimensionNameError(
                    f"No value in data ID ({mapping}) for required dimension {err}."
                ) from err
            return DataCoordinate.from_required_values(dimensions, values)

    @property
    @abstractmethod
    def mapping(self) -> Mapping[str, DataIdValue]:
        """A mapping view of the data ID with keys for all dimensions it has
        values for.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def required(self) -> Mapping[str, DataIdValue]:
        """A mapping view of the data ID with keys for just its required
        dimensions.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def required_values(self) -> tuple[DataIdValue, ...]:
        """The required values (only) of this data ID as a tuple.

        Element order is consistent with `required`.

        In contexts where all data IDs have the same dimensions, comparing and
        hashing these tuples can be much faster than comparing the original
        `DataCoordinate` instances.
        """
        raise NotImplementedError()

    @property
    def full_values(self) -> tuple[DataIdValue, ...]:
        """The full values (only) of this data ID as a tuple.

        Element order is consistent with `DimensionGroup.data_coordinate_keys`,
        i.e. all required dimensions followed by all implied dimensions.
        """
        raise ValueError(f"DataCoordinate {self} has only required values.")

    @staticmethod
    def makeEmpty(universe: DimensionUniverse) -> DataCoordinate:
        """Return an empty `DataCoordinate`.

        It identifies the null set of dimensions.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Universe to which this null dimension set belongs.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID object that identifies no dimensions.  `hasFull` and
            `hasRecords` are guaranteed to return `True`, because both
            `full_values` and `records` are just empty mappings.
        """
        return DataCoordinate.make_empty(universe)

    @staticmethod
    def make_empty(universe: DimensionUniverse) -> DataCoordinate:
        """Return an empty `DataCoordinate`.

        It identifies the null set of dimensions.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Universe to which this null dimension set belongs.

        Returns
        -------
        data_id : `DataCoordinate`
            A data ID object that identifies no dimensions.  `hasFull` and
            `hasRecords` are guaranteed to return `True`, because both
            `full_values` and `records` are just empty mappings.
        """
        return _ExpandedTupleDataCoordinate(universe.empty, (), {})

    @staticmethod
    def from_required_values(dimensions: DimensionGroup, values: tuple[DataIdValue, ...]) -> DataCoordinate:
        """Construct a `DataCoordinate` from required dimension values.

        This is a low-level interface with at most assertion-level checking of
        inputs.  Most callers should use `standardize` instead.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions this data ID will identify.
        values : `tuple` [ `int` or `str` ]
            Tuple of primary key values corresponding to
            ``dimensions.required``, in that order.

        Returns
        -------
        data_id : `DataCoordinate`
            A data ID object that identifies the given dimensions.
            ``dataId.hasFull()`` will return `True` only if
            ``dimensions.implied`` is empty.  ``dataId.hasRecords()`` will
            return `True` if and only if ``dimensions`` is empty.
        """
        assert len(dimensions.required) == len(values), (
            f"Inconsistency between dimensions {dimensions.required} and required values {values}."
        )
        if not dimensions:
            return DataCoordinate.make_empty(dimensions.universe)
        if not dimensions.implied:
            # With no implied dimensions the required values are also the full
            # values, so the more capable implementation can be used.
            return _FullTupleDataCoordinate(dimensions, values)
        return _RequiredTupleDataCoordinate(dimensions, values)

    @staticmethod
    def from_full_values(dimensions: DimensionGroup, values: tuple[DataIdValue, ...]) -> DataCoordinate:
        """Construct a `DataCoordinate` from all dimension values.

        This is a low-level interface with at most assertion-level checking of
        inputs.  Most callers should use `standardize` instead.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions this data ID will identify.
        values : `tuple` [ `int` or `str` ]
            Tuple of primary key values corresponding to
            ``itertools.chain(dimensions.required, dimensions.implied)``, in
            that order.  Note that this is _not_ the same order as
            ``dimensions.names``, though these contain the same elements.

        Returns
        -------
        data_id : `DataCoordinate`
            A data ID object that identifies the given dimensions.
            ``dataId.hasFull()`` will always return `True`.
            ``dataId.hasRecords()`` will only return `True` if ``dimensions``
            is empty.
        """
        assert len(dimensions) == len(values), (
            f"Inconsistency between dimensions {dimensions.data_coordinate_keys} and full values {values}."
        )
        if not dimensions:
            return DataCoordinate.make_empty(dimensions.universe)
        return _FullTupleDataCoordinate(dimensions, values)

    def __bool__(self) -> bool:
        return bool(self.dimensions)

    def __hash__(self) -> int:
        return hash((self.dimensions,) + self.required_values)

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, DataCoordinate):
            other = DataCoordinate.standardize(other, universe=self.universe)
        return self.dimensions == other.dimensions and self.required_values == other.required_values

    @abstractmethod
    def __getitem__(self, key: str) -> DataIdValue:
        raise NotImplementedError()

    def __contains__(self, key: str) -> bool:
        try:
            self.__getitem__(key)
            return True
        except KeyError:
            return False

    @overload
    def get(self, key: str) -> DataIdValue | None: ...

    @overload
    def get(self, key: str, default: int) -> int: ...

    @overload
    def get(self, key: str, default: str) -> str: ...

    @overload
    def get(self, key: str, default: DataIdValue | None) -> DataIdValue | None: ...

    def get(self, key: str, default: DataIdValue | None = None) -> DataIdValue | None:
        try:
            return self.__getitem__(key)
        except KeyError:
            return default

    def __repr__(self) -> str:
        # We can't make repr yield something that could be exec'd here without
        # printing out the whole DimensionUniverse.
        return str(self.mapping)

    def __lt__(self, other: Any) -> bool:
        if not isinstance(other, DataCoordinate):
            return NotImplemented
        # Unlike repr() we only use required keys here to ensure that __eq__
        # can not be true simultaneously with __lt__ being true.
        return self.required_values < other.required_values

    @abstractmethod
    def subset(self, dimensions: DimensionGroup | Iterable[str]) -> DataCoordinate:
        """Return a `DataCoordinate` whose dimensions are a subset of
        ``self.dimensions``.

        Parameters
        ----------
        dimensions : `DimensionGroup` or `~collections.abc.Iterable` [ `str` ]
            The dimensions identified by the returned `DataCoordinate`.

        Returns
        -------
        coordinate : `DataCoordinate`
            A `DataCoordinate` instance that identifies only the given
            dimensions.  May be ``self`` if ``dimensions == self.dimensions``.

        Raises
        ------
        KeyError
            Raised if the primary key value for one or more required
            dimensions is unknown.  This may happen even if the required
            subset of the new dimensions are not a subset of the dimensions
            actually known by this data ID.  As an example, consider trying to
            go from a data ID with dimensions {instrument, physical_filter,
            band} to just {instrument, band}; band is implied by
            physical_filter and hence would have no value in the original data
            ID if ``self.hasFull()`` is `False`.

        Notes
        -----
        If `hasFull` and `hasRecords` return `True` on ``self``, they will
        return `True` (respectively) on the returned `DataCoordinate` as well.
        The converse does not hold.
        """
        raise NotImplementedError()

    @abstractmethod
    def union(self, other: DataCoordinate) -> DataCoordinate:
        """Combine two data IDs.

        Yields a new one that identifies all dimensions that either of them
        identify.

        Parameters
        ----------
        other : `DataCoordinate`
            Data ID to combine with ``self``.

        Returns
        -------
        unioned : `DataCoordinate`
            A `DataCoordinate` instance that satisfies
            ``unioned.dimensions == self.dimensions.union(other.dimensions)``.
            Will preserve ``hasFull`` and ``hasRecords`` whenever possible.

        Notes
        -----
        No checking for consistency is performed on values for keys that
        ``self`` and ``other`` have in common, and which value is included in
        the returned data ID is not specified.
        """
        raise NotImplementedError()

    @abstractmethod
    def expanded(self, records: Mapping[str, DimensionRecord | None]) -> DataCoordinate:
        """Return a `DataCoordinate` that holds the given records.

        Guarantees that `hasRecords` returns `True`.

        This is a low-level interface with at most assertion-level checking of
        inputs.  Most callers should use `Registry.expandDataId` instead.

        Parameters
        ----------
        records : `~collections.abc.Mapping` [ `str`, `DimensionRecord` or \
                `None` ]
            A `~collections.abc.Mapping` with `str` (dimension element name)
            keys and `DimensionRecord` values.  Keys must cover all elements
            in ``self.dimensions.elements``.  Values may be `None`, but only
            to reflect actual NULL values in the database, not just records
            that have not been fetched.
        """
        raise NotImplementedError()

    @property
    def universe(self) -> DimensionUniverse:
        """Universe that defines all known compatible dimensions.

        The universe will be compatible with this coordinate
        (`DimensionUniverse`).
        """
        return self.dimensions.universe

    @property
    @abstractmethod
    def dimensions(self) -> DimensionGroup:
        """Dimensions identified by this data ID (`DimensionGroup`).

        Note that values are only required to be present for dimensions in
        ``self.dimensions.required``; all others may be retrieved (from a
        `Registry`) given these.
        """
        raise NotImplementedError()

    @abstractmethod
    def hasFull(self) -> bool:
        """Whether this data ID contains implied and required values.

        Returns
        -------
        state : `bool`
            If `True`, ``__getitem__``, `get`, and ``__contains__`` (but not
            ``keys``!) will act as though the mapping includes key-value pairs
            for implied dimensions, and the ``full`` property may be used.  If
            `False`, these operations only include key-value pairs for
            required dimensions, and accessing ``full`` is an error.  Always
            `True` if there are no implied dimensions.
        """
        raise NotImplementedError()

    @abstractmethod
    def hasRecords(self) -> bool:
        """Whether this data ID contains records.

        These are the records for all of the dimension elements it identifies.

        Returns
        -------
        state : `bool`
            If `True`, the following attributes may be accessed:

            - `records`
            - `region`
            - `timespan`

            If `False`, accessing any of these is considered a logic error.
        """
        raise NotImplementedError()

    @property
    def records(self) -> Mapping[str, DimensionRecord | None]:
        """A mapping that contains `DimensionRecord` objects for all
        elements identified by this data ID.

        Notes
        -----
        The values of this mapping may be `None` if and only if there is no
        record for that element with these dimensions in the database (which
        means some foreign key field must have a NULL value).

        Accessing this attribute if `hasRecords` returns `False` is a logic
        error that may raise an exception of unspecified type either
        immediately or when the returned mapping is used, depending on the
        implementation and whether assertions are enabled.
        """
        assert self.hasRecords(), "records may only be accessed if hasRecords() returns True."
        return _DataCoordinateRecordsView(self)

    @abstractmethod
    def _record(self, name: str) -> DimensionRecord | None:
        """Protected implementation hook that backs the ``records`` attribute.

        Parameters
        ----------
        name : `str`
            The name of a `DimensionElement`, guaranteed to be in
            ``self.dimensions.elements``.

        Returns
        -------
        record : `DimensionRecord` or `None`
            The dimension record for the given element identified by this
            data ID, or `None` if there is no such record.
        """
        raise NotImplementedError()

    @property
    def region(self) -> Region | None:
        """Spatial region associated with this data ID.

        (`lsst.sphgeom.Region` or `None`).

        This is `None` if and only if ``self.dimensions.spatial`` is empty.

        Accessing this attribute if `hasRecords` returns `False` is a logic
        error that may or may not raise an exception, depending on the
        implementation and whether assertions are enabled.
        """
        assert self.hasRecords(), "region may only be accessed if hasRecords() returns True."
        regions = []
        for family in self.dimensions.spatial:
            element = family.choose(self.dimensions)
            record = self._record(element.name)
            if record is None or record.region is None:
                return None
            else:
                regions.append(record.region)
        return _intersectRegions(*regions)

    @property
    def timespan(self) -> Timespan | None:
        """Temporal interval associated with this data ID.

        (`Timespan` or `None`).

        This is `None` if and only if ``self.dimensions.temporal`` is empty.

        Accessing this attribute if `hasRecords` returns `False` is a logic
        error that may or may not raise an exception, depending on the
        implementation and whether assertions are enabled.
        """
        assert self.hasRecords(), "timespan may only be accessed if hasRecords() returns True."
        timespans = []
        for family in self.dimensions.temporal:
            element = family.choose(self.dimensions)
            record = self._record(element.name)
            # DimensionRecord subclasses for temporal elements always have
            # .timespan, but they're dynamic so this can't be type-checked.
            if record is None or record.timespan is None:
                return None
            else:
                timespans.append(record.timespan)
        if not timespans:
            return None
        elif len(timespans) == 1:
            return timespans[0]
        else:
            return Timespan.intersection(*timespans)

    def to_simple(self, minimal: bool = False) -> SerializedDataCoordinate:
        """Convert this class to a simple python type.

        This is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization.  If set the records will not be
            attached.

        Returns
        -------
        simple : `SerializedDataCoordinate`
            The object converted to simple form.
        """
        # Convert to a dict form
        records: dict[str, SerializedDimensionRecord] | None
        if not minimal and self.hasRecords():
            records = {
                k: v.to_simple() for k in self.dimensions.elements if (v := self.records[k]) is not None
            }
        else:
            records = None

        return SerializedDataCoordinate(dataId=dict(self.mapping), records=records)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDataCoordinate,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
    ) -> DataCoordinate:
        """Construct a new object from the simplified form.

        The data is assumed to be of the form returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `dict` of [`str`, `typing.Any`]
            The `dict` returned by `to_simple()`.
        universe : `DimensionUniverse`
            Object that manages all known dimensions.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted.  Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        dataId : `DataCoordinate`
            Newly-constructed object.
        """
        key = (frozenset(simple.dataId.items()), simple.records is not None)
        cache = PersistenceContextVars.dataCoordinates.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert a dict to a DataCoordinate")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        dataId = cls.standardize(simple.dataId, universe=universe)
        if simple.records:
            dataId = dataId.expanded(
                {k: DimensionRecord.from_simple(v, universe=universe) for k, v in simple.records.items()}
            )
        if cache is not None:
            cache[key] = dataId
        return dataId

    to_json = to_json_pydantic
    from_json: ClassVar[Callable[..., Self]] = cast(Callable[..., Self], classmethod(from_json_pydantic))

    @property
    def dataId(self) -> Self:
        """Return this `DataCoordinate` instance, unmodified."""
        return self
# Annotated as an explicit TypeAlias for consistency with SerializedDataId.
DataId: TypeAlias = DataCoordinate | Mapping[str, Any]
"""A type-annotation alias for signatures that accept both informal data ID
dictionaries and validated `DataCoordinate` instances.
"""
class _DataCoordinateRecordsView(Mapping[str, DimensionRecord | None]):
    """View class for `DataCoordinate.records`.

    Provides the default implementation for
    `DataCoordinate.records`.

    Parameters
    ----------
    target : `DataCoordinate`
        The `DataCoordinate` instance this object provides a view of.
    """

    __slots__ = ("_target",)

    def __init__(self, target: DataCoordinate):
        self._target = target

    def __getitem__(self, key: str) -> DimensionRecord | None:
        # Delegate to the protected per-element hook on the data ID.
        return self._target._record(key)

    def __iter__(self) -> Iterator[str]:
        return iter(self._target.dimensions.elements)

    def __len__(self) -> int:
        return len(self._target.dimensions.elements)

    def __repr__(self) -> str:
        body = ", ".join(f"{name}: {self[name]!r}" for name in self._target.dimensions.elements)
        return f"{{{body}}}"

    def __str__(self) -> str:
        return "\n".join(str(record) for record in self.values())
class _BasicTupleDataCoordinate(DataCoordinate):
    """Intermediate base class for the standard implementation of
    `DataCoordinate`.

    This class should only be accessed outside this module via the
    `DataCoordinate` interface, and should only be constructed via the static
    methods there.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        The dimensions to be identified.
    values : `tuple` [ `int` or `str` ]
        Data ID values, ordered to match
        ``dimensions.data_coordinate_keys``.  May include values for just
        required dimensions (which always come first) or all dimensions
        (concrete subclasses implementations will care which).
    """

    __slots__ = ("_dimensions", "_values")

    def __init__(self, dimensions: DimensionGroup, values: tuple[DataIdValue, ...]):
        self._dimensions = dimensions
        self._values = values

    @property
    def dimensions(self) -> DimensionGroup:
        # Docstring inherited from DataCoordinate.
        return self._dimensions

    @property
    def required(self) -> Mapping[str, DataIdValue]:
        # Docstring inherited from DataCoordinate.
        return _DataCoordinateRequiredMappingView(self)

    def __getitem__(self, key: str) -> DataIdValue:
        # Docstring inherited from DataCoordinate.
        position = self._dimensions._data_coordinate_indices[key]
        try:
            return self._values[position]
        except IndexError:
            # The key names an implied dimension, but this instance stores
            # values for required dimensions only.
            raise KeyError(key) from None

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinate.
        return False

    def _record(self, name: str) -> DimensionRecord | None:
        # Docstring inherited from DataCoordinate.
        raise AssertionError()

    def __getattr__(self, name: str) -> Any:
        if name in self.dimensions.elements:
            raise AttributeError(
                f"Dimension record attribute {name!r} is only available on expanded DataCoordinates."
            )
        raise AttributeError(name)
class _DataCoordinateRequiredMappingView(Mapping[str, DataIdValue]):
    """A DataCoordinate Mapping view class whose keys are just the required
    dimensions.
    """

    __slots__ = ("_target",)

    def __init__(self, target: DataCoordinate):
        self._target = target

    def __getitem__(self, key: str) -> DataIdValue:
        # Only required-dimension keys are visible through this view.
        if key not in self._target.dimensions.required:
            raise KeyError(key)
        return self._target[key]

    def __iter__(self) -> Iterator[str]:
        return iter(self._target.dimensions.required)

    def __len__(self) -> int:
        return len(self._target.dimensions.required)

    def __repr__(self) -> str:
        body = ", ".join(f"{k}: {v!r}" for k, v in self.items())
        return f"{{{body}}}"
class _DataCoordinateFullMappingView(Mapping[str, DataIdValue]):
    """A DataCoordinate Mapping view class whose keys are all dimensions."""

    __slots__ = ("_target",)

    def __init__(self, target: DataCoordinate):
        self._target = target

    def __getitem__(self, key: str) -> DataIdValue:
        return self._target[key]

    def __iter__(self) -> Iterator[str]:
        # Required dimensions first, then implied, per data_coordinate_keys.
        return iter(self._target.dimensions.data_coordinate_keys)

    def __len__(self) -> int:
        return len(self._target.dimensions)

    def __repr__(self) -> str:
        body = ", ".join(f"{k}: {v!r}" for k, v in self.items())
        return f"{{{body}}}"
class _RequiredTupleDataCoordinate(_BasicTupleDataCoordinate):
    """A `DataCoordinate` implementation that has values for required
    dimensions only, when implied dimensions already exist.

    Note that `_FullTupleDataCoordinate` should be used if there are no
    implied dimensions.

    This class should only be accessed outside this module via the
    `DataCoordinate` interface, and should only be constructed via calls to
    `DataCoordinate.from_required_values`.
    """

    __slots__ = ()

    @property
    def mapping(self) -> Mapping[str, DataIdValue]:
        # Docstring inherited from DataCoordinate.
        return _DataCoordinateRequiredMappingView(self)

    @property
    def required_values(self) -> tuple[DataIdValue, ...]:
        # Docstring inherited from DataCoordinate.
        # Every value held by this implementation is for a required
        # dimension, so the stored tuple can be returned as-is.
        return self._values

    def subset(self, dimensions: DimensionGroup | Iterable[str]) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        dimensions = self.universe.conform(dimensions)
        if self._dimensions == dimensions:
            return self
        elif self._dimensions.required >= dimensions.names:
            # Our required values cover *all* dimensions of the subset
            # (required and implied), so we can build a full data ID.
            return DataCoordinate.from_full_values(
                dimensions,
                tuple(self[k] for k in dimensions.data_coordinate_keys),
            )
        else:
            return DataCoordinate.from_required_values(
                dimensions, tuple(self[k] for k in dimensions.required)
            )

    def union(self, other: DataCoordinate) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        dimensions = self.dimensions.union(other.dimensions)
        # See if the other one is already what we want to return. We don't
        # shortcut-return 'self' because `other` might have full values or
        # even records, and we want to return the more complete data ID.
        if other.dimensions == dimensions:
            return other
        # General case with actual merging of dictionaries; `other`'s values
        # win on any shared keys.
        values = dict(self.mapping)
        values.update(other.mapping)
        return DataCoordinate.standardize(values, dimensions=dimensions)

    def expanded(self, records: Mapping[str, DimensionRecord | None]) -> DataCoordinate:
        # Docstring inherited from DataCoordinate
        # Extract a complete values tuple from the attributes of the given
        # records. It's possible for these to be inconsistent with
        # self._values (which is a serious problem, of course), but we've
        # documented this as a no-checking API.
        values = self._values + tuple(
            getattr(records[d], self.universe.dimensions[d].primaryKey.name) for d in self._dimensions.implied
        )
        return _ExpandedTupleDataCoordinate(self._dimensions, values, records)

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinate.
        # Implied dimensions exist for this group but their values were
        # never provided, so this data ID is never "full".
        return False

    def __reduce__(self) -> tuple[Any, ...]:
        # Pickle support: rebuild from dimensions + stored values.
        return (_RequiredTupleDataCoordinate, (self._dimensions, self._values))
class _FullTupleDataCoordinate(_BasicTupleDataCoordinate):
    """A `DataCoordinate` implementation that has values for all dimensions.

    This class should only be accessed outside this module via the
    `DataCoordinate` interface, and should only be constructed via calls to
    `DataCoordinate.from_full_values`.
    """

    __slots__ = ()

    @property
    def mapping(self) -> Mapping[str, DataIdValue]:
        # Docstring inherited from DataCoordinate.
        return _DataCoordinateFullMappingView(self)

    @property
    def required_values(self) -> tuple[DataIdValue, ...]:
        # Docstring inherited from DataCoordinate.
        # Values are stored with required dimensions first, so the required
        # subset is a simple prefix slice.
        return self._values[: len(self._dimensions.required)]

    @property
    def full_values(self) -> tuple[DataIdValue, ...]:
        # Docstring inherited from DataCoordinate.
        return self._values

    def subset(self, dimensions: DimensionGroup | Iterable[str]) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        dimensions = self.universe.conform(dimensions)
        if self._dimensions == dimensions:
            return self
        try:
            values = tuple(self[k] for k in dimensions.data_coordinate_keys)
        except KeyError as e:
            # Chain the original KeyError so the failing lookup is preserved
            # as __cause__ for debugging.
            raise DimensionNameError(f"Data ID is missing value for dimension {str(e)}.") from e
        return DataCoordinate.from_full_values(dimensions, values)

    def union(self, other: DataCoordinate) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        dimensions = self.dimensions.union(other.dimensions)
        # See if one or both input data IDs is already what we want to return;
        # if so, return the most complete one we have.
        if other.dimensions == dimensions and other.hasRecords():
            return other
        elif self.dimensions == dimensions and not other.hasRecords():
            return self
        # General case with actual merging of dictionaries; `other`'s values
        # win on any shared keys.
        values = dict(self.mapping)
        values.update(other.mapping)
        return DataCoordinate.standardize(values, dimensions=dimensions)

    def expanded(self, records: Mapping[str, DimensionRecord | None]) -> DataCoordinate:
        # Docstring inherited from DataCoordinate
        # No-checking API: records are trusted to be consistent with values.
        return _ExpandedTupleDataCoordinate(self._dimensions, self._values, records)

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinate.
        return True

    def __reduce__(self) -> tuple[Any, ...]:
        # Pickle support: rebuild from dimensions + stored values.
        return (_FullTupleDataCoordinate, (self._dimensions, self._values))
class _ExpandedTupleDataCoordinate(_FullTupleDataCoordinate):
    """A `DataCoordinate` implementation that directly holds `DimensionRecord`
    objects relevant to it.

    This class should only be accessed outside this module via the
    `DataCoordinate` interface, and should only be constructed via calls to
    `DataCoordinate.expanded`.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        The dimensions to be identified.
    values : `tuple` [ `int` or `str` ]
        Data ID values, ordered to match
        ``dimensions._data_coordinate_indices``. Just include values for all
        dimensions.
    records : `~collections.abc.Mapping` [ `str`, `DimensionRecord` or `None` ]
        A `~collections.abc.Mapping` with `str` (dimension element name) keys
        and `DimensionRecord` values. Keys must cover all elements in
        ``self.dimensions.elements``. Values may be `None`, but only to
        reflect actual NULL values in the database, not just records that have
        not been fetched.
    """

    def __init__(
        self,
        dimensions: DimensionGroup,
        values: tuple[DataIdValue, ...],
        records: Mapping[str, DimensionRecord | None],
    ):
        super().__init__(dimensions, values)
        assert super().hasFull(), "This implementation requires full dimension records."
        self._records = records

    __slots__ = ("_records",)

    def subset(self, dimensions: DimensionGroup | Iterable[str]) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        # Re-attach our records to the narrowed data ID so it stays expanded.
        narrowed = super().subset(dimensions)
        return narrowed.expanded(self._records)

    def expanded(self, records: Mapping[str, DimensionRecord | None]) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        # Already expanded; nothing to do.
        return self

    def union(self, other: DataCoordinate) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        merged = super().union(other)
        if other.hasRecords() and not merged.hasRecords():
            # Pool records from both operands (other's win on collision) and
            # attach them if they cover everything the merged data ID needs.
            records: dict[str, DimensionRecord | None] = {
                e: self._record(e) for e in self.dimensions.elements
            }
            records.update((e, other._record(e)) for e in other.dimensions.elements)
            if records.keys() >= merged.dimensions.elements:
                return merged.expanded(records)
        return merged

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinate.
        return True

    def _record(self, name: str) -> DimensionRecord | None:
        # Docstring inherited from DataCoordinate.
        return self._records[name]

    def __reduce__(self) -> tuple[Any, ...]:
        # Pickle support: rebuild from dimensions, values, and records.
        return (_ExpandedTupleDataCoordinate, (self._dimensions, self._values, self._records))

    def __getattr__(self, name: str) -> Any:
        # Expose records as attributes (e.g. data_id.visit); translate a
        # missing record into the conventional AttributeError.
        try:
            return self._record(name)
        except KeyError:
            raise AttributeError(name) from None

    def __dir__(self) -> list[str]:
        # Advertise record attribute names alongside the usual members.
        return [*super().__dir__(), *self.dimensions.elements]