Coverage for python / lsst / daf / butler / dimensions / _elements.py: 56%
186 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-17 08:49 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-17 08:49 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
# Names exported by ``from ... import *``; all three classes are defined below
# in this module.
__all__ = (
    "Dimension",
    "DimensionCombination",
    "DimensionElement",
)
36from abc import abstractmethod
37from collections.abc import Callable
38from typing import TYPE_CHECKING, Annotated, Any, ClassVar, Self, TypeAlias, Union, cast
40import pydantic
41from pydantic_core import core_schema
43from lsst.utils.classes import cached_getter
45from .. import arrow_utils, column_spec, ddl, pydantic_utils
46from .._named import NamedValueAbstractSet, NamedValueSet
47from .._topology import TopologicalRelationshipEndpoint
48from ..json import from_json_generic, to_json_generic
50if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
51 from ..registry import Registry
52 from ._governor import GovernorDimension
53 from ._group import DimensionGroup
54 from ._records import DimensionRecord
55 from ._schema import DimensionRecordSchema
56 from ._universe import DimensionUniverse
# Column specification types accepted for dimension key columns (integer,
# string, or hash).  The ``discriminator="type"`` field setting tells pydantic
# to pick the concrete spec class from each value's ``type`` field.
KeyColumnSpec: TypeAlias = Annotated[
    Union[
        column_spec.IntColumnSpec,
        column_spec.StringColumnSpec,
        column_spec.HashColumnSpec,
    ],
    pydantic.Field(discriminator="type"),
]
# Column specification types accepted for non-key metadata columns: the key
# column types plus float and bool.  As with `KeyColumnSpec`, pydantic selects
# the concrete spec class from each value's ``type`` field.
MetadataColumnSpec: TypeAlias = Annotated[
    Union[
        column_spec.IntColumnSpec,
        column_spec.StringColumnSpec,
        column_spec.FloatColumnSpec,
        column_spec.HashColumnSpec,
        column_spec.BoolColumnSpec,
    ],
    pydantic.Field(discriminator="type"),
]
class DimensionElement(TopologicalRelationshipEndpoint):
    """A label and/or metadata in the dimensions system.

    A named data-organization concept that defines a label and/or metadata
    in the dimensions system.

    A `DimensionElement` instance typically corresponds to a _logical_ table in
    the `Registry`: either an actual database table or a way of generating rows
    on-the-fly that can similarly participate in queries.  The rows in that
    table are represented by instances of a `DimensionRecord` subclass.  Most
    `DimensionElement` instances are instances of its `Dimension` subclass,
    which is used for elements that can be used as data ID keys.

    Notes
    -----
    `DimensionElement` instances should always be constructed by and retrieved
    from a `DimensionUniverse`.  They are immutable after they are fully
    constructed, and should never be copied.

    Pickling a `DimensionElement` just records its name and universe;
    unpickling one actually just looks up the element via the singleton
    dictionary of all universes.  This allows pickle to be used to transfer
    elements between processes, but only when each process initializes its own
    instance of the same `DimensionUniverse`.
    """

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.name})"

    def __eq__(self, other: Any) -> bool:
        try:
            return self.name == other.name
        except AttributeError:
            # TODO: try removing this fallback; it's not really consistent with
            # base class intent, and it could be confusing
            return self.name == other

    def __hash__(self) -> int:
        return hash(self.name)

    # TODO: try removing comparison operators; DimensionUniverse.sorted should
    # be adequate.

    def _element_indices(self, other: Any) -> tuple[int, int] | None:
        """Look up the universe indices used to order two elements.

        Parameters
        ----------
        other : `object`
            The right-hand operand of a comparison.

        Returns
        -------
        indices : `tuple` or `None`
            ``(index of self, index of other)`` as reported by
            ``self.universe.getElementIndex``, or `None` if ``other`` has no
            ``name`` attribute or either lookup raises `KeyError`.
        """
        try:
            other_name = other.name
        except AttributeError:
            # Not an element-like object (e.g. a plain `str`); let Python fall
            # back to the reflected operation instead of leaking an
            # AttributeError out of a comparison operator.
            return None
        try:
            return (
                self.universe.getElementIndex(self.name),
                self.universe.getElementIndex(other_name),
            )
        except KeyError:
            return None

    def __lt__(self, other: DimensionElement) -> bool:
        indices = self._element_indices(other)
        if indices is None:
            return NotImplemented
        mine, theirs = indices
        return mine < theirs

    def __le__(self, other: DimensionElement) -> bool:
        indices = self._element_indices(other)
        if indices is None:
            return NotImplemented
        mine, theirs = indices
        return mine <= theirs

    def __gt__(self, other: DimensionElement) -> bool:
        indices = self._element_indices(other)
        if indices is None:
            return NotImplemented
        mine, theirs = indices
        return mine > theirs

    def __ge__(self, other: DimensionElement) -> bool:
        indices = self._element_indices(other)
        if indices is None:
            return NotImplemented
        mine, theirs = indices
        return mine >= theirs

    @classmethod
    def _unpickle(cls, universe: DimensionUniverse, name: str) -> DimensionElement:
        """Callable used for unpickling.

        For internal use only.
        """
        return universe[name]

    def __reduce__(self) -> tuple:
        return (self._unpickle, (self.universe, self.name))

    def __deepcopy__(self, memo: dict) -> DimensionElement:
        # DimensionElement is recursively immutable; see note in @immutable
        # decorator.
        return self

    def to_simple(self, minimal: bool = False) -> str:
        """Convert this class to a simple python type.

        This is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        simple : `str`
            The object converted to a single string.
        """
        return self.name

    @classmethod
    def from_simple(
        cls, simple: str, universe: DimensionUniverse | None = None, registry: Registry | None = None
    ) -> DimensionElement:
        """Construct a new object from the simplified form.

        Usually the data is returned from the `to_simple` method.

        Parameters
        ----------
        simple : `str`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions.  Can be `None` if
            ``registry`` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        element : `DimensionElement`
            The element looked up by name in the universe.

        Raises
        ------
        ValueError
            Raised if both ``universe`` and ``registry`` are `None`.
        """
        if universe is None:
            if registry is None:
                raise ValueError(
                    "One of universe or registry is required to convert a str to a DimensionElement"
                )
            # An explicitly provided universe always takes precedence; the
            # registry is only consulted as a fallback.
            universe = registry.dimensions
        return universe[simple]

    to_json = to_json_generic
    from_json: ClassVar[Callable[..., Self]] = cast(Callable[..., Self], classmethod(from_json_generic))

    def hasTable(self) -> bool:
        """Indicate if this element is associated with a table.

        Return `True` if this element is associated with a table
        (even if that table "belongs" to another element).
        """
        return self.has_own_table or self.implied_union_target is not None

    universe: DimensionUniverse
    """The universe of all compatible dimensions with which this element is
    associated (`DimensionUniverse`).
    """

    @property
    @cached_getter
    def governor(self) -> GovernorDimension | None:
        """Return the governor dimension.

        This is the `GovernorDimension` that is a required dependency of this
        element, or `None` if there is no such dimension (`GovernorDimension`
        or `None`).

        Raises
        ------
        RuntimeError
            Raised if the element's minimal group has more than one governor.
        """
        governors = self.minimal_group.governors
        count = len(governors)
        if count > 1:
            raise RuntimeError(f"Dimension element {self.name} has multiple governors: {governors}.")
        if count == 1:
            (result,) = governors
            return cast("GovernorDimension", self.universe[result])
        return None

    @property
    @abstractmethod
    def required(self) -> NamedValueAbstractSet[Dimension]:
        """Return the required dimensions.

        Dimensions that are necessary to uniquely identify a record of this
        dimension element.

        For elements with a database representation, these dimension are
        exactly those used to form the (possibly compound) primary key, and all
        dimensions here that are not ``self`` are also used to form foreign
        keys.

        For `Dimension` instances, this should be exactly the same as
        ``graph.required``, but that may not be true for `DimensionElement`
        instances in general.  When they differ, there are multiple
        combinations of dimensions that uniquely identify this element, but
        this one is more direct.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def implied(self) -> NamedValueAbstractSet[Dimension]:
        """Return the implied dimensions.

        Other dimensions that are uniquely identified directly by a record
        of this dimension element.

        For elements with a database representation, these are exactly the
        dimensions used to form foreign key constraints whose fields are not
        (wholly) also part of the primary key.

        Unlike ``self.graph.implied``, this set is not expanded recursively.
        """
        raise NotImplementedError()

    @property
    @cached_getter
    def dimensions(self) -> NamedValueAbstractSet[Dimension]:
        """Return all dimensions.

        The union of `required` and `implied`, with all elements in
        `required` before any elements in `implied`.

        This differs from ``self.graph.dimensions`` both in order and in
        content:

        - as in ``self.implied``, implied dimensions are not expanded
          recursively here;
        - implied dimensions appear after required dimensions here, instead of
          being topologically ordered.

        As a result, this set is ordered consistently with
        ``self.RecordClass.fields``.
        """
        return NamedValueSet(list(self.required) + list(self.implied)).freeze()

    @property
    @cached_getter
    def minimal_group(self) -> DimensionGroup:
        """Return minimal dimension group that includes this element.

        ``self.minimal_group.required`` includes all dimensions whose primary
        key values are sufficient (often necessary) to uniquely identify
        ``self`` (including ``self`` if ``isinstance(self, Dimension)``).
        ``self.minimal_group.implied`` includes all dimensions also identified
        (possibly recursively) by this set.
        """
        return self.universe.conform(self.dimensions.names)

    @property
    @cached_getter
    def RecordClass(self) -> type[DimensionRecord]:
        """Return the record subclass for this element.

        The `DimensionRecord` subclass used to hold records for this element
        (`type`).

        Because `DimensionRecord` subclasses are generated dynamically, this
        type cannot be imported directly and hence can only be obtained from
        this attribute.
        """
        from ._records import _subclassDimensionRecord

        return _subclassDimensionRecord(self)

    @property
    def alternate_keys(self) -> NamedValueAbstractSet[KeyColumnSpec]:
        """Additional unique key fields for this dimension element that are not
        the primary key (`NamedValueAbstractSet` of `KeyColumnSpec`).

        This is always empty for elements that are not dimensions.

        If this dimension has required dependencies, the keys of those
        dimensions are also included in the unique constraints defined for
        these alternate keys.
        """
        return NamedValueSet().freeze()

    @property
    @abstractmethod
    def metadata_columns(self) -> NamedValueAbstractSet[MetadataColumnSpec]:
        """Additional metadata fields included in this element's table.

        (`NamedValueSet` of `MetadataColumnSpec`).
        """
        raise NotImplementedError()

    @property
    @cached_getter
    def metadata(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
        """Additional metadata fields included in this element's table.

        (`NamedValueSet` of `FieldSpec`).
        """
        # The loop variable is deliberately not called ``column_spec`` so that
        # it does not shadow the imported module of that name.
        return NamedValueSet([spec.to_sql_spec() for spec in self.metadata_columns]).freeze()

    @property
    def viewOf(self) -> str | None:
        """Name of another table this element's records are drawn from.

        (`str` or `None`).
        """
        return self.implied_union_target.name if self.implied_union_target is not None else None

    @property
    def alwaysJoin(self) -> bool:
        """Indicate if the element should always be included.

        If `True`, always include this element in any query or data ID in
        which its ``required`` dimensions appear, because it defines a
        relationship between those dimensions that must always be satisfied.
        """
        return False

    @property
    def has_own_table(self) -> bool:
        """Whether this element should have its own table in the database."""
        return self.implied_union_target is None

    @property
    def implied_union_target(self) -> DimensionElement | None:
        """If not `None`, another element whose implied values for this element
        form the set of allowable values.

        For example, in the default dimension universe, the allowed values for
        ``band`` is the union of all ``band`` values in the ``physical_filter``
        table, so the `implied_union_target` for ``band`` is
        ``physical_filter``.
        """
        return None

    @property
    def defines_relationships(self) -> bool:
        """Whether this element's records define one or more relationships that
        must be satisfied in rows over dimensions that include it.
        """
        return bool(self.implied)

    @property
    def is_cached(self) -> bool:
        """Whether this element's records should be aggressively cached,
        because they are small in number and rarely inserted.
        """
        return False

    @property
    @abstractmethod
    def populated_by(self) -> Dimension | None:
        """The dimension that this element's records are always inserted,
        exported, and imported alongside.

        Notes
        -----
        When this is `None` (as it will be, at least at first, for any data
        repositories created before this attribute was added), records for
        this element will often need to be exported manually when datasets
        associated with some other related dimension are exported, in order for
        the post-import data repository to function as expected.
        """
        raise NotImplementedError()

    @property
    @cached_getter
    def schema(self) -> DimensionRecordSchema:
        """A description of the columns in this element's records and (at least
        conceptual) table.
        """
        from ._schema import DimensionRecordSchema

        return DimensionRecordSchema(self)

    @property
    @abstractmethod
    def documentation(self) -> str:
        """Extended description of this dimension element."""
        raise NotImplementedError()

    @classmethod
    def _validate(cls, data: Any, info: pydantic.ValidationInfo) -> DimensionElement:
        """Pydantic validator (deserializer) for `DimensionElement`.

        This satisfies the `pydantic.WithInfoPlainValidatorFunction` signature.
        """
        universe = pydantic_utils.get_universe_from_context(info.context)
        return universe[data]

    def _serialize(self) -> str:
        """Pydantic serializer for `DimensionElement`.

        This satisfies the `pydantic.PlainSerializerFunction` signature.
        """
        return self.name

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: pydantic.GetCoreSchemaHandler
    ) -> core_schema.CoreSchema:
        # This is the Pydantic hook for overriding serialization, validation,
        # and JSON schema generation.
        str_schema = core_schema.str_schema()
        from_str_schema = core_schema.chain_schema(
            [str_schema, core_schema.with_info_plain_validator_function(cls._validate)]
        )
        return core_schema.json_or_python_schema(
            # When deserializing from JSON, expect it to be a `str`
            json_schema=from_str_schema,
            # When deserializing from Python, first see if it's already a
            # DimensionElement and then try conversion from `str`.
            python_schema=core_schema.union_schema(
                [core_schema.is_instance_schema(DimensionElement), from_str_schema]
            ),
            # When serializing convert it to a `str`.
            serialization=core_schema.plain_serializer_function_ser_schema(
                cls._serialize, return_schema=str_schema
            ),
        )
class Dimension(DimensionElement):
    """A dimension.

    A named data-organization concept that can be used as a key in a data
    ID.
    """

    @property
    @abstractmethod
    def unique_keys(self) -> NamedValueAbstractSet[KeyColumnSpec]:
        """Descriptions of unique identifiers for this dimension.

        All fields that can individually be used to identify records of this
        element, given the primary keys of all required dependencies
        (`NamedValueAbstractSet` of `KeyColumnSpec`).
        """
        raise NotImplementedError()

    @property
    @cached_getter
    def primary_key(self) -> KeyColumnSpec:
        """The primary key field for this dimension (`KeyColumnSpec`).

        Note that the database primary keys for dimension tables are in general
        compound; this field is the only field in the database primary key that
        is not also a foreign key (to a required dependency dimension table).
        """
        # By convention the first entry of ``unique_keys`` is the primary key.
        first_key, *_ = self.unique_keys
        return first_key

    @property
    @cached_getter
    def alternate_keys(self) -> NamedValueAbstractSet[KeyColumnSpec]:
        # Docstring inherited.
        _, *alternate_keys = self.unique_keys
        return NamedValueSet(alternate_keys).freeze()

    @property
    @cached_getter
    def uniqueKeys(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
        """Return the unique fields.

        All fields that can individually be used to identify records of this
        element, given the primary keys of all required dependencies
        (`NamedValueAbstractSet` of `FieldSpec`).
        """
        # Only the first unique key is flagged as the primary key.  The result
        # is cached, so it is frozen (like the other cached sets on this
        # class) to keep callers from mutating the shared instance.
        return NamedValueSet(
            [spec.to_sql_spec(primaryKey=(n == 0)) for n, spec in enumerate(self.unique_keys)]
        ).freeze()

    @property
    @cached_getter
    def primaryKey(self) -> ddl.FieldSpec:
        """Return primary key field for this dimension (`FieldSpec`).

        Note that the database primary keys for dimension tables are in general
        compound; this field is the only field in the database primary key that
        is not also a foreign key (to a required dependency dimension table).
        """
        primaryKey, *_ = self.uniqueKeys
        return primaryKey

    @property
    @cached_getter
    def alternateKeys(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
        """Return alternate keys.

        Additional unique key fields for this dimension that are not the
        primary key (`NamedValueAbstractSet` of `FieldSpec`).

        If this dimension has required dependencies, the keys of those
        dimensions are also included in the unique constraints defined for
        these alternate keys.
        """
        _, *alternateKeys = self.uniqueKeys
        return NamedValueSet(alternateKeys).freeze()

    @property
    def populated_by(self) -> Dimension:
        # Docstring inherited.
        return self

    def to_arrow(self, dimensions: DimensionGroup, spec: KeyColumnSpec | None = None) -> arrow_utils.ToArrow:
        """Return an object that converts the primary key value for this
        dimension to a column in an Arrow table.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Full set of dimensions over which the rows of the table are unique
            or close to unique.  This is used to determine whether to use
            Arrow's dictionary encoding to compress duplicate values.
        spec : `KeyColumnSpec`, optional
            Column specification for this dimension.  If not provided, a copy
            of `primary_key` with the field name replaced with the dimension
            name will be used, which is appropriate for when this dimension
            appears in data ID or the dimension record tables of other
            dimension elements.

        Returns
        -------
        converter : `~lsst.daf.butler.arrow_utils.ToArrow`
            Converter for this dimension's primary key.
        """
        if spec is None:
            spec = self.primary_key.model_copy(update={"name": self.name})
        if dimensions != self.minimal_group and spec.type != "int":
            # Values are large and will be duplicated in rows that are unique
            # over these dimensions, so dictionary encoding may help a lot.
            return spec.to_arrow().dictionary_encoded()
        else:
            return spec.to_arrow()
class DimensionCombination(DimensionElement):
    """Element with extra information.

    A `DimensionElement` that provides extra metadata and/or relationship
    endpoint information for a combination of dimensions.

    Notes
    -----
    As visible here, this class adds no members of its own beyond those
    inherited from `DimensionElement`.
    """