Coverage for python / lsst / daf / butler / dimensions / _records.py: 24%
200 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-01 08:18 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = (
31 "DataIdKey",
32 "DataIdValue",
33 "DimensionRecord",
34 "SerializedDimensionRecord",
35 "SerializedKeyValueDimensionRecord",
36)
38import itertools
39from collections.abc import Callable, Hashable
40from typing import TYPE_CHECKING, Any, ClassVar, Self, TypeAlias, cast
42import pydantic
43from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr, create_model
45import lsst.sphgeom
46from lsst.utils.classes import immutable
48from .._timespan import Timespan
49from ..column_spec import make_tuple_type_adapter
50from ..json import from_json_pydantic, to_json_pydantic
51from ..persistence_context import PersistenceContextVars
52from ._elements import Dimension, DimensionElement
54if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
55 from ..registry import Registry
56 from ._coordinate import DataCoordinate
57 from ._schema import DimensionElementFields
58 from ._universe import DimensionUniverse
61DataIdKey: TypeAlias = str
62"""Type annotation alias for the keys that can be used to index a
63DataCoordinate.
64"""
66# Pydantic will cast int to str if str is first in the Union.
67DataIdValue: TypeAlias = int | str
68"""Type annotation alias for the values that can be present in a
69DataCoordinate or other data ID.
70"""
72SerializedKeyValueDimensionRecord: TypeAlias = list[Any]
73"""Type annotation alias for the serialized form of DimensionRecord used in
74container serialization (e.g. `DimensionRecordSet`).
75"""
78def _reconstructDimensionRecord(definition: DimensionElement, mapping: dict[str, Any]) -> DimensionRecord:
79 """Unpickle implementation for `DimensionRecord` subclasses.
81 For internal use by `DimensionRecord`.
82 """
83 return definition.RecordClass(**mapping)
def _subclassDimensionRecord(definition: DimensionElement) -> type[DimensionRecord]:
    """Create a dynamic subclass of `DimensionRecord` for the given element.

    For internal use by `DimensionRecord`.
    """
    # Local import to avoid a circular dependency at module load time.
    from ._schema import DimensionElementFields

    element_fields = DimensionElementFields(definition)
    # Every standard field becomes a slot; spatial/temporal elements also
    # carry a region and/or timespan attribute.
    slot_names = list(element_fields.standard.names)
    if definition.spatial:
        slot_names.append("region")
    if definition.temporal:
        slot_names.append("timespan")

    class_dict = {
        "definition": definition,
        "__slots__": tuple(slot_names),
        "fields": element_fields,
        # Pydantic adapters consumed by serialize_key_value and the
        # deserialize_* classmethods on DimensionRecord.
        "_key_type_adapter": make_tuple_type_adapter(definition.schema.required),
        "_value_type_adapter": make_tuple_type_adapter(
            itertools.chain(definition.schema.implied, definition.schema.remainder)
        ),
    }
    return type(f"{definition.name}.RecordClass", (DimensionRecord,), class_dict)
class SpecificSerializedDimensionRecord(BaseModel, extra="forbid"):
    """Base model for a specific serialized record content.

    Concrete subclasses are generated per dimension element by
    `_createSimpleRecordSubclass`; ``extra="forbid"`` makes unexpected keys
    in external data a validation error rather than silently ignoring them.
    """
# Cache of generated SpecificSerializedDimensionRecord subclasses, keyed by
# both the element definition (which hashes as its name) and its universe,
# so same-named elements from different universes do not collide.
_SIMPLE_RECORD_CLASS_CACHE: dict[
    tuple[DimensionElement, DimensionUniverse], type[SpecificSerializedDimensionRecord]
] = {}
def _createSimpleRecordSubclass(definition: DimensionElement) -> type[SpecificSerializedDimensionRecord]:
    """Return a pydantic model class that validates serialized records of
    the given element.

    Results are memoized in `_SIMPLE_RECORD_CLASS_CACHE`, keyed on the
    definition (which hashes as the name) and the associated universe.
    """
    from ._schema import DimensionElementFields

    cache_key = (definition, definition.universe)
    if (cached := _SIMPLE_RECORD_CLASS_CACHE.get(cache_key)) is not None:
        return cached

    # Map lax Python types to their strict pydantic counterparts so external
    # data is validated rather than silently coerced.
    strict_types = {
        str: StrictStr,
        float: StrictFloat,
        bool: StrictBool,
        int: StrictInt,
    }

    members = {}
    for field in DimensionElementFields(definition).standard:
        python_type = field.getPythonType()
        annotation = strict_types.get(python_type, python_type)
        if field.nullable:
            annotation = annotation | None  # type: ignore
        members[field.name] = (annotation, ...)
    if definition.temporal:
        members["timespan"] = (Timespan | None, ...)  # type: ignore
    if definition.spatial:
        members["region"] = (str | None, ...)  # type: ignore

    # Derived class name is the element name converted to camel case,
    # e.g. "day_obs" -> "DayObs".
    camel_name = "".join(part.capitalize() for part in definition.name.split("_"))

    model = create_model(
        f"SpecificSerializedDimensionRecord{camel_name}",
        __base__=SpecificSerializedDimensionRecord,
        **members,  # type: ignore
    )

    _SIMPLE_RECORD_CLASS_CACHE[cache_key] = model
    return model
# JSON-schema example attached to SerializedDimensionRecord via model_config.
# While supporting pydantic v1 and v2 keep this outside the model.
_serialized_dimension_record_schema_extra = {
    "examples": [
        {
            "definition": "detector",
            "record": {
                "instrument": "HSC",
                "id": 72,
                "full_name": "0_01",
                "name_in_raft": "01",
                "raft": "0",
                "purpose": "SCIENCE",
            },
        }
    ]
}
class SerializedDimensionRecord(BaseModel):
    """Simplified model for serializing a `DimensionRecord`."""

    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        examples=["exposure"],
    )

    # Use strict types to prevent casting
    record: dict[str, None | StrictBool | StrictInt | StrictFloat | StrictStr | Timespan] = Field(
        ...,
        title="Dimension record keys and values.",
        examples=[
            {
                "definition": "exposure",
                "record": {
                    "instrument": "LATISS",
                    "exposure": 2021050300044,
                    "obs_id": "AT_O_20210503_00044",
                },
            }
        ],
    )

    model_config = {
        "json_schema_extra": _serialized_dimension_record_schema_extra,  # type: ignore[typeddict-item]
    }

    @classmethod
    def direct(
        cls,
        *,
        definition: str,
        record: dict[str, Any],
    ) -> SerializedDimensionRecord:
        """Construct a `SerializedDimensionRecord` directly without validators.

        Parameters
        ----------
        definition : `str`
            The name of the record.
        record : `dict`
            A dictionary representation of the record content.

        Returns
        -------
        rec : `SerializedDimensionRecord`
            A model representing the dimension records.

        Notes
        -----
        Unlike the pydantic "construct" method, the arguments here are
        exactly what the model requires, and nested members are built from
        their corresponding `direct` methods where applicable.

        This method should only be called when the inputs are trusted.
        """
        # Timespans arrive from JSON readers as plain lists; rebuild them as
        # Timespan objects so the stored mapping holds the assembled form.
        converted: dict[str, Any] = {}
        for field_name, field_value in record.items():
            if type(field_value) is list:
                field_value = Timespan(begin=None, end=None, _nsec=tuple(field_value))  # type: ignore
            converted[field_name] = field_value

        cache_key = (definition, frozenset(converted.items()))
        cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
        if cache is not None:
            if (cached := cache.get(cache_key)) is not None:
                return cached

        node = cls.model_construct(definition=definition, record=converted)  # type: ignore

        if cache is not None:
            cache[cache_key] = node
        return node
@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record. Unrecognized keys are ignored. If this
        is the record for a `Dimension`, its primary key value may be provided
        with the actual name of the field (e.g. "id" or "name"), the name of
        the `Dimension`, or both. If this record class has a "timespan"
        attribute, "datetime_begin" and "datetime_end" keyword arguments may
        be provided instead of a single "timespan" keyword argument (but are
        ignored if a "timespan" argument is provided).

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute. The `DimensionRecord` base class
    itself is pure abstract, but does not use the `abc` module to indicate this
    because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    _serializedType: ClassVar[type[BaseModel]] = SerializedDimensionRecord

    # Populated on each dynamic subclass by _subclassDimensionRecord.
    _key_type_adapter: ClassVar[pydantic.TypeAdapter[tuple[Any, ...]]]
    _value_type_adapter: ClassVar[pydantic.TypeAdapter[tuple[Any, ...]]]

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        "Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )

        from ._coordinate import DataCoordinate

        # Assign via object.__setattr__ to cooperate with the @immutable
        # decorator, which restricts ordinary attribute assignment.
        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.from_required_values(
                self.definition.minimal_group,
                tuple(kwargs[dimension] for dimension in self.definition.required.names),
            ),
        )
        # Don't need the primary key value aliased to the dimension name
        # anymore.
        kwargs.pop(self.definition.name, None)

        for name in self.__slots__:
            # Note that we remove from kwargs as we go, to make sure there's
            # nothing left at the end.
            object.__setattr__(self, name, kwargs.pop(name, None))
        # Support 'datetime_begin' and 'datetime_end' instead of 'timespan' for
        # backwards compatibility, but if one is provided both must be.
        if self.definition.temporal is not None and self.timespan is None and "datetime_begin" in kwargs:
            object.__setattr__(
                self,
                "timespan",
                Timespan(
                    kwargs.pop("datetime_begin"),
                    kwargs.pop("datetime_end"),
                ),
            )

        if kwargs:
            raise TypeError(f"Invalid fields for {self.definition} dimension record: {set(kwargs.keys())}.")

    def __eq__(self, other: Any) -> bool:
        # Exact-type comparison: records of different elements never compare
        # equal, even if they share field values.
        if type(other) is not type(self):
            return False
        return all(getattr(self, name) == getattr(other, name) for name in self.__slots__)

    def __hash__(self) -> int:
        # The required data ID values uniquely identify the record (see
        # deserialize_key), so they suffice for hashing and are consistent
        # with __eq__.
        return hash(self.dataId.required_values)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f" {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return "{}.RecordClass({})".format(
            self.definition.name, ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        )

    def __reduce__(self) -> tuple:
        # Pickle through the module-level helper, which looks the dynamic
        # record class back up from the element definition.
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))

    def _repr_html_(self) -> str:
        """Override the default representation in IPython/Jupyter notebooks.

        This gives a more readable output that understands embedded newlines.
        """
        # Fixed: the closing tag was previously another opening "<pre>",
        # producing malformed (unclosed) HTML.
        return f"<pre>{self}</pre>"

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        simple : `SerializedDimensionRecord`
            Serialized form of this record.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.
        key = (id(self.definition), self.dataId)
        cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        mapping = {name: getattr(self, name) for name in self.__slots__}
        for k, v in mapping.items():
            if isinstance(v, lsst.sphgeom.Region):
                # YAML serialization specifies the class when it
                # doesn't have to. This is partly for explicitness
                # and also history. Here use a different approach.
                # This code needs to be migrated to sphgeom
                mapping[k] = v.encode().hex()
            if isinstance(v, bytes):
                # We actually can't handle serializing out to bytes for
                # hash objects, encode it here to a hex string
                mapping[k] = v.hex()
        definition = self.definition.to_simple(minimal=minimal)
        dimRec = SerializedDimensionRecord(definition=definition, record=mapping)
        if cache is not None:
            cache[key] = dimRec
        return dimRec

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDimensionRecord,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
        cacheKey: Hashable | None = None,
    ) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value returned from `to_simple`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.
        cacheKey : `collections.abc.Hashable` or `None`
            If this is not `None`, it will be used as a key for any cached
            reconstruction instead of calculating a value from the serialized
            format.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGroup")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")
        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        key = cacheKey or (
            simple.definition,
            frozenset(simple.record.items()),  # type: ignore
        )
        cache = PersistenceContextVars.dimensionRecords.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Region and hash have to be converted to native form; for now assume
        # that the keys are special. We make the mapping we need to pass to
        # the DimensionRecord constructor via getattr, because we don't want
        # model_dump re-disassembling things like Timespans that we've already
        # assembled.
        mapping = {k: getattr(record_model, k) for k in definition.schema.names}

        if mapping.get("region") is not None:
            mapping["region"] = lsst.sphgeom.Region.decode(bytes.fromhex(mapping["region"]))
        if "hash" in mapping:
            mapping["hash"] = bytes.fromhex(mapping["hash"].decode())

        dimRec = _reconstructDimensionRecord(definition, mapping)
        if cache is not None:
            cache[key] = dimRec
        return dimRec

    to_json = to_json_pydantic
    from_json: ClassVar[Callable[..., Self]] = cast(Callable[..., Self], classmethod(from_json_pydantic))

    def toDict(self, splitTimespan: bool = False) -> dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results

    def get(self, name: str) -> Any:
        """Return a single metadata value associated with this record.

        Parameters
        ----------
        name : `str`
            Key of the metadata value to be retrieved.

        Returns
        -------
        value : `typing.Any`
            The metadata value.

        Raises
        ------
        KeyError
            If the given name is not a valid key in this dimension record.
        """
        if name not in self.__slots__:
            raise KeyError(f"'{name}' is not a valid record key for dimension '{self.definition.name}'")

        return getattr(self, name)

    def serialize_key_value(self) -> SerializedKeyValueDimensionRecord:
        """Serialize this record to a `list` that can be sliced into a key
        (data ID values) / value (everything else) pair.

        Returns
        -------
        raw : `list`
            List of values with JSON-compatible types.

        Notes
        -----
        Unlike `to_simple` / `from_simple`, this serialization approach does
        not encode the ``definition`` element in the serialized form. This is
        expected to be serialized separately (e.g. as part of a homogeneous set
        of dimension records).
        """
        key = list(self.dataId.required_values)
        # Value fields are ordered implied-then-remainder, matching the
        # _value_type_adapter built by _subclassDimensionRecord.
        value = []
        for name in self.definition.schema.implied.names:
            value.append(getattr(self, name))
        for name in self.definition.schema.remainder.names:
            value.append(getattr(self, name))
        return key + self._value_type_adapter.dump_python(tuple(value), mode="json")

    @classmethod
    def deserialize_key(
        cls, raw: SerializedKeyValueDimensionRecord
    ) -> tuple[tuple[DataIdValue, ...], SerializedKeyValueDimensionRecord]:
        """Deserialize just the key slice of the raw `list` serialization of a
        dimension record.

        Parameters
        ----------
        raw : `list`
            Serialized list with JSON-compatible types, as returned by
            `serialize_key_value`.

        Returns
        -------
        key : `tuple`
            Validated tuple of required data ID values that uniquely identify
            this record, extracted from the head of ``raw``.
        raw_value : `list`
            Remaining unvalidated fields.
        """
        n = len(cls.definition.minimal_group.required)
        return cls._key_type_adapter.validate_python(raw[:n]), raw[n:]

    @classmethod
    def deserialize_value(
        cls, key: tuple[DataIdValue, ...], raw_value: SerializedKeyValueDimensionRecord
    ) -> DimensionRecord:
        """Deserialize the value slice of the raw `list` form of serialized
        dimension record.

        Parameters
        ----------
        key : `tuple`
            Validated tuple of required data ID values that uniquely identify
            this record, as returned by `deserialize_key`.
        raw_value : `list`
            Serialized list with JSON-compatible types, with just the non-key
            items, as returned by `deserialize_key`.

        Returns
        -------
        record : `DimensionRecord`
            A fully-validated `DimensionRecord` with this subclass.
        """
        from ._coordinate import DataCoordinate

        result = object.__new__(cls)  # bypass the usual __init__
        result.dataId = DataCoordinate.from_required_values(cls.definition.minimal_group, key)
        value = cls._value_type_adapter.validate_python(raw_value)
        # Relies on schema.names iterating in the same order the fields were
        # serialized (required, then implied, then remainder); see
        # serialize_key_value.
        # NOTE(review): plain setattr here (vs. object.__setattr__ in
        # __init__) appears to rely on @immutable permitting first-time
        # assignment — confirm against lsst.utils.classes.immutable.
        for name, val in zip(cls.definition.schema.names, key + value):
            setattr(result, name, val)
        return result

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes. To avoid having to
    # put "type: ignore" everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """