Coverage for python/lsst/daf/butler/core/dimensions/_records.py: 24% (161 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("DimensionRecord", "SerializedDimensionRecord")

from collections.abc import Hashable
from typing import TYPE_CHECKING, Any, ClassVar, Optional, Tuple

import lsst.sphgeom
from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat
from lsst.utils.classes import immutable
from pydantic import Field, StrictBool, StrictFloat, StrictInt, StrictStr, create_model

from ..json import from_json_pydantic, to_json_pydantic
from ..persistenceContext import PersistenceContextVars
from ..timespan import Timespan, TimespanDatabaseRepresentation
from ._elements import Dimension, DimensionElement

if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
    from ...registry import Registry
    from ._coordinate import DataCoordinate
    from ._graph import DimensionUniverse
    from ._schema import DimensionElementFields


def _reconstructDimensionRecord(definition: DimensionElement, mapping: dict[str, Any]) -> DimensionRecord:
    """Unpickle implementation for `DimensionRecord` subclasses.

    For internal use by `DimensionRecord`.
    """
    return definition.RecordClass(**mapping)


def _subclassDimensionRecord(definition: DimensionElement) -> type[DimensionRecord]:
    """Create a dynamic subclass of `DimensionRecord` for the given element.

    For internal use by `DimensionRecord`.
    """
    from ._schema import DimensionElementFields

    fields = DimensionElementFields(definition)
    slots = list(fields.standard.names)
    if definition.spatial:
        slots.append("region")
    if definition.temporal:
        slots.append(TimespanDatabaseRepresentation.NAME)
    d = {"definition": definition, "__slots__": tuple(slots), "fields": fields}
    return type(definition.name + ".RecordClass", (DimensionRecord,), d)
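

# A hedged sketch of what the dynamically-created subclass amounts to for a
# spatial element such as "patch" (the slot names below are illustrative
# assumptions, not exact):
#
#     type("patch.RecordClass", (DimensionRecord,), {
#         "definition": <the patch DimensionElement>,
#         "__slots__": ("skymap", "tract", "id", ..., "region"),
#         "fields": DimensionElementFields(<the patch DimensionElement>),
#     })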


class SpecificSerializedDimensionRecord(_BaseModelCompat, extra="forbid"):
    """Base model for a specific serialized record content."""


_SIMPLE_RECORD_CLASS_CACHE: dict[
    tuple[DimensionElement, DimensionUniverse], type[SpecificSerializedDimensionRecord]
] = {}


def _createSimpleRecordSubclass(definition: DimensionElement) -> type[SpecificSerializedDimensionRecord]:
    from ._schema import DimensionElementFields

    # Cache on the definition (which hashes as the name) and the
    # associated universe.
    cache_key = (definition, definition.universe)
    if cache_key in _SIMPLE_RECORD_CLASS_CACHE:
        return _SIMPLE_RECORD_CLASS_CACHE[cache_key]

    fields = DimensionElementFields(definition)
    members = {}
    # Prefer strict typing for external data.
    type_map = {
        str: StrictStr,
        float: StrictFloat,
        bool: StrictBool,
        int: StrictInt,
    }

    for field in fields.standard:
        field_type = field.getPythonType()
        field_type = type_map.get(field_type, field_type)
        if field.nullable:
            field_type = Optional[field_type]  # type: ignore
        members[field.name] = (field_type, ...)
    if definition.temporal:
        members["timespan"] = (Tuple[int, int], ...)  # type: ignore
    if definition.spatial:
        members["region"] = (str, ...)

    # mypy does not seem to like create_model.
    model = create_model(
        f"SpecificSerializedDimensionRecord{definition.name.capitalize()}",
        __base__=SpecificSerializedDimensionRecord,
        **members,  # type: ignore
    )

    _SIMPLE_RECORD_CLASS_CACHE[cache_key] = model
    return model
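

# A hedged usage sketch for the generated validation model; the element name
# and field values below are illustrative assumptions:
#
#     model_cls = _createSimpleRecordSubclass(universe["detector"])
#     validated = model_cls(instrument="HSC", id=72, full_name="0_01", ...)
#
# Because the model uses the Strict* pydantic types and extra="forbid", a
# malformed external record (an int sent as a string, an unexpected key)
# raises a pydantic ValidationError instead of being silently coerced.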


# While supporting pydantic v1 and v2 keep this outside the model.
_serialized_dimension_record_schema_extra = {
    "examples": [
        {
            "definition": "detector",
            "record": {
                "instrument": "HSC",
                "id": 72,
                "full_name": "0_01",
                "name_in_raft": "01",
                "raft": "0",
                "purpose": "SCIENCE",
            },
        }
    ]
}


class SerializedDimensionRecord(_BaseModelCompat):
    """Simplified model for serializing a `DimensionRecord`."""

    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        examples=["exposure"],
    )

    # Use strict types to prevent casting.
    record: dict[str, None | StrictInt | StrictFloat | StrictStr | StrictBool | tuple[int, int]] = Field(
        ...,
        title="Dimension record keys and values.",
        examples=[
            {
                "definition": "exposure",
                "record": {
                    "instrument": "LATISS",
                    "exposure": 2021050300044,
                    "obs_id": "AT_O_20210503_00044",
                },
            }
        ],
    )

    if PYDANTIC_V2:
        model_config = {
            "json_schema_extra": _serialized_dimension_record_schema_extra,  # type: ignore[typeddict-item]
        }
    else:

        class Config:
            """Local configuration overrides for model."""

            schema_extra = _serialized_dimension_record_schema_extra

    @classmethod
    def direct(
        cls,
        *,
        definition: str,
        record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]],
    ) -> SerializedDimensionRecord:
        """Construct a `SerializedDimensionRecord` directly without validators.

        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        This method should only be called when the inputs are trusted.
        """
        # This method requires tuples as values of the mapping, but JSON
        # readers will read things in as lists. Be kind and transparently
        # transform to tuples.
        _recItems = {
            k: v if type(v) != list else tuple(v) for k, v in record.items()  # type: ignore # noqa: E721
        }

        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        key = (
            definition,
            frozenset(_recItems.items()),
        )
        cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        node = cls.model_construct(definition=definition, record=_recItems)  # type: ignore

        if cache is not None:
            cache[key] = node
        return node
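
    # A hedged sketch contrasting validated and trusted construction (the
    # field values here are illustrative):
    #
    #     validated = SerializedDimensionRecord(
    #         definition="detector", record={"instrument": "HSC", "id": 72, ...}
    #     )
    #     trusted = SerializedDimensionRecord.direct(
    #         definition="detector", record={"instrument": "HSC", "id": 72, ...}
    #     )
    #
    # ``direct`` skips pydantic validation entirely, so it is only appropriate
    # for inputs that have already been validated elsewhere.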


@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record. Unrecognized keys are ignored. If this
        is the record for a `Dimension`, its primary key value may be provided
        with the actual name of the field (e.g. "id" or "name"), the name of
        the `Dimension`, or both. If this record class has a "timespan"
        attribute, "datetime_begin" and "datetime_end" keyword arguments may
        be provided instead of a single "timespan" keyword argument (but are
        ignored if a "timespan" argument is provided).

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute. The `DimensionRecord` base class
    itself is purely abstract, but does not use the `abc` module to indicate
    this because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    _serializedType = SerializedDimensionRecord

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience
        # below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        "Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )
        for name in self.__slots__:
            object.__setattr__(self, name, kwargs.get(name))
        if self.definition.temporal is not None and self.timespan is None:
            object.__setattr__(
                self,
                "timespan",
                Timespan(
                    kwargs.get("datetime_begin"),
                    kwargs.get("datetime_end"),
                ),
            )

        from ._coordinate import DataCoordinate

        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.fromRequiredValues(
                self.definition.graph,
                tuple(kwargs[dimension] for dimension in self.definition.required.names),
            ),
        )
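
    # A hedged construction sketch; the element name and field values are
    # illustrative assumptions (``universe`` is a `DimensionUniverse`):
    #
    #     record_cls = universe["detector"].RecordClass
    #     record = record_cls(instrument="HSC", id=72, full_name="0_01", ...)
    #     record.dataId  # DataCoordinate over the element's required dimensions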

    def __eq__(self, other: Any) -> bool:
        if type(other) != type(self):
            return False
        return self.dataId == other.dataId

    def __hash__(self) -> int:
        return hash(self.dataId)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f"  {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return "{}.RecordClass({})".format(
            self.definition.name, ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        )

    def __reduce__(self) -> tuple:
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))
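
    # Pickling round-trips through _reconstructDimensionRecord; a short
    # sketch (``record`` is any DimensionRecord instance):
    #
    #     import pickle
    #     assert pickle.loads(pickle.dumps(record)) == record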

    def _repr_html_(self) -> str:
        """Override the default representation in IPython/Jupyter notebooks.

        This gives a more readable output that understands embedded newlines.
        """
        return f"<pre>{self}</pre>"

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        simple : `SerializedDimensionRecord`
            The serialized form of this record.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.

        mapping = {name: getattr(self, name) for name in self.__slots__}
        # If the item in mapping supports simplification update it.
        for k, v in mapping.items():
            try:
                mapping[k] = v.to_simple(minimal=minimal)
            except AttributeError:
                if isinstance(v, lsst.sphgeom.Region):
                    # YAML serialization specifies the class when it
                    # doesn't have to. This is partly for explicitness
                    # and also history. Here use a different approach.
                    # This code needs to be migrated to sphgeom.
                    mapping[k] = v.encode().hex()
                if isinstance(v, bytes):
                    # We actually can't handle serializing out to bytes for
                    # hash objects, encode it here to a hex string.
                    mapping[k] = v.hex()
        definition = self.definition.to_simple(minimal=minimal)
        return SerializedDimensionRecord(definition=definition, record=mapping)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDimensionRecord,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
        cacheKey: Hashable | None = None,
    ) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value returned from `to_simple`.
        universe : `DimensionUniverse`, optional
            The universe of all known dimensions, used to resolve this
            record's element definition. Can be `None` if ``registry`` is
            provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if ``universe`` is provided explicitly.
        cacheKey : `Hashable` or `None`
            If this is not `None`, it will be used as a key for any cached
            reconstruction instead of calculating a value from the serialized
            format.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")
        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        key = cacheKey or (
            simple.definition,
            frozenset(simple.record.items()),  # type: ignore
        )
        cache = PersistenceContextVars.dimensionRecords.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Timespan and region have to be converted to native form;
        # for now assume that those keys are special.
        rec = record_model.model_dump()

        if (ts := "timespan") in rec:
            rec[ts] = Timespan.from_simple(rec[ts], universe=universe, registry=registry)
        if (reg := "region") in rec:
            encoded = bytes.fromhex(rec[reg])
            rec[reg] = lsst.sphgeom.Region.decode(encoded)
        if (hsh := "hash") in rec:
            rec[hsh] = bytes.fromhex(rec[hsh].decode())

        dimRec = _reconstructDimensionRecord(definition, rec)
        if cache is not None:
            cache[key] = dimRec
        return dimRec
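
    # A hedged serialization round-trip sketch (``record`` and ``universe``
    # are assumed to already exist):
    #
    #     simple = record.to_simple()
    #     restored = DimensionRecord.from_simple(simple, universe=universe)
    #     assert restored == record
    #
    # Equality compares the record type and dataId (the primary keys), so the
    # assertion holds even though the non-key fields were rebuilt from the
    # serialized form.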

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def toDict(self, splitTimespan: bool = False) -> dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results
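
    # A short sketch of the splitTimespan option for a temporal record such
    # as an "exposure" record (illustrative):
    #
    #     d = record.toDict(splitTimespan=True)
    #     # "timespan" is replaced by "datetime_begin" and "datetime_end"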

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes. To avoid having to
    # put "type: ignore" everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """