Coverage for python/lsst/daf/butler/core/dimensions/_records.py: 22% of 161 statements
(coverage.py v7.2.7 report, created at 2023-07-21 09:55 +0000)
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("DimensionRecord", "SerializedDimensionRecord")
26from typing import TYPE_CHECKING, Any, ClassVar, Optional, Tuple
28import lsst.sphgeom
29from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat
30from lsst.utils.classes import immutable
31from pydantic import Field, StrictBool, StrictFloat, StrictInt, StrictStr, create_model
33from ..json import from_json_pydantic, to_json_pydantic
34from ..persistenceContext import PersistenceContextVars
35from ..timespan import Timespan, TimespanDatabaseRepresentation
36from ._elements import Dimension, DimensionElement
38if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
39 from ...registry import Registry
40 from ._coordinate import DataCoordinate
41 from ._graph import DimensionUniverse
42 from ._schema import DimensionElementFields
45def _reconstructDimensionRecord(definition: DimensionElement, mapping: dict[str, Any]) -> DimensionRecord:
46 """Unpickle implementation for `DimensionRecord` subclasses.
48 For internal use by `DimensionRecord`.
49 """
50 return definition.RecordClass(**mapping)
def _subclassDimensionRecord(definition: DimensionElement) -> type[DimensionRecord]:
    """Create a dynamic subclass of `DimensionRecord` for the given element.

    For internal use by `DimensionRecord`.
    """
    from ._schema import DimensionElementFields

    element_fields = DimensionElementFields(definition)
    # Every standard field becomes a slot; spatial elements additionally get
    # a "region" slot and temporal elements a timespan slot.
    slot_names = list(element_fields.standard.names)
    if definition.spatial:
        slot_names.append("region")
    if definition.temporal:
        slot_names.append(TimespanDatabaseRepresentation.NAME)
    namespace = {
        "definition": definition,
        "__slots__": tuple(slot_names),
        "fields": element_fields,
    }
    return type(f"{definition.name}.RecordClass", (DimensionRecord,), namespace)
class SpecificSerializedDimensionRecord(_BaseModelCompat, extra="forbid"):
    """Base model for a specific serialized record content.

    Concrete subclasses are created dynamically, one per dimension element,
    by `_createSimpleRecordSubclass`; ``extra="forbid"`` makes those models
    reject unrecognized keys in external data.
    """
# Cache of the dynamically-created SpecificSerializedDimensionRecord
# subclasses, keyed on both the element and its universe: elements hash by
# name, so the universe is included to keep same-named elements from
# different universes distinct.
_SIMPLE_RECORD_CLASS_CACHE: dict[
    tuple[DimensionElement, DimensionUniverse], type[SpecificSerializedDimensionRecord]
] = {}
def _createSimpleRecordSubclass(definition: DimensionElement) -> type[SpecificSerializedDimensionRecord]:
    """Return a pydantic model class specialized for one dimension element.

    The returned model validates the standard fields of ``definition`` with
    strict types, plus a serialized timespan and/or region when the element
    is temporal and/or spatial. Results are cached per (element, universe).
    """
    from ._schema import DimensionElementFields

    # Cache on the definition (which hashes as the name) and the
    # associated universe.
    cache_key = (definition, definition.universe)
    if (cached := _SIMPLE_RECORD_CLASS_CACHE.get(cache_key)) is not None:
        return cached

    element_fields = DimensionElementFields(definition)
    # Prefer strict typing for external data: map plain python types to
    # their pydantic strict counterparts to prevent silent casting.
    strict_types = {
        str: StrictStr,
        float: StrictFloat,
        bool: StrictBool,
        int: StrictInt,
    }

    members = {}
    for field in element_fields.standard:
        raw_type = field.getPythonType()
        field_type = strict_types.get(raw_type, raw_type)
        if field.nullable:
            field_type = Optional[field_type]  # type: ignore
        members[field.name] = (field_type, ...)
    if definition.temporal:
        # Timespans serialize as a (begin, end) pair of integers.
        members["timespan"] = (Tuple[int, int], ...)  # type: ignore
    if definition.spatial:
        # Regions serialize as a hex-encoded string.
        members["region"] = (str, ...)

    # mypy does not seem to like create_model
    model = create_model(
        f"SpecificSerializedDimensionRecord{definition.name.capitalize()}",
        __base__=SpecificSerializedDimensionRecord,
        **members,  # type: ignore
    )

    _SIMPLE_RECORD_CLASS_CACHE[cache_key] = model
    return model
class SerializedDimensionRecord(_BaseModelCompat):
    """Simplified model for serializing a `DimensionRecord`."""

    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        example="exposure",
    )

    # Use strict types to prevent casting
    record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]] = Field(
        ...,
        title="Dimension record keys and values.",
        example={
            "definition": "exposure",
            "record": {"instrument": "LATISS", "exposure": 2021050300044, "obs_id": "AT_O_20210503_00044"},
        },
    )

    # Pydantic v1 spells schema customization via a nested Config class;
    # under v2 the example metadata above is carried by Field() instead.
    if not PYDANTIC_V2:

        class Config:
            """Local configuration overrides for model."""

            schema_extra = {
                "example": {
                    "definition": "detector",
                    "record": {
                        "instrument": "HSC",
                        "id": 72,
                        "full_name": "0_01",
                        "name_in_raft": "01",
                        "raft": "0",
                        "purpose": "SCIENCE",
                    },
                }
            }

    @classmethod
    def direct(
        cls,
        *,
        definition: str,
        record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]],
    ) -> SerializedDimensionRecord:
        """Construct a `SerializedDimensionRecord` directly without validators.

        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.

        Parameters
        ----------
        definition : `str`
            Name of the dimension element this record belongs to.
        record : `dict`
            Mapping from field name to field value; timespan values may
            arrive as 2-element lists (from JSON) and are normalized to
            tuples.

        Returns
        -------
        serialized : `SerializedDimensionRecord`
            Model constructed without validation (possibly a cached
            instance from the active persistence context).
        """
        _recItems = record.items()
        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        # Lists must be converted to tuples so the key is hashable.
        key = (
            definition,
            frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems),  # type: ignore
        )
        cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        # This method requires tuples as values of the mapping, but JSON
        # readers will read things in as lists. Be kind and transparently
        # transform to tuples.
        # Use an identity check on the exact type (not isinstance) so list
        # subclasses are left untouched, matching the declared field types.
        serialized_record = {k: v if type(v) is not list else tuple(v) for k, v in record.items()}  # type: ignore

        node = cls.model_construct(definition=definition, record=serialized_record)  # type: ignore

        if cache is not None:
            cache[key] = node
        return node
@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record. Unrecognized keys are ignored. If this
        is the record for a `Dimension`, its primary key value may be provided
        with the actual name of the field (e.g. "id" or "name"), the name of
        the `Dimension`, or both. If this record class has a "timespan"
        attribute, "datetime_begin" and "datetime_end" keyword arguments may
        be provided instead of a single "timespan" keyword argument (but are
        ignored if a "timespan" argument is provided).

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute. The `DimensionRecord` base class
    itself is pure abstract, but does not use the `abc` module to indicate this
    because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    # Pydantic model used by to_simple/from_simple and the json helpers below.
    _serializedType = SerializedDimensionRecord

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                # Primary key was given under its field name; mirror it under
                # the dimension name, and reject conflicting values if the
                # caller supplied both spellings.
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        "Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )
        # The class is @immutable, so all attribute writes must bypass
        # __setattr__ via object.__setattr__. Missing fields default to None.
        for name in self.__slots__:
            object.__setattr__(self, name, kwargs.get(name))
        if self.definition.temporal is not None:
            if self.timespan is None:
                # No "timespan" argument: fall back to assembling one from
                # the split "datetime_begin"/"datetime_end" keywords.
                object.__setattr__(
                    self,
                    "timespan",
                    Timespan(
                        kwargs.get("datetime_begin"),
                        kwargs.get("datetime_end"),
                    ),
                )

        from ._coordinate import DataCoordinate

        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.fromRequiredValues(
                self.definition.graph,
                tuple(kwargs[dimension] for dimension in self.definition.required.names),
            ),
        )

    def __eq__(self, other: Any) -> bool:
        # Records are equal only when they are the same dynamic subclass and
        # share the same primary-key dataId; non-key field values are ignored.
        if type(other) != type(self):
            return False
        return self.dataId == other.dataId

    def __hash__(self) -> int:
        # Consistent with __eq__: hash on the primary-key dataId only.
        return hash(self.dataId)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f"  {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return "{}.RecordClass({})".format(
            self.definition.name, ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        )

    def __reduce__(self) -> tuple:
        # Pickle as (definition, {slot: value}); _reconstructDimensionRecord
        # rebuilds the record by calling definition.RecordClass(**mapping).
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))

    def _repr_html_(self) -> str:
        """Override the default representation in IPython/Jupyter notebooks.

        This gives a more readable output that understands embedded newlines.
        """
        return f"<pre>{self}<pre>"

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect on for this class.

        Returns
        -------
        names : `list`
            The names of the dimensions.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.

        mapping = {name: getattr(self, name) for name in self.__slots__}
        # If the item in mapping supports simplification update it
        for k, v in mapping.items():
            try:
                mapping[k] = v.to_simple(minimal=minimal)
            except AttributeError:
                # Values without a to_simple method (plain ints/strings,
                # regions, bytes) land here and are handled case by case.
                if isinstance(v, lsst.sphgeom.Region):
                    # YAML serialization specifies the class when it
                    # doesn't have to. This is partly for explicitness
                    # and also history. Here use a different approach.
                    # This code needs to be migrated to sphgeom
                    mapping[k] = v.encode().hex()
                if isinstance(v, bytes):
                    # We actually can't handle serializing out to bytes for
                    # hash objects, encode it here to a hex string
                    mapping[k] = v.hex()
        definition = self.definition.to_simple(minimal=minimal)
        return SerializedDimensionRecord(definition=definition, record=mapping)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDimensionRecord,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
    ) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value return from `to_simple`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")
        _recItems = simple.record.items()
        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        # Lists (as produced by JSON readers) become tuples so the cache key
        # is hashable; mirrors SerializedDimensionRecord.direct.
        key = (
            simple.definition,
            frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems),  # type: ignore
        )
        cache = PersistenceContextVars.dimensionRecords.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Timespan and region have to be converted to native form
        # for now assume that those keys are special
        # NOTE(review): .dict() is the pydantic-v1 spelling; presumably
        # _BaseModelCompat provides it under v2 as well -- confirm.
        rec = record_model.dict()

        if (ts := "timespan") in rec:
            rec[ts] = Timespan.from_simple(rec[ts], universe=universe, registry=registry)
        if (reg := "region") in rec:
            # Regions were serialized as hex strings by to_simple.
            encoded = bytes.fromhex(rec[reg])
            rec[reg] = lsst.sphgeom.Region.decode(encoded)
        if (hsh := "hash") in rec:
            rec[hsh] = bytes.fromhex(rec[hsh].decode())

        dimRec = _reconstructDimensionRecord(definition, rec)
        if cache is not None:
            cache[key] = dimRec
        return dimRec

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def toDict(self, splitTimespan: bool = False) -> dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.

        Returns
        -------
        mapping : `dict` [ `str`, `Any` ]
            Mapping of field name to field value.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes. To avoid having to
    # put "type: ignore", everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """