Coverage for python/lsst/daf/butler/core/dimensions/_records.py: 23%
159 statements
« prev ^ index » next — coverage.py v7.2.7, created at 2023-08-05 01:26 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("DimensionRecord", "SerializedDimensionRecord")
26from collections.abc import Hashable
27from typing import TYPE_CHECKING, Any, ClassVar, Optional, Tuple
29import lsst.sphgeom
30from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat
31from lsst.utils.classes import immutable
32from pydantic import Field, StrictBool, StrictFloat, StrictInt, StrictStr, create_model
34from ..json import from_json_pydantic, to_json_pydantic
35from ..persistenceContext import PersistenceContextVars
36from ..timespan import Timespan, TimespanDatabaseRepresentation
37from ._elements import Dimension, DimensionElement
39if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
40 from ...registry import Registry
41 from ._coordinate import DataCoordinate
42 from ._graph import DimensionUniverse
43 from ._schema import DimensionElementFields
def _reconstructDimensionRecord(definition: DimensionElement, mapping: dict[str, Any]) -> DimensionRecord:
    """Unpickle implementation for `DimensionRecord` subclasses.

    For internal use by `DimensionRecord`: reconstructs a record by looking
    up the element's dynamically-created record class and instantiating it
    from the pickled field mapping.
    """
    record_cls = definition.RecordClass
    return record_cls(**mapping)
def _subclassDimensionRecord(definition: DimensionElement) -> type[DimensionRecord]:
    """Create a dynamic subclass of `DimensionRecord` for the given element.

    For internal use by `DimensionRecord`.
    """
    from ._schema import DimensionElementFields

    fields = DimensionElementFields(definition)
    # Slot order matters (it drives __str__/__repr__ output): standard
    # fields first, then "region", then the timespan slot.
    member_slots = list(fields.standard.names)
    if definition.spatial:
        member_slots.append("region")
    if definition.temporal:
        member_slots.append(TimespanDatabaseRepresentation.NAME)
    namespace = {
        "definition": definition,
        "__slots__": tuple(member_slots),
        "fields": fields,
    }
    return type(definition.name + ".RecordClass", (DimensionRecord,), namespace)
class SpecificSerializedDimensionRecord(_BaseModelCompat, extra="forbid"):
    """Base model for a specific serialized record content.

    Concrete subclasses are created dynamically per dimension element by
    `_createSimpleRecordSubclass`; ``extra="forbid"`` makes validation
    reject unexpected keys in externally-supplied records.
    """
# Cache of dynamically-created SpecificSerializedDimensionRecord subclasses,
# keyed on (element, universe) — the universe is included because elements
# hash by name and identically-named elements can exist in different
# universes.  Populated by _createSimpleRecordSubclass.
_SIMPLE_RECORD_CLASS_CACHE: dict[
    tuple[DimensionElement, DimensionUniverse], type[SpecificSerializedDimensionRecord]
] = {}
def _createSimpleRecordSubclass(definition: DimensionElement) -> type[SpecificSerializedDimensionRecord]:
    """Return a pydantic model class validating serialized records for the
    given element, creating (and caching) it on first use.
    """
    from ._schema import DimensionElementFields

    # Cache on the definition (which hashes as the name) and the
    # associated universe.
    cache_key = (definition, definition.universe)
    cached = _SIMPLE_RECORD_CLASS_CACHE.get(cache_key)
    if cached is not None:
        return cached

    fields = DimensionElementFields(definition)
    # Prefer strict typing for external data: prevent pydantic from
    # silently coercing values across types.
    strict_types = {
        str: StrictStr,
        float: StrictFloat,
        bool: StrictBool,
        int: StrictInt,
    }

    members = {}
    for field in fields.standard:
        declared = field.getPythonType()
        declared = strict_types.get(declared, declared)
        if field.nullable:
            declared = Optional[declared]  # type: ignore
        members[field.name] = (declared, ...)
    if definition.temporal:
        members["timespan"] = (Tuple[int, int], ...)  # type: ignore
    if definition.spatial:
        members["region"] = (str, ...)

    # mypy does not seem to like create_model
    model = create_model(
        f"SpecificSerializedDimensionRecord{definition.name.capitalize()}",
        __base__=SpecificSerializedDimensionRecord,
        **members,  # type: ignore
    )

    _SIMPLE_RECORD_CLASS_CACHE[cache_key] = model
    return model
class SerializedDimensionRecord(_BaseModelCompat):
    """Simplified model for serializing a `DimensionRecord`."""

    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        example="exposure",
    )

    # Use strict types to prevent casting
    record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]] = Field(
        ...,
        title="Dimension record keys and values.",
        example={
            "definition": "exposure",
            "record": {"instrument": "LATISS", "exposure": 2021050300044, "obs_id": "AT_O_20210503_00044"},
        },
    )

    if not PYDANTIC_V2:

        class Config:
            """Local configuration overrides for model."""

            schema_extra = {
                "example": {
                    "definition": "detector",
                    "record": {
                        "instrument": "HSC",
                        "id": 72,
                        "full_name": "0_01",
                        "name_in_raft": "01",
                        "raft": "0",
                        "purpose": "SCIENCE",
                    },
                }
            }

    @classmethod
    def direct(
        cls,
        *,
        definition: str,
        record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]],
    ) -> SerializedDimensionRecord:
        """Construct a `SerializedDimensionRecord` directly without validators.

        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.
        """
        # This method requires tuples as values of the mapping, but JSON
        # readers will read things in as lists. Be kind and transparently
        # transform to tuples.  Use isinstance rather than an exact type
        # comparison so that list subclasses are converted too; leaving a
        # list in place would make frozenset(_recItems.items()) below raise
        # TypeError (lists are unhashable).
        _recItems = {k: tuple(v) if isinstance(v, list) else v for k, v in record.items()}  # type: ignore

        # Cache key: the definition name plus the (hashable) record items.
        key = (
            definition,
            frozenset(_recItems.items()),
        )
        cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        node = cls.model_construct(definition=definition, record=_recItems)  # type: ignore

        if cache is not None:
            cache[key] = node
        return node
@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record. Unrecognized keys are ignored. If this
        is the record for a `Dimension`, its primary key value may be provided
        with the actual name of the field (e.g. "id" or "name"), the name of
        the `Dimension`, or both. If this record class has a "timespan"
        attribute, "datetime_begin" and "datetime_end" keyword arguments may
        be provided instead of a single "timespan" keyword argument (but are
        ignored if a "timespan" argument is provided).

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute. The `DimensionRecord` base class
    itself is pure abstract, but does not use the `abc` module to indicate this
    because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    _serializedType = SerializedDimensionRecord

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        "Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )
        for name in self.__slots__:
            object.__setattr__(self, name, kwargs.get(name))
        # Build a timespan from the pair of datetime keywords only when no
        # explicit "timespan" value was provided.
        if self.definition.temporal is not None and self.timespan is None:
            object.__setattr__(
                self,
                "timespan",
                Timespan(
                    kwargs.get("datetime_begin"),
                    kwargs.get("datetime_end"),
                ),
            )

        from ._coordinate import DataCoordinate

        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.fromRequiredValues(
                self.definition.graph,
                tuple(kwargs[dimension] for dimension in self.definition.required.names),
            ),
        )

    def __eq__(self, other: Any) -> bool:
        # Use an identity comparison of types (records of different elements
        # are never equal); "is not" is the idiomatic exact-type check.
        if type(other) is not type(self):
            return False
        return self.dataId == other.dataId

    def __hash__(self) -> int:
        return hash(self.dataId)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f"  {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return "{}.RecordClass({})".format(
            self.definition.name, ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        )

    def __reduce__(self) -> tuple:
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))

    def _repr_html_(self) -> str:
        """Override the default representation in IPython/Jupyter notebooks.

        This gives a more readable output that understands embedded newlines.
        """
        # Fixed: the closing tag was previously "<pre>" rather than "</pre>",
        # which produced unbalanced HTML in notebook output.
        return f"<pre>{self}</pre>"

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect on for this class.

        Returns
        -------
        names : `list`
            The names of the dimensions.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.

        mapping = {name: getattr(self, name) for name in self.__slots__}
        # If the item in mapping supports simplification update it
        for k, v in mapping.items():
            try:
                mapping[k] = v.to_simple(minimal=minimal)
            except AttributeError:
                if isinstance(v, lsst.sphgeom.Region):
                    # YAML serialization specifies the class when it
                    # doesn't have to. This is partly for explicitness
                    # and also history. Here use a different approach.
                    # This code needs to be migrated to sphgeom
                    mapping[k] = v.encode().hex()
                if isinstance(v, bytes):
                    # We actually can't handle serializing out to bytes for
                    # hash objects, encode it here to a hex string
                    mapping[k] = v.hex()
        definition = self.definition.to_simple(minimal=minimal)
        return SerializedDimensionRecord(definition=definition, record=mapping)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDimensionRecord,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
        cacheKey: Hashable | None = None,
    ) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value return from `to_simple`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.
        cacheKey : `Hashable` or `None`
            If this is not None, it will be used as a key for any cached
            reconstruction instead of calculating a value from the serialized
            format.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")
        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        key = cacheKey or (
            simple.definition,
            frozenset(simple.record.items()),  # type: ignore
        )
        cache = PersistenceContextVars.dimensionRecords.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Timespan and region have to be converted to native form
        # for now assume that those keys are special
        rec = record_model.dict()

        if (ts := "timespan") in rec:
            rec[ts] = Timespan.from_simple(rec[ts], universe=universe, registry=registry)
        if (reg := "region") in rec:
            encoded = bytes.fromhex(rec[reg])
            rec[reg] = lsst.sphgeom.Region.decode(encoded)
        if (hsh := "hash") in rec:
            rec[hsh] = bytes.fromhex(rec[hsh].decode())

        dimRec = _reconstructDimensionRecord(definition, rec)
        if cache is not None:
            cache[key] = dimRec
        return dimRec

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def toDict(self, splitTimespan: bool = False) -> dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes. To avoid having to
    # put "type: ignore", everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """