Coverage for python/lsst/daf/butler/core/dimensions/_records.py: 21%
165 statements
« prev ^ index » next — coverage.py v7.2.7, created at 2023-07-12 10:56 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("DimensionRecord", "SerializedDimensionRecord")
26from typing import TYPE_CHECKING, Any, ClassVar, Optional, Tuple
28import lsst.sphgeom
29from lsst.utils.classes import immutable
31try:
32 from pydantic.v1 import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr, create_model
33except ModuleNotFoundError:
34 from pydantic import ( # type: ignore
35 BaseModel,
36 Field,
37 StrictBool,
38 StrictFloat,
39 StrictInt,
40 StrictStr,
41 create_model,
42 )
44from ..json import from_json_pydantic, to_json_pydantic
45from ..persistenceContext import PersistenceContextVars
46from ..timespan import Timespan, TimespanDatabaseRepresentation
47from ._elements import Dimension, DimensionElement
49if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
50 from ...registry import Registry
51 from ._coordinate import DataCoordinate
52 from ._graph import DimensionUniverse
53 from ._schema import DimensionElementFields
56def _reconstructDimensionRecord(definition: DimensionElement, mapping: dict[str, Any]) -> DimensionRecord:
57 """Unpickle implementation for `DimensionRecord` subclasses.
59 For internal use by `DimensionRecord`.
60 """
61 return definition.RecordClass(**mapping)
def _subclassDimensionRecord(definition: DimensionElement) -> type[DimensionRecord]:
    """Create a dynamic subclass of `DimensionRecord` for the given element.

    For internal use by `DimensionRecord`.
    """
    from ._schema import DimensionElementFields

    fields = DimensionElementFields(definition)
    # Slots are the standard table fields, plus "region"/"timespan" for
    # spatial/temporal elements respectively.
    member_slots = list(fields.standard.names)
    if definition.spatial:
        member_slots.append("region")
    if definition.temporal:
        member_slots.append(TimespanDatabaseRepresentation.NAME)
    namespace = {
        "definition": definition,
        "__slots__": tuple(member_slots),
        "fields": fields,
    }
    return type(f"{definition.name}.RecordClass", (DimensionRecord,), namespace)
class SpecificSerializedDimensionRecord(BaseModel, extra="forbid"):
    """Base model for a specific serialized record content.

    Concrete subclasses are created dynamically, one per dimension element
    (see `_createSimpleRecordSubclass`); ``extra="forbid"`` makes validation
    reject any keys not declared for that element.
    """
# Process-level cache of the dynamically-created validation models, keyed by
# (element, universe): elements hash by name, so the universe is included to
# keep same-named elements from different universes distinct.
_SIMPLE_RECORD_CLASS_CACHE: dict[
    tuple[DimensionElement, DimensionUniverse], type[SpecificSerializedDimensionRecord]
] = {}
def _createSimpleRecordSubclass(definition: DimensionElement) -> type[SpecificSerializedDimensionRecord]:
    """Return a pydantic model class validating serialized records of the
    given element, creating and caching it on first use.
    """
    from ._schema import DimensionElementFields

    # Cache on the definition (which hashes as the name) and the
    # associated universe.
    cache_key = (definition, definition.universe)
    if (cached := _SIMPLE_RECORD_CLASS_CACHE.get(cache_key)) is not None:
        return cached

    fields = DimensionElementFields(definition)
    # Prefer strict typing for external data
    strict_types = {
        str: StrictStr,
        float: StrictFloat,
        bool: StrictBool,
        int: StrictInt,
    }

    members = {}
    for field in fields.standard:
        annotation = field.getPythonType()
        annotation = strict_types.get(annotation, annotation)
        if field.nullable:
            annotation = Optional[annotation]  # type: ignore
        members[field.name] = (annotation, ...)
    if definition.temporal:
        members["timespan"] = (Tuple[int, int], ...)  # type: ignore
    if definition.spatial:
        members["region"] = (str, ...)

    # mypy does not seem to like create_model
    model = create_model(
        f"SpecificSerializedDimensionRecord{definition.name.capitalize()}",
        __base__=SpecificSerializedDimensionRecord,
        **members,  # type: ignore
    )

    _SIMPLE_RECORD_CLASS_CACHE[cache_key] = model
    return model
class SerializedDimensionRecord(BaseModel):
    """Simplified model for serializing a `DimensionRecord`."""

    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        example="exposure",
    )

    # Use strict types to prevent casting
    record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]] = Field(
        ...,
        title="Dimension record keys and values.",
        example={
            "definition": "exposure",
            "record": {"instrument": "LATISS", "exposure": 2021050300044, "obs_id": "AT_O_20210503_00044"},
        },
    )

    class Config:
        """Local configuration overrides for model."""

        # Extra JSON-schema metadata: a worked example of a serialized record.
        schema_extra = {
            "example": {
                "definition": "detector",
                "record": {
                    "instrument": "HSC",
                    "id": 72,
                    "full_name": "0_01",
                    "name_in_raft": "01",
                    "raft": "0",
                    "purpose": "SCIENCE",
                },
            }
        }

    @classmethod
    def direct(
        cls,
        *,
        definition: str,
        record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]],
    ) -> SerializedDimensionRecord:
        """Construct a `SerializedDimensionRecord` directly without validators.

        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.
        """
        _recItems = record.items()
        # Cache key: the definition name plus a hashable snapshot of the
        # record items (lists converted to tuples so they can be hashed).
        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        key = (
            definition,
            frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems),  # type: ignore
        )
        cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result
        # Bypass __init__ (and validation) entirely; attributes are assigned
        # with object.__setattr__ to sidestep pydantic's __setattr__.
        node = SerializedDimensionRecord.__new__(cls)
        setter = object.__setattr__
        setter(node, "definition", definition)
        # This method requires tuples as values of the mapping, but JSON
        # readers will read things in as lists. Be kind and transparently
        # transform to tuples
        setter(
            node, "record", {k: v if type(v) != list else tuple(v) for k, v in record.items()}  # type: ignore
        )
        # Pydantic v1 bookkeeping normally filled in by __init__: mark both
        # fields as explicitly set so .dict()/.json() include them.
        setter(node, "__fields_set__", {"definition", "record"})
        if cache is not None:
            cache[key] = node
        return node
@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record. Unrecognized keys are ignored. If this
        is the record for a `Dimension`, its primary key value may be provided
        with the actual name of the field (e.g. "id" or "name"), the name of
        the `Dimension`, or both. If this record class has a "timespan"
        attribute, "datetime_begin" and "datetime_end" keyword arguments may
        be provided instead of a single "timespan" keyword argument (but are
        ignored if a "timespan" argument is provided).

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute. The `DimensionRecord` base class
    itself is pure abstract, but does not use the `abc` module to indicate this
    because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    _serializedType = SerializedDimensionRecord

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        "Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )
        for name in self.__slots__:
            # Class is @immutable, so all assignment goes through
            # object.__setattr__; missing fields are stored as None.
            object.__setattr__(self, name, kwargs.get(name))
        if self.definition.temporal is not None:
            if self.timespan is None:
                # No "timespan" kwarg: build one from the split
                # datetime_begin/datetime_end kwargs (either may be None).
                object.__setattr__(
                    self,
                    "timespan",
                    Timespan(
                        kwargs.get("datetime_begin"),
                        kwargs.get("datetime_end"),
                    ),
                )

        from ._coordinate import DataCoordinate

        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.fromRequiredValues(
                self.definition.graph,
                tuple(kwargs[dimension] for dimension in self.definition.required.names),
            ),
        )

    def __eq__(self, other: Any) -> bool:
        # Exact type match: records of different elements compare unequal
        # even if their data IDs happen to agree.
        if type(other) != type(self):
            return False
        return self.dataId == other.dataId

    def __hash__(self) -> int:
        return hash(self.dataId)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f"  {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return "{}.RecordClass({})".format(
            self.definition.name, ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        )

    def __reduce__(self) -> tuple:
        # Pickle via the module-level factory, since the record class itself
        # is created dynamically and cannot be pickled by reference.
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))

    def _repr_html_(self) -> str:
        """Override the default representation in IPython/Jupyter notebooks.

        This gives a more readable output that understands embedded newlines.
        """
        # Fixed: the closing tag was "<pre>"; it must be "</pre>" for the
        # browser to terminate the preformatted block.
        return f"<pre>{self}</pre>"

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect on for this class.

        Returns
        -------
        simple : `SerializedDimensionRecord`
            The simplified form of this record, suitable for serialization.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.

        mapping = {name: getattr(self, name) for name in self.__slots__}
        # If the item in mapping supports simplification update it
        for k, v in mapping.items():
            try:
                mapping[k] = v.to_simple(minimal=minimal)
            except AttributeError:
                if isinstance(v, lsst.sphgeom.Region):
                    # YAML serialization specifies the class when it
                    # doesn't have to. This is partly for explicitness
                    # and also history. Here use a different approach.
                    # This code needs to be migrated to sphgeom
                    mapping[k] = v.encode().hex()
                if isinstance(v, bytes):
                    # We actually can't handle serializing out to bytes for
                    # hash objects, encode it here to a hex string
                    mapping[k] = v.hex()
        definition = self.definition.to_simple(minimal=minimal)
        return SerializedDimensionRecord(definition=definition, record=mapping)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDimensionRecord,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
    ) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value return from `to_simple`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")
        _recItems = simple.record.items()
        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        key = (
            simple.definition,
            frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems),  # type: ignore
        )
        cache = PersistenceContextVars.dimensionRecords.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Timespan and region have to be converted to native form
        # for now assume that those keys are special
        rec = record_model.dict()

        if (ts := "timespan") in rec:
            rec[ts] = Timespan.from_simple(rec[ts], universe=universe, registry=registry)
        if (reg := "region") in rec:
            encoded = bytes.fromhex(rec[reg])
            rec[reg] = lsst.sphgeom.Region.decode(encoded)
        if (hsh := "hash") in rec:
            rec[hsh] = bytes.fromhex(rec[hsh].decode())

        dimRec = _reconstructDimensionRecord(definition, rec)
        if cache is not None:
            cache[key] = dimRec
        return dimRec

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def toDict(self, splitTimespan: bool = False) -> dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes. To avoid having to
    # put "type: ignore", everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """