Coverage for python/lsst/daf/butler/core/dimensions/_records.py: 25%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("DimensionRecord", "SerializedDimensionRecord")
26from typing import (
27 Any,
28 ClassVar,
29 Dict,
30 Optional,
31 Tuple,
32 TYPE_CHECKING,
33 Type,
34 Union,
35)
36from pydantic import BaseModel, create_model, StrictStr, StrictInt, StrictBool, StrictFloat, Field
38import lsst.sphgeom
40from .._topology import SpatialRegionDatabaseRepresentation
41from ..timespan import Timespan, TimespanDatabaseRepresentation
42from ..utils import immutable
43from ._elements import Dimension, DimensionElement
44from ..json import from_json_pydantic, to_json_pydantic
46if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 46 ↛ 47line 46 didn't jump to line 47, because the condition on line 46 was never true
47 from ._coordinate import DataCoordinate
48 from ._schema import DimensionElementFields
49 from ._graph import DimensionUniverse
50 from ...registry import Registry
def _reconstructDimensionRecord(definition: DimensionElement, mapping: Dict[str, Any]) -> DimensionRecord:
    """Unpickle implementation for `DimensionRecord` subclasses.

    For internal use by `DimensionRecord`; rebuilds a record by passing the
    pickled field mapping to the element's dynamic record class.
    """
    record_cls = definition.RecordClass
    return record_cls(**mapping)
def _subclassDimensionRecord(definition: DimensionElement) -> Type[DimensionRecord]:
    """Create a dynamic subclass of `DimensionRecord` for the given element.

    For internal use by `DimensionRecord`.
    """
    from ._schema import DimensionElementFields

    fields = DimensionElementFields(definition)
    # Slots hold every standard field, plus a region and/or timespan slot
    # for spatial/temporal elements.
    slots = list(fields.standard.names)
    if definition.spatial:
        slots.append(SpatialRegionDatabaseRepresentation.NAME)
    if definition.temporal:
        slots.append(TimespanDatabaseRepresentation.NAME)
    namespace = {
        "definition": definition,
        "__slots__": tuple(slots),
        "fields": fields,
    }
    return type(definition.name + ".RecordClass", (DimensionRecord,), namespace)
class SpecificSerializedDimensionRecord(BaseModel, extra="forbid"):
    """Base model for a specific serialized record content.

    Concrete per-element subclasses are created dynamically (and cached) by
    `_createSimpleRecordSubclass`; ``extra="forbid"`` makes validation reject
    any record keys not declared for that element.
    """
# Cache of the dynamically-created validation models produced by
# _createSimpleRecordSubclass, keyed by (element, universe) so that distinct
# universes with same-named elements do not collide.
_SIMPLE_RECORD_CLASS_CACHE: Dict[Tuple[DimensionElement, DimensionUniverse],
                                 Type[SpecificSerializedDimensionRecord]] = {}
def _createSimpleRecordSubclass(definition: DimensionElement) -> Type[SpecificSerializedDimensionRecord]:
    """Return a pydantic model class validating serialized records of
    ``definition``, creating and caching it on first use.
    """
    from ._schema import DimensionElementFields

    # Cache on the definition (which hashes as the name) and the
    # associated universe.
    cache_key = (definition, definition.universe)
    cached = _SIMPLE_RECORD_CLASS_CACHE.get(cache_key)
    if cached is not None:
        return cached

    fields = DimensionElementFields(definition)
    # Prefer strict typing for external data: do not let pydantic coerce
    # between str/int/float/bool.
    strict_types = {
        str: StrictStr,
        float: StrictFloat,
        bool: StrictBool,
        int: StrictInt,
    }

    members = {}
    for field in fields.standard:
        field_type = field.getPythonType()
        field_type = strict_types.get(field_type, field_type)
        if field.nullable:
            field_type = Optional[field_type]  # type: ignore
        members[field.name] = (field_type, ...)
    # Timespans serialize as (begin, end) integer pairs; regions as strings.
    if definition.temporal:
        members["timespan"] = (Tuple[int, int], ...)  # type: ignore
    if definition.spatial:
        members["region"] = (str, ...)

    # mypy does not seem to like create_model
    model = create_model(
        f"SpecificSerializedDimensionRecord{definition.name.capitalize()}",
        __base__=SpecificSerializedDimensionRecord,
        **members,
    )  # type: ignore

    _SIMPLE_RECORD_CLASS_CACHE[cache_key] = model
    return model
class SerializedDimensionRecord(BaseModel):
    """Simplified model for serializing a `DimensionRecord`.

    Holds the name of the defining `DimensionElement` plus a flat mapping of
    that record's field values; produced by `DimensionRecord.to_simple` and
    consumed by `DimensionRecord.from_simple`.
    """

    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        example="exposure",
    )

    # Use strict types to prevent casting.  Tuple[int, int] covers serialized
    # timespans; regions arrive as hex-encoded strings (see
    # DimensionRecord.to_simple) and so are covered by StrictStr.
    record: Dict[str,
                 Union[None, StrictFloat, StrictStr, StrictBool, StrictInt, Tuple[int, int]]] = Field(
        ...,
        title="Dimension record keys and values.",
        example={"definition": "exposure",
                 "record": {"instrument": "LATISS",
                            "exposure": 2021050300044,
                            "obs_id": "AT_O_20210503_00044"}},
    )

    class Config:
        """Local configuration overrides for model."""

        # Full-model example embedded in the generated JSON schema.
        schema_extra = {
            "example": {
                "definition": "detector",
                "record": {
                    "instrument": "HSC",
                    "id": 72,
                    "full_name": "0_01",
                    "name_in_raft": "01",
                    "raft": "0",
                    "purpose": "SCIENCE",
                }
            }
        }
@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record. Unrecognized keys are ignored. If this
        is the record for a `Dimension`, its primary key value may be provided
        with the actual name of the field (e.g. "id" or "name"), the name of
        the `Dimension`, or both. If this record class has a "timespan"
        attribute, "datetime_begin" and "datetime_end" keyword arguments may
        be provided instead of a single "timespan" keyword argument (but are
        ignored if a "timespan" argument is provided).

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute. The `DimensionRecord` base class
    itself is pure abstract, but does not use the `abc` module to indicate this
    because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    # Pydantic model used by to_json/from_json (via the json helpers bound
    # below).
    _serializedType = SerializedDimensionRecord

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        f"Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )
        # Populate all slots; missing keys become None.  object.__setattr__
        # is required because @immutable blocks normal attribute assignment.
        for name in self.__slots__:
            object.__setattr__(self, name, kwargs.get(name))
        if self.definition.temporal is not None:
            if self.timespan is None:
                # No explicit "timespan" given; assemble one from the
                # (possibly absent) datetime_begin/datetime_end kwargs.
                object.__setattr__(
                    self,
                    "timespan",
                    Timespan(
                        kwargs.get("datetime_begin"),
                        kwargs.get("datetime_end"),
                    )
                )

        from ._coordinate import DataCoordinate
        # dataId holds just the required-dimension key values; a KeyError
        # here means a required dimension value was not supplied.
        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.fromRequiredValues(
                self.definition.graph,
                tuple(kwargs[dimension] for dimension in self.definition.required.names)
            )
        )

    def __eq__(self, other: Any) -> bool:
        # Exact type comparison (not isinstance): each element gets its own
        # dynamic subclass, so same type implies same element.
        if type(other) != type(self):
            return False
        return self.dataId == other.dataId

    def __hash__(self) -> int:
        return hash(self.dataId)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f"  {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return "{}.RecordClass({})".format(
            self.definition.name,
            ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        )

    def __reduce__(self) -> tuple:
        # Pickle via the module-level helper: the dynamic subclass itself is
        # not importable, but its defining element is picklable.
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect on for this class.

        Returns
        -------
        simple : `SerializedDimensionRecord`
            Simplified form of this record suitable for serialization.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.

        mapping = {name: getattr(self, name) for name in self.__slots__}
        # If the item in mapping supports simplification update it
        for k, v in mapping.items():
            try:
                mapping[k] = v.to_simple(minimal=minimal)
            except AttributeError:
                if isinstance(v, lsst.sphgeom.Region):
                    # YAML serialization specifies the class when it
                    # doesn't have to. This is partly for explicitness
                    # and also history. Here use a different approach.
                    # This code needs to be migrated to sphgeom
                    mapping[k] = v.encode().hex()
        definition = self.definition.to_simple(minimal=minimal)
        return SerializedDimensionRecord(definition=definition, record=mapping)

    @classmethod
    def from_simple(cls, simple: SerializedDimensionRecord,
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value return from `to_simple`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Timespan and region have to be converted to native form
        # for now assume that those keys are special
        rec = record_model.dict()

        if (ts := "timespan") in rec:
            rec[ts] = Timespan.from_simple(rec[ts], universe=universe, registry=registry)
        if (reg := "region") in rec:
            # Regions round-trip as hex-encoded sphgeom byte strings (the
            # inverse of the encoding done in to_simple).
            encoded = bytes.fromhex(rec[reg])
            rec[reg] = lsst.sphgeom.Region.decode(encoded)

        return _reconstructDimensionRecord(definition, rec)

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)

    def toDict(self, splitTimespan: bool = False) -> Dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.

        Returns
        -------
        results : `dict` [`str`, `Any`]
            Mapping from slot name to its current value.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes.  To avoid having to
    # put "type: ignore", everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """