Coverage for python/lsst/daf/butler/core/dimensions/_records.py: 24%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("DimensionRecord", "SerializedDimensionRecord")
26from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional, Tuple, Type, Union
28import lsst.sphgeom
29from lsst.utils.classes import immutable
30from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr, create_model
32from .._topology import SpatialRegionDatabaseRepresentation
33from ..json import from_json_pydantic, to_json_pydantic
34from ..timespan import Timespan, TimespanDatabaseRepresentation
35from ._elements import Dimension, DimensionElement
37if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 37 ↛ 38line 37 didn't jump to line 38, because the condition on line 37 was never true
38 from ...registry import Registry
39 from ._coordinate import DataCoordinate
40 from ._graph import DimensionUniverse
41 from ._schema import DimensionElementFields
def _reconstructDimensionRecord(definition: DimensionElement, mapping: Dict[str, Any]) -> DimensionRecord:
    """Build a `DimensionRecord` subclass instance from a field mapping.

    For internal use by `DimensionRecord`: this is the callable returned
    from `DimensionRecord.__reduce__` and is also used by
    `DimensionRecord.from_simple`.

    Parameters
    ----------
    definition : `DimensionElement`
        Element whose ``RecordClass`` attribute is instantiated.
    mapping : `dict` [ `str`, `Any` ]
        Field values, forwarded to the record class as keyword arguments.

    Returns
    -------
    record : `DimensionRecord`
        The newly constructed record.
    """
    record_class = definition.RecordClass
    return record_class(**mapping)
def _subclassDimensionRecord(definition: DimensionElement) -> Type[DimensionRecord]:
    """Create the dynamic `DimensionRecord` subclass for one element.

    For internal use by `DimensionRecord`.

    Parameters
    ----------
    definition : `DimensionElement`
        Element the new record class describes.

    Returns
    -------
    cls : `type` [ `DimensionRecord` ]
        A new class named ``"<element name>.RecordClass"`` whose
        ``__slots__`` are the element's standard field names, followed by
        the region and timespan names when the element is spatial and/or
        temporal.
    """
    from ._schema import DimensionElementFields

    element_fields = DimensionElementFields(definition)
    slot_names = [*element_fields.standard.names]
    if definition.spatial:
        slot_names.append(SpatialRegionDatabaseRepresentation.NAME)
    if definition.temporal:
        slot_names.append(TimespanDatabaseRepresentation.NAME)
    namespace = {
        "definition": definition,
        "__slots__": tuple(slot_names),
        "fields": element_fields,
    }
    return type(f"{definition.name}.RecordClass", (DimensionRecord,), namespace)
class SpecificSerializedDimensionRecord(BaseModel, extra="forbid"):
    """Base model for a specific serialized record content.

    This class declares no fields itself.  Per-element subclasses are
    generated by `_createSimpleRecordSubclass`, which passes this class as
    ``__base__`` to `pydantic.create_model`.  The model is configured with
    ``extra="forbid"``, pydantic's setting for rejecting keys that are not
    declared fields.
    """
# Module-level cache of the pydantic models generated by
# `_createSimpleRecordSubclass`, keyed on ``(element, universe)`` so that
# each model is only built once per element definition.
_SIMPLE_RECORD_CLASS_CACHE: Dict[
    Tuple[DimensionElement, DimensionUniverse], Type[SpecificSerializedDimensionRecord]
] = {}
def _createSimpleRecordSubclass(definition: DimensionElement) -> Type[SpecificSerializedDimensionRecord]:
    """Return the pydantic model that validates serialized records.

    The model is created on first request for a given element and then
    served from `_SIMPLE_RECORD_CLASS_CACHE`.

    Parameters
    ----------
    definition : `DimensionElement`
        Element whose record content the model describes.

    Returns
    -------
    model : `type` [ `SpecificSerializedDimensionRecord` ]
        Model with one required entry per standard field of the element,
        plus ``timespan`` and/or ``region`` entries for temporal and/or
        spatial elements.
    """
    from ._schema import DimensionElementFields

    # The definition hashes as its name, so the associated universe is
    # part of the key as well.
    key = (definition, definition.universe)
    cached = _SIMPLE_RECORD_CLASS_CACHE.get(key)
    if cached is not None:
        return cached

    # Prefer strict typing for external data.
    strict_types = {str: StrictStr, float: StrictFloat, bool: StrictBool, int: StrictInt}

    members = {}
    for field in DimensionElementFields(definition).standard:
        python_type = field.getPythonType()
        annotation = strict_types.get(python_type, python_type)
        if field.nullable:
            annotation = Optional[annotation]  # type: ignore
        members[field.name] = (annotation, ...)
    if definition.temporal:
        members["timespan"] = (Tuple[int, int], ...)  # type: ignore
    if definition.spatial:
        members["region"] = (str, ...)

    # mypy does not seem to like create_model
    model_name = f"SpecificSerializedDimensionRecord{definition.name.capitalize()}"
    model = create_model(
        model_name,
        __base__=SpecificSerializedDimensionRecord,
        **members,  # type: ignore
    )

    _SIMPLE_RECORD_CLASS_CACHE[key] = model
    return model
class SerializedDimensionRecord(BaseModel):
    """Simplified model for serializing a `DimensionRecord`."""

    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        example="exposure",
    )

    # Use strict types to prevent casting
    # NOTE(review): the example below shows a whole serialized record
    # rather than just the value of this field -- confirm whether that is
    # intended before changing it, since it appears in the generated schema.
    record: Dict[str, Union[None, StrictFloat, StrictStr, StrictBool, StrictInt, Tuple[int, int]]] = Field(
        ...,
        title="Dimension record keys and values.",
        example={
            "definition": "exposure",
            "record": {"instrument": "LATISS", "exposure": 2021050300044, "obs_id": "AT_O_20210503_00044"},
        },
    )

    class Config:
        """Local configuration overrides for model."""

        schema_extra = {
            "example": {
                "definition": "detector",
                "record": {
                    "instrument": "HSC",
                    "id": 72,
                    "full_name": "0_01",
                    "name_in_raft": "01",
                    "raft": "0",
                    "purpose": "SCIENCE",
                },
            }
        }

    @classmethod
    def direct(
        cls,
        *,
        definition: str,
        record: Dict[str, Union[None, StrictFloat, StrictStr, StrictBool, StrictInt, Tuple[int, int]]],
    ) -> SerializedDimensionRecord:
        """Construct a `SerializedDimensionRecord` directly without validators.

        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires.  The attributes
        are set with `object.__setattr__` on a bare instance, so no
        validation or type coercion is performed.

        This method should only be called when the inputs are trusted.

        Parameters
        ----------
        definition : `str`
            Name of the dimension element the record belongs to.
        record : `dict`
            Record keys and values.  Any `list` value is converted to a
            `tuple`; all other values are stored unchanged.

        Returns
        -------
        node : `SerializedDimensionRecord`
            The new instance, with both fields marked as set.
        """
        # Build the instance by hand; there is no need to also call
        # ``cls.construct`` since its result would be thrown away.
        node = SerializedDimensionRecord.__new__(cls)
        setter = object.__setattr__
        setter(node, "definition", definition)
        # This method requires tuples as values of the mapping, but JSON
        # readers will read things in as lists. Be kind and transparently
        # transform to tuples
        setter(
            node, "record", {k: tuple(v) if isinstance(v, list) else v for k, v in record.items()}  # type: ignore
        )
        setter(node, "__fields_set__", {"definition", "record"})
        return node
@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record.  Unrecognized keys are ignored.  If this
        is the record for a `Dimension`, its primary key value may be provided
        with the actual name of the field (e.g. "id" or "name"), the name of
        the `Dimension`, or both.  If this record class has a "timespan"
        attribute, "datetime_begin" and "datetime_end" keyword arguments may
        be provided instead of a single "timespan" keyword argument (but are
        ignored if a "timespan" argument is provided).

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute.  The `DimensionRecord` base class
    itself is pure abstract, but does not use the `abc` module to indicate this
    because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    _serializedType = SerializedDimensionRecord

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        f"Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )
        # Instances are immutable, so attributes are set by going around
        # the class's own attribute handling.
        for name in self.__slots__:
            object.__setattr__(self, name, kwargs.get(name))
        # Fall back to the split begin/end keywords only when no "timespan"
        # value was given.
        if self.definition.temporal is not None and self.timespan is None:
            object.__setattr__(
                self,
                "timespan",
                Timespan(
                    kwargs.get("datetime_begin"),
                    kwargs.get("datetime_end"),
                ),
            )

        from ._coordinate import DataCoordinate

        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.fromRequiredValues(
                self.definition.graph,
                tuple(kwargs[dimension] for dimension in self.definition.required.names),
            ),
        )

    def __eq__(self, other: Any) -> bool:
        # Records of different (dynamically created) classes never compare
        # equal, even if their data IDs do.
        if type(other) is not type(self):
            return False
        return self.dataId == other.dataId

    def __hash__(self) -> int:
        return hash(self.dataId)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f"  {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        field_text = ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        return f"{self.definition.name}.RecordClass({field_text})"

    def __reduce__(self) -> tuple:
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization.  There is no minimal form for the
            record itself; the flag is only forwarded to the ``to_simple``
            methods of the element definition and of the field values.

        Returns
        -------
        simple : `SerializedDimensionRecord`
            Model holding the element name and the simplified field values.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.

        mapping = {name: getattr(self, name) for name in self.__slots__}
        # If the item in mapping supports simplification update it.  Only
        # values are replaced, so iterating over the dict while doing so is
        # safe.
        for k, v in mapping.items():
            # Look the method up first rather than catching AttributeError
            # around the call, so that an AttributeError raised inside a
            # real to_simple() implementation is not silently swallowed.
            simplify = getattr(v, "to_simple", None)
            if simplify is not None:
                mapping[k] = simplify(minimal=minimal)
            elif isinstance(v, lsst.sphgeom.Region):
                # YAML serialization specifies the class when it
                # doesn't have to. This is partly for explicitness
                # and also history. Here use a different approach.
                # This code needs to be migrated to sphgeom
                mapping[k] = v.encode().hex()
            elif isinstance(v, bytes):
                # We actually can't handle serializing out to bytes for
                # hash objects, encode it here to a hex string
                mapping[k] = v.hex()
        definition = self.definition.to_simple(minimal=minimal)
        return SerializedDimensionRecord(definition=definition, record=mapping)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDimensionRecord,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
    ) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value return from `to_simple`.
        universe : `DimensionUniverse`
            The universe used to look up the element named by
            ``simple.definition``.  Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or
            if no universe can be obtained from the registry.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Timespan and region have to be converted to native form
        # for now assume that those keys are special
        rec = record_model.dict()

        if (ts := "timespan") in rec:
            rec[ts] = Timespan.from_simple(rec[ts], universe=universe, registry=registry)
        if (reg := "region") in rec:
            encoded = bytes.fromhex(rec[reg])
            rec[reg] = lsst.sphgeom.Region.decode(encoded)
        if (hsh := "hash") in rec:
            # NOTE(review): the .decode() implies the validated value is
            # bytes holding the hex text written by to_simple -- presumably
            # due to coercion by the record model; confirm.
            rec[hsh] = bytes.fromhex(rec[hsh].decode())

        return _reconstructDimensionRecord(definition, rec)

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)

    def toDict(self, splitTimespan: bool = False) -> Dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.

        Returns
        -------
        results : `dict` [ `str`, `Any` ]
            Mapping from field name to value.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes.  To avoid having to
    # put "type: ignore", everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """