Coverage for python/lsst/daf/butler/core/dimensions/_records.py: 23%
168 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-02 08:00 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("DimensionRecord", "SerializedDimensionRecord")
32from collections.abc import Hashable
33from typing import TYPE_CHECKING, Any, ClassVar, Optional, Tuple
35import lsst.sphgeom
36from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat
37from lsst.utils.classes import immutable
38from pydantic import Field, StrictBool, StrictFloat, StrictInt, StrictStr, create_model
40from ..json import from_json_pydantic, to_json_pydantic
41from ..persistenceContext import PersistenceContextVars
42from ..timespan import Timespan, TimespanDatabaseRepresentation
43from ._elements import Dimension, DimensionElement
45if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
46 from ...registry import Registry
47 from ._coordinate import DataCoordinate
48 from ._graph import DimensionUniverse
49 from ._schema import DimensionElementFields
52def _reconstructDimensionRecord(definition: DimensionElement, mapping: dict[str, Any]) -> DimensionRecord:
53 """Unpickle implementation for `DimensionRecord` subclasses.
55 For internal use by `DimensionRecord`.
56 """
57 return definition.RecordClass(**mapping)
def _subclassDimensionRecord(definition: DimensionElement) -> type[DimensionRecord]:
    """Build the concrete `DimensionRecord` subclass for ``definition``.

    For internal use by `DimensionRecord`.
    """
    # Local import avoids a circular dependency at module import time.
    from ._schema import DimensionElementFields

    fields = DimensionElementFields(definition)
    # One slot per standard field, plus optional spatial/temporal attributes.
    slots = [*fields.standard.names]
    if definition.spatial:
        slots.append("region")
    if definition.temporal:
        slots.append(TimespanDatabaseRepresentation.NAME)
    namespace = {
        "definition": definition,
        "__slots__": tuple(slots),
        "fields": fields,
    }
    return type(f"{definition.name}.RecordClass", (DimensionRecord,), namespace)
class SpecificSerializedDimensionRecord(_BaseModelCompat, extra="forbid"):
    """Base model for a specific serialized record content.

    Concrete per-element subclasses are created dynamically (and cached) by
    `_createSimpleRecordSubclass`; ``extra="forbid"`` makes validation reject
    any keys that are not declared fields of that subclass.
    """
# Cache of the dynamically-created validation models, keyed by the dimension
# element and its universe; populated lazily by _createSimpleRecordSubclass.
_SIMPLE_RECORD_CLASS_CACHE: dict[
    tuple[DimensionElement, DimensionUniverse], type[SpecificSerializedDimensionRecord]
] = {}
def _createSimpleRecordSubclass(definition: DimensionElement) -> type[SpecificSerializedDimensionRecord]:
    """Return a strict pydantic model describing ``definition``'s record."""
    # Local import avoids a circular dependency at module import time.
    from ._schema import DimensionElementFields

    # Cache on the definition (which hashes as the name) and the
    # associated universe.
    key = (definition, definition.universe)
    try:
        return _SIMPLE_RECORD_CLASS_CACHE[key]
    except KeyError:
        pass

    element_fields = DimensionElementFields(definition)
    # Prefer strict typing for external data
    strict_types = {
        str: StrictStr,
        float: StrictFloat,
        bool: StrictBool,
        int: StrictInt,
    }

    members = {}
    for field in element_fields.standard:
        declared = field.getPythonType()
        declared = strict_types.get(declared, declared)
        if field.nullable:
            declared = Optional[declared]  # type: ignore
        members[field.name] = (declared, ...)
    if definition.temporal:
        members["timespan"] = (Tuple[int, int], ...)  # type: ignore
    if definition.spatial:
        members["region"] = (str, ...)

    # mypy does not seem to like create_model
    model = create_model(
        f"SpecificSerializedDimensionRecord{definition.name.capitalize()}",
        __base__=SpecificSerializedDimensionRecord,
        **members,  # type: ignore
    )

    _SIMPLE_RECORD_CLASS_CACHE[key] = model
    return model
# While supporting pydantic v1 and v2 keep this outside the model.
# Used below as ``json_schema_extra`` (pydantic v2) or ``Config.schema_extra``
# (pydantic v1) to embed a worked example in the published JSON schema.
_serialized_dimension_record_schema_extra = {
    "examples": [
        {
            "definition": "detector",
            "record": {
                "instrument": "HSC",
                "id": 72,
                "full_name": "0_01",
                "name_in_raft": "01",
                "raft": "0",
                "purpose": "SCIENCE",
            },
        }
    ]
}
class SerializedDimensionRecord(_BaseModelCompat):
    """Simplified model for serializing a `DimensionRecord`."""

    # Name of the dimension element this record belongs to.
    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        examples=["exposure"],
    )

    # Use strict types to prevent casting
    record: dict[str, None | StrictInt | StrictFloat | StrictStr | StrictBool | tuple[int, int]] = Field(
        ...,
        title="Dimension record keys and values.",
        examples=[
            {
                "definition": "exposure",
                "record": {
                    "instrument": "LATISS",
                    "exposure": 2021050300044,
                    "obs_id": "AT_O_20210503_00044",
                },
            }
        ],
    )

    # Pydantic v2 and v1 attach schema examples through different hooks;
    # choose the right one at class-definition time.
    if PYDANTIC_V2:
        model_config = {
            "json_schema_extra": _serialized_dimension_record_schema_extra,  # type: ignore[typeddict-item]
        }
    else:

        class Config:
            """Local configuration overrides for model."""

            schema_extra = _serialized_dimension_record_schema_extra

    @classmethod
    def direct(
        cls,
        *,
        definition: str,
        record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]],
    ) -> SerializedDimensionRecord:
        """Construct a `SerializedDimensionRecord` directly without validators.

        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.
        """
        # This method requires tuples as values of the mapping, but JSON
        # readers will read things in as lists. Be kind and transparently
        # transform to tuples
        _recItems = {
            k: v if type(v) != list else tuple(v) for k, v in record.items()  # type: ignore # noqa: E721
        }

        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        key = (
            definition,
            frozenset(_recItems.items()),
        )
        # Reuse a previously-constructed model for identical inputs when a
        # persistence-context cache is active.
        cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        node = cls.model_construct(definition=definition, record=_recItems)  # type: ignore

        if cache is not None:
            cache[key] = node
        return node
@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record. Unrecognized keys are ignored. If this
        is the record for a `Dimension`, its primary key value may be provided
        with the actual name of the field (e.g. "id" or "name"), the name of
        the `Dimension`, or both. If this record class has a "timespan"
        attribute, "datetime_begin" and "datetime_end" keyword arguments may
        be provided instead of a single "timespan" keyword argument (but are
        ignored if a "timespan" argument is provided).

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute. The `DimensionRecord` base class
    itself is pure abstract, but does not use the `abc` module to indicate this
    because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    # Pydantic model used by to_simple/from_simple and the json helpers.
    _serializedType = SerializedDimensionRecord

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        "Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )
        # The class is @immutable, so attributes must be assigned through
        # object.__setattr__; fields absent from kwargs default to None.
        for name in self.__slots__:
            object.__setattr__(self, name, kwargs.get(name))
        # Allow a temporal record's timespan to be supplied as separate
        # datetime_begin/datetime_end keywords when no "timespan" was given.
        if self.definition.temporal is not None and self.timespan is None:
            object.__setattr__(
                self,
                "timespan",
                Timespan(
                    kwargs.get("datetime_begin"),
                    kwargs.get("datetime_end"),
                ),
            )

        # Local import avoids a circular dependency at module import time.
        from ._coordinate import DataCoordinate

        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.fromRequiredValues(
                self.definition.graph,
                tuple(kwargs[dimension] for dimension in self.definition.required.names),
            ),
        )

    def __eq__(self, other: Any) -> bool:
        # Equality requires the exact same dynamic subclass and compares only
        # the primary-key data ID, not the remaining field values.
        if type(other) != type(self):
            return False
        return self.dataId == other.dataId

    def __hash__(self) -> int:
        # Consistent with __eq__: hash on the primary-key data ID.
        return hash(self.dataId)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f"  {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return "{}.RecordClass({})".format(
            self.definition.name, ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        )

    def __reduce__(self) -> tuple:
        # Pickle through the module-level helper because the dynamically
        # created subclasses cannot be located by import path on unpickle.
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))

    def _repr_html_(self) -> str:
        """Override the default representation in IPython/Jupyter notebooks.

        This gives a more readable output that understands embedded newlines.
        """
        # NOTE(review): the closing tag reads "<pre>" rather than "</pre>";
        # looks like a typo — confirm before changing rendered output.
        return f"<pre>{self}<pre>"

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect on for this class.

        Returns
        -------
        names : `list`
            The names of the dimensions.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.
        key = (id(self.definition), self.dataId)
        cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        mapping = {name: getattr(self, name) for name in self.__slots__}
        # If the item in mapping supports simplification update it
        for k, v in mapping.items():
            try:
                mapping[k] = v.to_simple(minimal=minimal)
            except AttributeError:
                if isinstance(v, lsst.sphgeom.Region):
                    # YAML serialization specifies the class when it
                    # doesn't have to. This is partly for explicitness
                    # and also history. Here use a different approach.
                    # This code needs to be migrated to sphgeom
                    mapping[k] = v.encode().hex()
                if isinstance(v, bytes):
                    # We actually can't handle serializing out to bytes for
                    # hash objects, encode it here to a hex string
                    mapping[k] = v.hex()
        definition = self.definition.to_simple(minimal=minimal)
        dimRec = SerializedDimensionRecord(definition=definition, record=mapping)
        if cache is not None:
            cache[key] = dimRec
        return dimRec

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDimensionRecord,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
        cacheKey: Hashable | None = None,
    ) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value return from `to_simple`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.
        cacheKey : `Hashable` or `None`
            If this is not None, it will be used as a key for any cached
            reconstruction instead of calculating a value from the serialized
            format.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")
        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        key = cacheKey or (
            simple.definition,
            frozenset(simple.record.items()),  # type: ignore
        )
        cache = PersistenceContextVars.dimensionRecords.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Timespan and region have to be converted to native form
        # for now assume that those keys are special
        rec = record_model.model_dump()

        if (ts := "timespan") in rec:
            rec[ts] = Timespan.from_simple(rec[ts], universe=universe, registry=registry)
        if (reg := "region") in rec:
            # to_simple stores regions as hex-encoded sphgeom bytes.
            encoded = bytes.fromhex(rec[reg])
            rec[reg] = lsst.sphgeom.Region.decode(encoded)
        if (hsh := "hash") in rec:
            # NOTE(review): .decode() implies the validated "hash" value is
            # bytes here (yielding the hex string) — confirm the field type.
            rec[hsh] = bytes.fromhex(rec[hsh].decode())

        dimRec = _reconstructDimensionRecord(definition, rec)
        if cache is not None:
            cache[key] = dimRec
        return dimRec

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def toDict(self, splitTimespan: bool = False) -> dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes. To avoid having to
    # put "type: ignore", everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """