Coverage for python / lsst / daf / butler / dimensions / _records.py: 24%

200 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-26 08:49 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ( 

31 "DataIdKey", 

32 "DataIdValue", 

33 "DimensionRecord", 

34 "SerializedDimensionRecord", 

35 "SerializedKeyValueDimensionRecord", 

36) 

37 

38import itertools 

39from collections.abc import Callable, Hashable 

40from typing import TYPE_CHECKING, Any, ClassVar, Self, TypeAlias, cast 

41 

42import pydantic 

43from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr, create_model 

44 

45import lsst.sphgeom 

46from lsst.utils.classes import immutable 

47 

48from .._timespan import Timespan 

49from ..column_spec import make_tuple_type_adapter 

50from ..json import from_json_pydantic, to_json_pydantic 

51from ..persistence_context import PersistenceContextVars 

52from ._elements import Dimension, DimensionElement 

53 

54if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

55 from ..registry import Registry 

56 from ._coordinate import DataCoordinate 

57 from ._schema import DimensionElementFields 

58 from ._universe import DimensionUniverse 

59 

60 

DataIdKey: TypeAlias = str
"""Type annotation alias for the keys that can be used to index a
DataCoordinate.
"""

# Pydantic will cast int to str if str is first in the Union, so keep `int`
# first.
DataIdValue: TypeAlias = int | str
"""Type annotation alias for the values that can be present in a
DataCoordinate or other data ID.
"""

SerializedKeyValueDimensionRecord: TypeAlias = list[Any]
"""Type annotation alias for the serialized form of DimensionRecord used in
container serialization (e.g. `DimensionRecordSet`): a flat list of
JSON-compatible values produced by `DimensionRecord.serialize_key_value`.
"""

77 

78def _reconstructDimensionRecord(definition: DimensionElement, mapping: dict[str, Any]) -> DimensionRecord: 

79 """Unpickle implementation for `DimensionRecord` subclasses. 

80 

81 For internal use by `DimensionRecord`. 

82 """ 

83 return definition.RecordClass(**mapping) 

84 

85 

def _subclassDimensionRecord(definition: DimensionElement) -> type[DimensionRecord]:
    """Build the dynamic `DimensionRecord` subclass for one dimension element.

    For internal use by `DimensionRecord`.
    """
    # Imported here to avoid a circular import at module load time.
    from ._schema import DimensionElementFields

    fields = DimensionElementFields(definition)

    # Standard schema fields become slots; spatial/temporal elements carry an
    # extra "region" and/or "timespan" attribute as well.
    slot_names = list(fields.standard.names)
    if definition.spatial:
        slot_names.append("region")
    if definition.temporal:
        slot_names.append("timespan")

    namespace = {
        "definition": definition,
        "__slots__": tuple(slot_names),
        "fields": fields,
        # Adapters for the key (required data ID values) and value
        # (implied + remainder fields) halves of key/value serialization.
        "_key_type_adapter": make_tuple_type_adapter(definition.schema.required),
        "_value_type_adapter": make_tuple_type_adapter(
            itertools.chain(definition.schema.implied, definition.schema.remainder)
        ),
    }
    return type(definition.name + ".RecordClass", (DimensionRecord,), namespace)

113 

114 

class SpecificSerializedDimensionRecord(BaseModel, extra="forbid"):
    """Base model for a specific serialized record content.

    Concrete subclasses are created dynamically, one per dimension element,
    by `_createSimpleRecordSubclass`; ``extra="forbid"`` makes validation
    reject any field not declared for that element.
    """

117 

118 

# Cache of the dynamically created SpecificSerializedDimensionRecord
# subclasses, keyed by (element, universe) so that identically named elements
# from different universes do not collide.
_SIMPLE_RECORD_CLASS_CACHE: dict[
    tuple[DimensionElement, DimensionUniverse], type[SpecificSerializedDimensionRecord]
] = {}

122 

123 

def _createSimpleRecordSubclass(definition: DimensionElement) -> type[SpecificSerializedDimensionRecord]:
    """Create (or fetch from cache) a strict pydantic model that validates
    the serialized record content for a single dimension element.

    Parameters
    ----------
    definition : `DimensionElement`
        Element whose schema defines the model fields.

    Returns
    -------
    model : `type` [ `SpecificSerializedDimensionRecord` ]
        Dynamically created pydantic model class with one required field per
        standard schema field, plus ``timespan`` and/or ``region`` fields for
        temporal/spatial elements.
    """
    from ._schema import DimensionElementFields

    # Cache on the definition (which hashes as the name) and the
    # associated universe. Single lookup instead of `in` followed by `[]`.
    cache_key = (definition, definition.universe)
    if (cached := _SIMPLE_RECORD_CLASS_CACHE.get(cache_key)) is not None:
        return cached

    fields = DimensionElementFields(definition)
    members: dict[str, Any] = {}
    # Prefer strict typing for external data: pydantic must not silently
    # coerce e.g. int -> str.
    type_map = {
        str: StrictStr,
        float: StrictFloat,
        bool: StrictBool,
        int: StrictInt,
    }

    for field in fields.standard:
        field_type = field.getPythonType()
        field_type = type_map.get(field_type, field_type)
        if field.nullable:
            field_type = field_type | None  # type: ignore
        members[field.name] = (field_type, ...)
    if definition.temporal:
        members["timespan"] = (Timespan | None, ...)  # type: ignore
    if definition.spatial:
        # Regions arrive as hex-encoded strings (see DimensionRecord
        # .to_simple), hence `str` rather than a region type here.
        members["region"] = (str | None, ...)  # type: ignore

    # For the new derived class name need to convert to camel case,
    # so "day_obs" -> "DayObs".
    derived_name = "".join(part.capitalize() for part in definition.name.split("_"))

    model = create_model(
        f"SpecificSerializedDimensionRecord{derived_name}",
        __base__=SpecificSerializedDimensionRecord,
        **members,  # type: ignore
    )

    _SIMPLE_RECORD_CLASS_CACHE[cache_key] = model
    return model

166 

167 

168# While supporting pydantic v1 and v2 keep this outside the model. 

# While supporting pydantic v1 and v2 keep this outside the model.
# Injected into SerializedDimensionRecord.model_config as "json_schema_extra"
# to provide a worked example in the generated JSON schema.
_serialized_dimension_record_schema_extra = {
    "examples": [
        {
            "definition": "detector",
            "record": {
                "instrument": "HSC",
                "id": 72,
                "full_name": "0_01",
                "name_in_raft": "01",
                "raft": "0",
                "purpose": "SCIENCE",
            },
        }
    ]
}

184 

185 

class SerializedDimensionRecord(BaseModel):
    """Simplified model for serializing a `DimensionRecord`.

    Holds the element name ("definition") and a flat mapping of field names
    to JSON-compatible values ("record").
    """

    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        examples=["exposure"],
    )

    # Use strict types to prevent casting.
    # NOTE(review): this example shows the whole model, not just the "record"
    # field — confirm that is intended.
    record: dict[str, None | StrictBool | StrictInt | StrictFloat | StrictStr | Timespan] = Field(
        ...,
        title="Dimension record keys and values.",
        examples=[
            {
                "definition": "exposure",
                "record": {
                    "instrument": "LATISS",
                    "exposure": 2021050300044,
                    "obs_id": "AT_O_20210503_00044",
                },
            }
        ],
    )

    model_config = {
        "json_schema_extra": _serialized_dimension_record_schema_extra,  # type: ignore[typeddict-item]
    }

    @classmethod
    def direct(
        cls,
        *,
        definition: str,
        record: dict[str, Any],
    ) -> SerializedDimensionRecord:
        """Construct a `SerializedDimensionRecord` directly without validators.

        Parameters
        ----------
        definition : `str`
            The name of the record.
        record : `dict`
            A dictionary representation of the record content.

        Returns
        -------
        rec : `SerializedDimensionRecord`
            A model representing the dimension records.

        Notes
        -----
        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.
        """
        # Timespan values are serialized to JSON as lists of integers, so any
        # list value here is a timespan's nanosecond pair; reassemble it into
        # a Timespan transparently.
        _recItems = {
            k: (v if type(v) is not list else Timespan(begin=None, end=None, _nsec=tuple(v)))  # type: ignore
            for k, v in record.items()
        }

        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        key = (
            definition,
            frozenset(_recItems.items()),
        )
        # Reuse a previously constructed node for identical input when a
        # persistence-context cache is active.
        cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        node = cls.model_construct(definition=definition, record=_recItems)  # type: ignore

        if cache is not None:
            cache[key] = node
        return node

267 

268 

@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record. If this is the record for a `Dimension`,
        its primary key value may be provided with the actual name of the
        field (e.g. "id" or "name"), the name of the `Dimension`, or both. If
        this record class has a "timespan" attribute, "datetime_begin" and
        "datetime_end" keyword arguments may be provided instead of a single
        "timespan" keyword argument. Unrecognized keyword arguments (including
        "datetime_begin"/"datetime_end" left over when a "timespan" argument
        was also provided) raise `TypeError`.

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute. The `DimensionRecord` base class
    itself is pure abstract, but does not use the `abc` module to indicate this
    because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    # Pydantic model used for to_simple/from_simple (and to_json/from_json).
    _serializedType: ClassVar[type[BaseModel]] = SerializedDimensionRecord

    # Set on each dynamic subclass by _subclassDimensionRecord; these validate
    # and dump the required-key tuple and the implied+remainder value tuple
    # used by serialize_key_value / deserialize_key / deserialize_value.
    _key_type_adapter: ClassVar[pydantic.TypeAdapter[tuple[Any, ...]]]
    _value_type_adapter: ClassVar[pydantic.TypeAdapter[tuple[Any, ...]]]

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        "Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )

        from ._coordinate import DataCoordinate

        # Instances are immutable, so attribute writes in __init__ must go
        # through object.__setattr__.
        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.from_required_values(
                self.definition.minimal_group,
                tuple(kwargs[dimension] for dimension in self.definition.required.names),
            ),
        )
        # Don't need the primary key value aliased to the dimension name
        # anymore.
        kwargs.pop(self.definition.name, None)

        for name in self.__slots__:
            # Note that we remove from kwargs as we go, to make sure there's
            # nothing left at the end.
            object.__setattr__(self, name, kwargs.pop(name, None))
        # Support 'datetime_begin' and 'datetime_end' instead of 'timespan' for
        # backwards compatibility, but if one is provided both must be.
        if self.definition.temporal is not None and self.timespan is None and "datetime_begin" in kwargs:
            object.__setattr__(
                self,
                "timespan",
                Timespan(
                    kwargs.pop("datetime_begin"),
                    kwargs.pop("datetime_end"),
                ),
            )

        if kwargs:
            raise TypeError(f"Invalid fields for {self.definition} dimension record: {set(kwargs.keys())}.")

    def __eq__(self, other: Any) -> bool:
        # Records of different elements compare unequal even if their field
        # values happen to match.
        if type(other) is not type(self):
            return False
        return all(getattr(self, name) == getattr(other, name) for name in self.__slots__)

    def __hash__(self) -> int:
        # Hash only the primary-key values; __eq__ compares all fields, which
        # is consistent because equal records share a data ID.
        return hash(self.dataId.required_values)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f"  {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return "{}.RecordClass({})".format(
            self.definition.name, ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        )

    def __reduce__(self) -> tuple:
        # Pickle via the module-level reconstructor, since the dynamically
        # created subclass itself is not importable by name.
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))

    def _repr_html_(self) -> str:
        """Override the default representation in IPython/Jupyter notebooks.

        This gives a more readable output that understands embedded newlines.
        """
        # Fixed: the closing tag was previously "<pre>" instead of "</pre>",
        # producing malformed HTML.
        return f"<pre>{self}</pre>"

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        simple : `SerializedDimensionRecord`
            Serializable form of this record.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.
        key = (id(self.definition), self.dataId)
        cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        mapping = {name: getattr(self, name) for name in self.__slots__}
        for k, v in mapping.items():
            if isinstance(v, lsst.sphgeom.Region):
                # YAML serialization specifies the class when it
                # doesn't have to. This is partly for explicitness
                # and also history. Here use a different approach.
                # This code needs to be migrated to sphgeom.
                mapping[k] = v.encode().hex()
            if isinstance(v, bytes):
                # We actually can't handle serializing out to bytes for
                # hash objects, encode it here to a hex string.
                mapping[k] = v.hex()
        definition = self.definition.to_simple(minimal=minimal)
        dimRec = SerializedDimensionRecord(definition=definition, record=mapping)
        if cache is not None:
            cache[key] = dimRec
        return dimRec

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDimensionRecord,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
        cacheKey: Hashable | None = None,
    ) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value return from `to_simple`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.
        cacheKey : `collections.abc.Hashable` or `None`
            If this is not `None`, it will be used as a key for any cached
            reconstruction instead of calculating a value from the serialized
            format.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGroup")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")
        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        key = cacheKey or (
            simple.definition,
            frozenset(simple.record.items()),  # type: ignore
        )
        cache = PersistenceContextVars.dimensionRecords.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Region and hash have to be converted to native form; for now assume
        # that the keys are special. We make the mapping we need to pass to
        # the DimensionRecord constructor via getattr, because we don't want
        # model_dump re-disassembling things like Timespans that we've already
        # assembled.
        mapping = {k: getattr(record_model, k) for k in definition.schema.names}

        if mapping.get("region") is not None:
            mapping["region"] = lsst.sphgeom.Region.decode(bytes.fromhex(mapping["region"]))
        if "hash" in mapping:
            # NOTE(review): assumes the validated "hash" value is bytes
            # containing an ASCII hex string — a plain str would have no
            # .decode(); confirm against the validated model's field type.
            mapping["hash"] = bytes.fromhex(mapping["hash"].decode())

        dimRec = _reconstructDimensionRecord(definition, mapping)
        if cache is not None:
            cache[key] = dimRec
        return dimRec

    to_json = to_json_pydantic
    from_json: ClassVar[Callable[..., Self]] = cast(Callable[..., Self], classmethod(from_json_pydantic))

    def toDict(self, splitTimespan: bool = False) -> dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results

    def get(self, name: str) -> Any:
        """Return a single metadata value associated with this record.

        Parameters
        ----------
        name : `str`
            Key of the metadata value to be retrieved.

        Returns
        -------
        value : `typing.Any`
            The metadata value.

        Raises
        ------
        KeyError
            If the given name is not a valid key in this dimension record.
        """
        if name not in self.__slots__:
            raise KeyError(f"'{name}' is not a valid record key for dimension '{self.definition.name}'")

        return getattr(self, name)

    def serialize_key_value(self) -> SerializedKeyValueDimensionRecord:
        """Serialize this record to a `list` that can be sliced into a key
        (data ID values) / value (everything else) pair.

        Returns
        -------
        raw : `list`
            List of values with JSON-compatible types.

        Notes
        -----
        Unlike `to_simple` / `from_simple`, this serialization approach does
        not encode the ``definition`` element in the serialized form. This is
        expected to be serialized separately (e.g. as part of a homogeneous set
        of dimension records).
        """
        # Required data ID values form the key slice; implied + remainder
        # fields, JSON-encoded via the type adapter, form the value slice.
        key = list(self.dataId.required_values)
        value = []
        for name in self.definition.schema.implied.names:
            value.append(getattr(self, name))
        for name in self.definition.schema.remainder.names:
            value.append(getattr(self, name))
        return key + self._value_type_adapter.dump_python(tuple(value), mode="json")

    @classmethod
    def deserialize_key(
        cls, raw: SerializedKeyValueDimensionRecord
    ) -> tuple[tuple[DataIdValue, ...], SerializedKeyValueDimensionRecord]:
        """Deserialize just the key slice of the raw `list` serialization of a
        dimension record.

        Parameters
        ----------
        raw : `list`
            Serialized list with JSON-compatible types, as returned by
            `serialize_key_value`.

        Returns
        -------
        key : `tuple`
            Validated tuple of required data ID values that uniquely identify
            this record, extracted from the head of ``raw``.
        raw_value : `list`
            Remaining unvalidated fields.
        """
        n = len(cls.definition.minimal_group.required)
        return cls._key_type_adapter.validate_python(raw[:n]), raw[n:]

    @classmethod
    def deserialize_value(
        cls, key: tuple[DataIdValue, ...], raw_value: SerializedKeyValueDimensionRecord
    ) -> DimensionRecord:
        """Deserialize the value slice of the raw `list` form of serialized
        dimension record.

        Parameters
        ----------
        key : `tuple`
            Validated tuple of required data ID values that uniquely identify
            this record, as returned by `deserialize_key`.
        raw_value : `list`
            Serialized list with JSON-compatible types, with just the non-key
            items, as returned by `deserialize_key`.

        Returns
        -------
        record : `DimensionRecord`
            A fully-validated `DimensionRecord` with this subclass.
        """
        from ._coordinate import DataCoordinate

        result = object.__new__(cls)  # bypass the usual __init__
        result.dataId = DataCoordinate.from_required_values(cls.definition.minimal_group, key)
        value = cls._value_type_adapter.validate_python(raw_value)
        # Relies on schema.names listing required fields first, then implied
        # and remainder, mirroring the layout written by serialize_key_value.
        for name, val in zip(cls.definition.schema.names, key + value):
            setattr(result, name, val)
        return result

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes. To avoid having to
    # put "type: ignore" everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """