Coverage for python/lsst/daf/butler/core/dimensions/_records.py: 23%

168 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-09-02 09:34 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("DimensionRecord", "SerializedDimensionRecord") 

25 

26from collections.abc import Hashable 

27from typing import TYPE_CHECKING, Any, ClassVar, Optional, Tuple 

28 

29import lsst.sphgeom 

30from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat 

31from lsst.utils.classes import immutable 

32from pydantic import Field, StrictBool, StrictFloat, StrictInt, StrictStr, create_model 

33 

34from ..json import from_json_pydantic, to_json_pydantic 

35from ..persistenceContext import PersistenceContextVars 

36from ..timespan import Timespan, TimespanDatabaseRepresentation 

37from ._elements import Dimension, DimensionElement 

38 

39if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

40 from ...registry import Registry 

41 from ._coordinate import DataCoordinate 

42 from ._graph import DimensionUniverse 

43 from ._schema import DimensionElementFields 

44 

45 

46def _reconstructDimensionRecord(definition: DimensionElement, mapping: dict[str, Any]) -> DimensionRecord: 

47 """Unpickle implementation for `DimensionRecord` subclasses. 

48 

49 For internal use by `DimensionRecord`. 

50 """ 

51 return definition.RecordClass(**mapping) 

52 

53 

def _subclassDimensionRecord(definition: DimensionElement) -> type[DimensionRecord]:
    """Create a dynamic subclass of `DimensionRecord` for the given element.

    For internal use by `DimensionRecord`.
    """
    from ._schema import DimensionElementFields

    element_fields = DimensionElementFields(definition)
    # The record's __slots__ are the standard fields, plus "region" and/or
    # "timespan" for spatial/temporal elements.
    attribute_names = list(element_fields.standard.names)
    if definition.spatial:
        attribute_names.append("region")
    if definition.temporal:
        attribute_names.append(TimespanDatabaseRepresentation.NAME)
    namespace = {
        "definition": definition,
        "__slots__": tuple(attribute_names),
        "fields": element_fields,
    }
    return type(f"{definition.name}.RecordClass", (DimensionRecord,), namespace)

69 

70 

class SpecificSerializedDimensionRecord(_BaseModelCompat, extra="forbid"):
    """Base model for a specific serialized record content.

    Concrete subclasses are created dynamically by
    `_createSimpleRecordSubclass`, one per `DimensionElement`;
    ``extra="forbid"`` makes any unexpected record key a validation error.
    """

73 

74 

# Cache of the dynamically-created serialization models, keyed by both the
# element and its universe (elements hash by name, so the universe is needed
# to keep same-named elements from different universes distinct).
_SIMPLE_RECORD_CLASS_CACHE: dict[
    tuple[DimensionElement, DimensionUniverse], type[SpecificSerializedDimensionRecord]
] = {}

78 

79 

def _createSimpleRecordSubclass(definition: DimensionElement) -> type[SpecificSerializedDimensionRecord]:
    """Create (or fetch from cache) a strict pydantic model for validating
    serialized records of the given element.

    Parameters
    ----------
    definition : `DimensionElement`
        Element whose serialized record payload the model should validate.

    Returns
    -------
    model : `type` [ `SpecificSerializedDimensionRecord` ]
        Dynamically-created model with one strictly-typed field per standard
        record field, plus a "timespan" and/or "region" field when the
        element is temporal and/or spatial.
    """
    from ._schema import DimensionElementFields

    # Cache on the definition (which hashes as the name) and the
    # associated universe.  Use a single lookup instead of a
    # membership test followed by a subscript.
    cache_key = (definition, definition.universe)
    if (cached := _SIMPLE_RECORD_CLASS_CACHE.get(cache_key)) is not None:
        return cached

    fields = DimensionElementFields(definition)
    members = {}
    # Prefer strict typing for external data so pydantic does not silently
    # coerce (e.g. a string to an int).
    type_map = {
        str: StrictStr,
        float: StrictFloat,
        bool: StrictBool,
        int: StrictInt,
    }

    for field in fields.standard:
        field_type = field.getPythonType()
        field_type = type_map.get(field_type, field_type)
        if field.nullable:
            field_type = Optional[field_type]  # type: ignore
        members[field.name] = (field_type, ...)
    if definition.temporal:
        # Timespans are serialized as a (begin, end) integer pair.
        members["timespan"] = (Tuple[int, int], ...)  # type: ignore
    if definition.spatial:
        # Regions are serialized as hex-encoded sphgeom byte strings.
        members["region"] = (str, ...)

    # mypy does not seem to like create_model
    model = create_model(
        f"SpecificSerializedDimensionRecord{definition.name.capitalize()}",
        __base__=SpecificSerializedDimensionRecord,
        **members,  # type: ignore
    )

    _SIMPLE_RECORD_CLASS_CACHE[cache_key] = model
    return model

119 

120 

# While supporting pydantic v1 and v2 keep this outside the model.
# Pydantic v2 consumes it via ``model_config["json_schema_extra"]``;
# v1 via ``Config.schema_extra`` (see `SerializedDimensionRecord`).
_serialized_dimension_record_schema_extra = {
    "examples": [
        {
            "definition": "detector",
            "record": {
                "instrument": "HSC",
                "id": 72,
                "full_name": "0_01",
                "name_in_raft": "01",
                "raft": "0",
                "purpose": "SCIENCE",
            },
        }
    ]
}

137 

138 

class SerializedDimensionRecord(_BaseModelCompat):
    """Simplified model for serializing a `DimensionRecord`."""

    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        examples=["exposure"],
    )

    # Use strict types to prevent casting
    record: dict[str, None | StrictInt | StrictFloat | StrictStr | StrictBool | tuple[int, int]] = Field(
        ...,
        title="Dimension record keys and values.",
        examples=[
            {
                "definition": "exposure",
                "record": {
                    "instrument": "LATISS",
                    "exposure": 2021050300044,
                    "obs_id": "AT_O_20210503_00044",
                },
            }
        ],
    )

    if PYDANTIC_V2:
        model_config = {
            "json_schema_extra": _serialized_dimension_record_schema_extra,  # type: ignore[typeddict-item]
        }
    else:

        class Config:
            """Local configuration overrides for model."""

            schema_extra = _serialized_dimension_record_schema_extra

    @classmethod
    def direct(
        cls,
        *,
        definition: str,
        record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]],
    ) -> SerializedDimensionRecord:
        """Construct a `SerializedDimensionRecord` directly without validators.

        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.
        """
        # This method requires tuples as values of the mapping, but JSON
        # readers will read things in as lists. Be kind and transparently
        # transform to tuples.  An exact-type identity check is used
        # deliberately (not isinstance) so only plain lists are converted.
        _recItems = {k: tuple(v) if type(v) is list else v for k, v in record.items()}  # type: ignore

        # Key the cache on the definition name and the full record content.
        key = (
            definition,
            frozenset(_recItems.items()),
        )
        cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        node = cls.model_construct(definition=definition, record=_recItems)  # type: ignore

        if cache is not None:
            cache[key] = node
        return node

212 

213 

@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record. Unrecognized keys are ignored. If this
        is the record for a `Dimension`, its primary key value may be provided
        with the actual name of the field (e.g. "id" or "name"), the name of
        the `Dimension`, or both. If this record class has a "timespan"
        attribute, "datetime_begin" and "datetime_end" keyword arguments may
        be provided instead of a single "timespan" keyword argument (but are
        ignored if a "timespan" argument is provided).

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute. The `DimensionRecord` base class
    itself is pure abstract, but does not use the `abc` module to indicate this
    because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    _serializedType = SerializedDimensionRecord

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        "Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )
        # Unrecognized kwargs are silently ignored; missing slots become None.
        for name in self.__slots__:
            object.__setattr__(self, name, kwargs.get(name))
        # Allow a timespan to be given as a (datetime_begin, datetime_end)
        # pair when no explicit "timespan" argument was provided.
        if self.definition.temporal is not None and self.timespan is None:
            object.__setattr__(
                self,
                "timespan",
                Timespan(
                    kwargs.get("datetime_begin"),
                    kwargs.get("datetime_end"),
                ),
            )

        from ._coordinate import DataCoordinate

        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.fromRequiredValues(
                self.definition.graph,
                tuple(kwargs[dimension] for dimension in self.definition.required.names),
            ),
        )

    def __eq__(self, other: Any) -> bool:
        # Exact-type identity comparison: records of different elements (or
        # subclasses) never compare equal, even with matching data IDs.
        if type(other) is not type(self):
            return False
        return self.dataId == other.dataId

    def __hash__(self) -> int:
        return hash(self.dataId)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f"  {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return "{}.RecordClass({})".format(
            self.definition.name, ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        )

    def __reduce__(self) -> tuple:
        # Pickle via the element definition plus a plain mapping of slot
        # values; see _reconstructDimensionRecord.
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))

    def _repr_html_(self) -> str:
        """Override the default representation in IPython/Jupyter notebooks.

        This gives a more readable output that understands embedded newlines.
        """
        # Fixed: the closing tag was previously "<pre>" instead of "</pre>".
        return f"<pre>{self}</pre>"

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        record : `SerializedDimensionRecord`
            Simplified form of this record suitable for serialization.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.
        key = (id(self.definition), self.dataId)
        cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        mapping = {name: getattr(self, name) for name in self.__slots__}
        # If the item in mapping supports simplification update it
        for k, v in mapping.items():
            try:
                mapping[k] = v.to_simple(minimal=minimal)
            except AttributeError:
                if isinstance(v, lsst.sphgeom.Region):
                    # YAML serialization specifies the class when it
                    # doesn't have to. This is partly for explicitness
                    # and also history. Here use a different approach.
                    # This code needs to be migrated to sphgeom
                    mapping[k] = v.encode().hex()
                elif isinstance(v, bytes):
                    # We actually can't handle serializing out to bytes for
                    # hash objects, encode it here to a hex string
                    mapping[k] = v.hex()
        definition = self.definition.to_simple(minimal=minimal)
        dimRec = SerializedDimensionRecord(definition=definition, record=mapping)
        if cache is not None:
            cache[key] = dimRec
        return dimRec

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDimensionRecord,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
        cacheKey: Hashable | None = None,
    ) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value return from `to_simple`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.
        cacheKey : `Hashable` or `None`
            If this is not None, it will be used as a key for any cached
            reconstruction instead of calculating a value from the serialized
            format.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")
        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        key = cacheKey or (
            simple.definition,
            frozenset(simple.record.items()),  # type: ignore
        )
        cache = PersistenceContextVars.dimensionRecords.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Timespan and region have to be converted to native form
        # for now assume that those keys are special
        rec = record_model.model_dump()

        if (ts := "timespan") in rec:
            rec[ts] = Timespan.from_simple(rec[ts], universe=universe, registry=registry)
        if (reg := "region") in rec:
            encoded = bytes.fromhex(rec[reg])
            rec[reg] = lsst.sphgeom.Region.decode(encoded)
        if (hsh := "hash") in rec:
            # NOTE(review): assumes the dumped "hash" value has a .decode()
            # (i.e. is bytes-like holding hex text) — confirm against the
            # model field type produced by _createSimpleRecordSubclass.
            rec[hsh] = bytes.fromhex(rec[hsh].decode())

        dimRec = _reconstructDimensionRecord(definition, rec)
        if cache is not None:
            cache[key] = dimRec
        return dimRec

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def toDict(self, splitTimespan: bool = False) -> dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes. To avoid having to
    # put "type: ignore", everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """