Coverage for python/lsst/daf/butler/core/dimensions/_records.py: 23%

159 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-08-05 01:26 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("DimensionRecord", "SerializedDimensionRecord") 

25 

26from collections.abc import Hashable 

27from typing import TYPE_CHECKING, Any, ClassVar, Optional, Tuple 

28 

29import lsst.sphgeom 

30from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat 

31from lsst.utils.classes import immutable 

32from pydantic import Field, StrictBool, StrictFloat, StrictInt, StrictStr, create_model 

33 

34from ..json import from_json_pydantic, to_json_pydantic 

35from ..persistenceContext import PersistenceContextVars 

36from ..timespan import Timespan, TimespanDatabaseRepresentation 

37from ._elements import Dimension, DimensionElement 

38 

39if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

40 from ...registry import Registry 

41 from ._coordinate import DataCoordinate 

42 from ._graph import DimensionUniverse 

43 from ._schema import DimensionElementFields 

44 

45 

46def _reconstructDimensionRecord(definition: DimensionElement, mapping: dict[str, Any]) -> DimensionRecord: 

47 """Unpickle implementation for `DimensionRecord` subclasses. 

48 

49 For internal use by `DimensionRecord`. 

50 """ 

51 return definition.RecordClass(**mapping) 

52 

53 

def _subclassDimensionRecord(definition: DimensionElement) -> type[DimensionRecord]:
    """Create a dynamic subclass of `DimensionRecord` for the given element.

    For internal use by `DimensionRecord`.
    """
    # Local import avoids a circular dependency at module scope.
    from ._schema import DimensionElementFields

    element_fields = DimensionElementFields(definition)
    # Start from the standard database fields, then add the special spatial
    # and temporal attributes where the element defines them.
    slot_names = list(element_fields.standard.names)
    if definition.spatial:
        slot_names.append("region")
    if definition.temporal:
        slot_names.append(TimespanDatabaseRepresentation.NAME)
    namespace = {
        "definition": definition,
        "__slots__": tuple(slot_names),
        "fields": element_fields,
    }
    return type(f"{definition.name}.RecordClass", (DimensionRecord,), namespace)

69 

70 

class SpecificSerializedDimensionRecord(_BaseModelCompat, extra="forbid"):
    """Base model for a specific serialized record content.

    Concrete subclasses are built dynamically by
    `_createSimpleRecordSubclass`, one per dimension element, with
    strictly-typed fields; ``extra="forbid"`` rejects unexpected keys in
    external data.
    """

73 

74 

# Cache of dynamically-created SpecificSerializedDimensionRecord subclasses,
# keyed on (element, universe) since an element name alone is not unique
# across universes. Populated by _createSimpleRecordSubclass.
_SIMPLE_RECORD_CLASS_CACHE: dict[
    tuple[DimensionElement, DimensionUniverse], type[SpecificSerializedDimensionRecord]
] = {}

78 

79 

def _createSimpleRecordSubclass(definition: DimensionElement) -> type[SpecificSerializedDimensionRecord]:
    """Build (or fetch from cache) a strictly-typed pydantic model for the
    record contents of the given dimension element.
    """
    # Local import avoids a circular dependency at module scope.
    from ._schema import DimensionElementFields

    # The definition hashes as its name, so include the associated universe
    # in the cache key to keep distinct universes separate.
    cache_key = (definition, definition.universe)
    if (cached := _SIMPLE_RECORD_CLASS_CACHE.get(cache_key)) is not None:
        return cached

    element_fields = DimensionElementFields(definition)
    # Prefer strict typing for external data.
    strict_types = {
        str: StrictStr,
        float: StrictFloat,
        bool: StrictBool,
        int: StrictInt,
    }

    members = {}
    for field in element_fields.standard:
        python_type = field.getPythonType()
        python_type = strict_types.get(python_type, python_type)
        if field.nullable:
            python_type = Optional[python_type]  # type: ignore
        members[field.name] = (python_type, ...)
    # Spatial/temporal elements carry two extra serialized fields.
    if definition.temporal:
        members["timespan"] = (Tuple[int, int], ...)  # type: ignore
    if definition.spatial:
        members["region"] = (str, ...)

    # mypy does not seem to like create_model's dynamic signature.
    model = create_model(
        f"SpecificSerializedDimensionRecord{definition.name.capitalize()}",
        __base__=SpecificSerializedDimensionRecord,
        **members,  # type: ignore
    )

    _SIMPLE_RECORD_CLASS_CACHE[cache_key] = model
    return model

119 

120 

class SerializedDimensionRecord(_BaseModelCompat):
    """Simplified model for serializing a `DimensionRecord`."""

    # Name of the dimension element this record belongs to.
    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        example="exposure",
    )

    # Use strict types to prevent casting
    record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]] = Field(
        ...,
        title="Dimension record keys and values.",
        example={
            "definition": "exposure",
            "record": {"instrument": "LATISS", "exposure": 2021050300044, "obs_id": "AT_O_20210503_00044"},
        },
    )

    # Pydantic v1 uses a nested Config class for schema examples; v2 handles
    # this differently, so only define it for v1.
    if not PYDANTIC_V2:

        class Config:
            """Local configuration overrides for model."""

            schema_extra = {
                "example": {
                    "definition": "detector",
                    "record": {
                        "instrument": "HSC",
                        "id": 72,
                        "full_name": "0_01",
                        "name_in_raft": "01",
                        "raft": "0",
                        "purpose": "SCIENCE",
                    },
                }
            }

    @classmethod
    def direct(
        cls,
        *,
        definition: str,
        record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]],
    ) -> SerializedDimensionRecord:
        """Construct a `SerializedDimensionRecord` directly without validators.

        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.
        """
        # This method requires tuples as values of the mapping, but JSON
        # readers will read things in as lists. Be kind and transparently
        # transform to tuples. isinstance (rather than an exact type
        # comparison) also catches list subclasses.
        _recItems = {k: tuple(v) if isinstance(v, list) else v for k, v in record.items()}  # type: ignore

        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        key = (
            definition,
            frozenset(_recItems.items()),
        )
        # Reuse a previously-constructed model when a persistence context is
        # active, keyed on the full record contents.
        cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        node = cls.model_construct(definition=definition, record=_recItems)  # type: ignore

        if cache is not None:
            cache[key] = node
        return node

194 

195 

@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record. Unrecognized keys are ignored. If this
        is the record for a `Dimension`, its primary key value may be provided
        with the actual name of the field (e.g. "id" or "name"), the name of
        the `Dimension`, or both. If this record class has a "timespan"
        attribute, "datetime_begin" and "datetime_end" keyword arguments may
        be provided instead of a single "timespan" keyword argument (but are
        ignored if a "timespan" argument is provided).

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute. The `DimensionRecord` base class
    itself is pure abstract, but does not use the `abc` module to indicate this
    because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    # Pydantic model used by the to_simple/from_simple round trip.
    _serializedType = SerializedDimensionRecord

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                # Both spellings may have been provided; they must agree.
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        "Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )
        # self.__slots__ is the derived class's field-name tuple; any field
        # not supplied is stored as None.
        for name in self.__slots__:
            object.__setattr__(self, name, kwargs.get(name))
        # For temporal elements, allow the timespan to be given as separate
        # datetime_begin/datetime_end keywords when "timespan" was absent.
        if self.definition.temporal is not None and self.timespan is None:
            object.__setattr__(
                self,
                "timespan",
                Timespan(
                    kwargs.get("datetime_begin"),
                    kwargs.get("datetime_end"),
                ),
            )

        # Imported here to avoid a circular import at module scope.
        from ._coordinate import DataCoordinate

        # object.__setattr__ is required because @immutable disables normal
        # attribute assignment.
        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.fromRequiredValues(
                self.definition.graph,
                tuple(kwargs[dimension] for dimension in self.definition.required.names),
            ),
        )

    def __eq__(self, other: Any) -> bool:
        # Records of different elements are never equal; otherwise equality
        # is defined by the primary-key data ID alone (not all field values).
        if type(other) is not type(self):
            return False
        return self.dataId == other.dataId

    def __hash__(self) -> int:
        # Consistent with __eq__: hash only the primary-key data ID.
        return hash(self.dataId)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f" {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return "{}.RecordClass({})".format(
            self.definition.name, ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        )

    def __reduce__(self) -> tuple:
        # Pickle via the module-level _reconstructDimensionRecord helper so
        # the dynamically-created subclass itself never needs to be picklable.
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))

    def _repr_html_(self) -> str:
        """Override the default representation in IPython/Jupyter notebooks.

        This gives a more readable output that understands embedded newlines.
        """
        # Closing tag was previously "<pre>" (unbalanced HTML); fixed.
        return f"<pre>{self}</pre>"

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        simple : `SerializedDimensionRecord`
            Serializable form of this record.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.

        mapping = {name: getattr(self, name) for name in self.__slots__}
        # If the item in mapping supports simplification update it
        for k, v in mapping.items():
            try:
                mapping[k] = v.to_simple(minimal=minimal)
            except AttributeError:
                if isinstance(v, lsst.sphgeom.Region):
                    # YAML serialization specifies the class when it
                    # doesn't have to. This is partly for explicitness
                    # and also history. Here use a different approach.
                    # This code needs to be migrated to sphgeom
                    mapping[k] = v.encode().hex()
                if isinstance(v, bytes):
                    # We actually can't handle serializing out to bytes for
                    # hash objects, encode it here to a hex string
                    mapping[k] = v.hex()
        definition = self.definition.to_simple(minimal=minimal)
        return SerializedDimensionRecord(definition=definition, record=mapping)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDimensionRecord,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
        cacheKey: Hashable | None = None,
    ) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value return from `to_simple`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.
        cacheKey : `Hashable` or `None`
            If this is not None, it will be used as a key for any cached
            reconstruction instead of calculating a value from the serialized
            format.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")
        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        key = cacheKey or (
            simple.definition,
            frozenset(simple.record.items()),  # type: ignore
        )
        cache = PersistenceContextVars.dimensionRecords.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Timespan and region have to be converted to native form
        # for now assume that those keys are special
        rec = record_model.dict()

        if (ts := "timespan") in rec:
            rec[ts] = Timespan.from_simple(rec[ts], universe=universe, registry=registry)
        if (reg := "region") in rec:
            encoded = bytes.fromhex(rec[reg])
            rec[reg] = lsst.sphgeom.Region.decode(encoded)
        if (hsh := "hash") in rec:
            # NOTE(review): assumes the model stores "hash" as bytes whose
            # decoded text is a hex string (to_simple writes v.hex()) —
            # confirm against the field's declared type.
            rec[hsh] = bytes.fromhex(rec[hsh].decode())

        dimRec = _reconstructDimensionRecord(definition, rec)
        if cache is not None:
            cache[key] = dimRec
        return dimRec

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def toDict(self, splitTimespan: bool = False) -> dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes. To avoid having to
    # put "type: ignore", everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """