Coverage for python/lsst/daf/butler/core/dimensions/_records.py: 21%

165 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-12 10:56 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("DimensionRecord", "SerializedDimensionRecord") 

25 

26from typing import TYPE_CHECKING, Any, ClassVar, Optional, Tuple 

27 

28import lsst.sphgeom 

29from lsst.utils.classes import immutable 

30 

31try: 

32 from pydantic.v1 import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr, create_model 

33except ModuleNotFoundError: 

34 from pydantic import ( # type: ignore 

35 BaseModel, 

36 Field, 

37 StrictBool, 

38 StrictFloat, 

39 StrictInt, 

40 StrictStr, 

41 create_model, 

42 ) 

43 

44from ..json import from_json_pydantic, to_json_pydantic 

45from ..persistenceContext import PersistenceContextVars 

46from ..timespan import Timespan, TimespanDatabaseRepresentation 

47from ._elements import Dimension, DimensionElement 

48 

49if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

50 from ...registry import Registry 

51 from ._coordinate import DataCoordinate 

52 from ._graph import DimensionUniverse 

53 from ._schema import DimensionElementFields 

54 

55 

def _reconstructDimensionRecord(definition: DimensionElement, mapping: dict[str, Any]) -> DimensionRecord:
    """Unpickle implementation for `DimensionRecord` subclasses.

    For internal use by `DimensionRecord`.

    Parameters
    ----------
    definition : `DimensionElement`
        Element whose dynamically-created record class is instantiated.
    mapping : `dict` [ `str`, `Any` ]
        Field names and values, passed as keyword arguments to the record
        class constructor.

    Returns
    -------
    record : `DimensionRecord`
        The reconstructed record instance.
    """
    return definition.RecordClass(**mapping)

62 

63 

def _subclassDimensionRecord(definition: DimensionElement) -> type[DimensionRecord]:
    """Create a dynamic subclass of `DimensionRecord` for the given element.

    For internal use by `DimensionRecord`.

    Parameters
    ----------
    definition : `DimensionElement`
        Element whose record class is to be built.

    Returns
    -------
    cls : `type` [ `DimensionRecord` ]
        New subclass with ``__slots__`` covering the element's standard
        fields, plus ``region`` and/or ``timespan`` when the element is
        spatial and/or temporal.
    """
    # Imported here to avoid a circular dependency at module import time.
    from ._schema import DimensionElementFields

    element_fields = DimensionElementFields(definition)
    slot_names = list(element_fields.standard.names)
    if definition.spatial:
        slot_names.append("region")
    if definition.temporal:
        slot_names.append(TimespanDatabaseRepresentation.NAME)
    namespace = {
        "definition": definition,
        "__slots__": tuple(slot_names),
        "fields": element_fields,
    }
    return type(f"{definition.name}.RecordClass", (DimensionRecord,), namespace)

79 

80 

class SpecificSerializedDimensionRecord(BaseModel, extra="forbid"):
    """Base model for a specific serialized record content.

    Concrete subclasses are created dynamically per `DimensionElement` by
    `_createSimpleRecordSubclass`; ``extra="forbid"`` rejects unexpected
    fields when validating external data.
    """

83 

84 

# Cache of dynamically-created validation models, keyed by both the element
# definition and its universe (the universe is part of the key so that
# elements with the same name from different universes get distinct models).
_SIMPLE_RECORD_CLASS_CACHE: dict[
    tuple[DimensionElement, DimensionUniverse], type[SpecificSerializedDimensionRecord]
] = {}

88 

89 

def _createSimpleRecordSubclass(definition: DimensionElement) -> type[SpecificSerializedDimensionRecord]:
    """Return a pydantic model class validating records of this element.

    Parameters
    ----------
    definition : `DimensionElement`
        Element whose serialized records the model should validate.

    Returns
    -------
    model : `type` [ `SpecificSerializedDimensionRecord` ]
        Dynamically-created (and cached) pydantic model class.
    """
    # Imported here to avoid a circular dependency at module import time.
    from ._schema import DimensionElementFields

    # Cache on the definition (which hashes as the name) and the
    # associated universe.
    cache_key = (definition, definition.universe)
    if (cached := _SIMPLE_RECORD_CLASS_CACHE.get(cache_key)) is not None:
        return cached

    # Prefer strict pydantic types for external data, to prevent implicit
    # coercion (e.g. "1" -> 1).
    strict_types = {
        str: StrictStr,
        float: StrictFloat,
        bool: StrictBool,
        int: StrictInt,
    }

    members = {}
    for field in DimensionElementFields(definition).standard:
        python_type = field.getPythonType()
        python_type = strict_types.get(python_type, python_type)
        if field.nullable:
            python_type = Optional[python_type]  # type: ignore
        members[field.name] = (python_type, ...)
    if definition.temporal:
        # Serialized timespans are a pair of integer nanoseconds.
        members["timespan"] = (Tuple[int, int], ...)  # type: ignore
    if definition.spatial:
        # Serialized regions are hex-encoded sphgeom blobs.
        members["region"] = (str, ...)

    # mypy does not seem to like create_model
    model = create_model(
        f"SpecificSerializedDimensionRecord{definition.name.capitalize()}",
        __base__=SpecificSerializedDimensionRecord,
        **members,  # type: ignore
    )

    _SIMPLE_RECORD_CLASS_CACHE[cache_key] = model
    return model

129 

130 

class SerializedDimensionRecord(BaseModel):
    """Simplified model for serializing a `DimensionRecord`."""

    # Name of the dimension element this record belongs to; used to look up
    # the element definition on deserialization.
    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        example="exposure",
    )

    # Use strict types to prevent casting
    record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]] = Field(
        ...,
        title="Dimension record keys and values.",
        example={
            "definition": "exposure",
            "record": {"instrument": "LATISS", "exposure": 2021050300044, "obs_id": "AT_O_20210503_00044"},
        },
    )

    class Config:
        """Local configuration overrides for model."""

        # Full example embedded in the generated JSON schema.
        schema_extra = {
            "example": {
                "definition": "detector",
                "record": {
                    "instrument": "HSC",
                    "id": 72,
                    "full_name": "0_01",
                    "name_in_raft": "01",
                    "raft": "0",
                    "purpose": "SCIENCE",
                },
            }
        }

    @classmethod
    def direct(
        cls,
        *,
        definition: str,
        record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]],
    ) -> SerializedDimensionRecord:
        """Construct a `SerializedDimensionRecord` directly without validators.

        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.
        """
        _recItems = record.items()
        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        # The key must be hashable, so list values (from JSON) become tuples
        # inside a frozenset of (key, value) pairs.
        key = (
            definition,
            frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems),  # type: ignore
        )
        # Context-local cache so repeated identical records deserialize to a
        # shared instance within one persistence context.
        cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result
        # Bypass pydantic validation entirely: allocate with __new__ and set
        # attributes with object.__setattr__ (the model may be configured
        # immutable, so plain attribute assignment could be blocked).
        node = SerializedDimensionRecord.__new__(cls)
        setter = object.__setattr__
        setter(node, "definition", definition)
        # This method requires tuples as values of the mapping, but JSON
        # readers will read things in as lists. Be kind and transparently
        # transform to tuples
        setter(
            node, "record", {k: v if type(v) != list else tuple(v) for k, v in record.items()}  # type: ignore
        )
        # __fields_set__ is pydantic v1 internal bookkeeping that __init__
        # would normally populate; set it by hand since we skipped __init__.
        setter(node, "__fields_set__", {"definition", "record"})
        if cache is not None:
            cache[key] = node
        return node

205 

206 

@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record. Unrecognized keys are ignored. If this
        is the record for a `Dimension`, its primary key value may be provided
        with the actual name of the field (e.g. "id" or "name"), the name of
        the `Dimension`, or both. If this record class has a "timespan"
        attribute, "datetime_begin" and "datetime_end" keyword arguments may
        be provided instead of a single "timespan" keyword argument (but are
        ignored if a "timespan" argument is provided).

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute. The `DimensionRecord` base class
    itself is pure abstract, but does not use the `abc` module to indicate this
    because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    _serializedType = SerializedDimensionRecord

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        "Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )
        # Missing fields are stored as None rather than raising.
        for name in self.__slots__:
            object.__setattr__(self, name, kwargs.get(name))
        if self.definition.temporal is not None:
            if self.timespan is None:
                # Assemble a Timespan from the split begin/end keywords when
                # no "timespan" keyword was given.
                object.__setattr__(
                    self,
                    "timespan",
                    Timespan(
                        kwargs.get("datetime_begin"),
                        kwargs.get("datetime_end"),
                    ),
                )

        from ._coordinate import DataCoordinate

        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.fromRequiredValues(
                self.definition.graph,
                tuple(kwargs[dimension] for dimension in self.definition.required.names),
            ),
        )

    def __eq__(self, other: Any) -> bool:
        # Identity comparison of types is the idiomatic (and slightly
        # stricter-reading) spelling; behavior is unchanged for classes.
        if type(other) is not type(self):
            return False
        return self.dataId == other.dataId

    def __hash__(self) -> int:
        return hash(self.dataId)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f" {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return "{}.RecordClass({})".format(
            self.definition.name, ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        )

    def __reduce__(self) -> tuple:
        # Pickle via the module-level helper so the dynamically-created
        # subclass does not itself need to be importable.
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))

    def _repr_html_(self) -> str:
        """Override the default representation in IPython/Jupyter notebooks.

        This gives a more readable output that understands embedded newlines.
        """
        # Fixed: the closing tag was previously "<pre>" (unclosed HTML).
        return f"<pre>{self}</pre>"

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        simple : `SerializedDimensionRecord`
            Simplified form of this record suitable for serialization.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.

        mapping = {name: getattr(self, name) for name in self.__slots__}
        # If the item in mapping supports simplification update it
        for k, v in mapping.items():
            try:
                mapping[k] = v.to_simple(minimal=minimal)
            except AttributeError:
                if isinstance(v, lsst.sphgeom.Region):
                    # YAML serialization specifies the class when it
                    # doesn't have to. This is partly for explicitness
                    # and also history. Here use a different approach.
                    # This code needs to be migrated to sphgeom
                    mapping[k] = v.encode().hex()
                if isinstance(v, bytes):
                    # We actually can't handle serializing out to bytes for
                    # hash objects, encode it here to a hex string
                    mapping[k] = v.hex()
        definition = self.definition.to_simple(minimal=minimal)
        return SerializedDimensionRecord(definition=definition, record=mapping)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDimensionRecord,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
    ) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value return from `to_simple`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or
            if no usable universe can be determined from them.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")
        _recItems = simple.record.items()
        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        # List values (as produced by JSON readers) become tuples so the key
        # is hashable.
        key = (
            simple.definition,
            frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems),  # type: ignore
        )
        # Context-local cache: identical serialized records within one
        # persistence context deserialize to a shared instance.
        cache = PersistenceContextVars.dimensionRecords.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Timespan and region have to be converted to native form
        # for now assume that those keys are special
        rec = record_model.dict()

        if (ts := "timespan") in rec:
            rec[ts] = Timespan.from_simple(rec[ts], universe=universe, registry=registry)
        if (reg := "region") in rec:
            encoded = bytes.fromhex(rec[reg])
            rec[reg] = lsst.sphgeom.Region.decode(encoded)
        if (hsh := "hash") in rec:
            rec[hsh] = bytes.fromhex(rec[hsh].decode())

        dimRec = _reconstructDimensionRecord(definition, rec)
        if cache is not None:
            cache[key] = dimRec
        return dimRec

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def toDict(self, splitTimespan: bool = False) -> dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.

        Returns
        -------
        mapping : `dict` [ `str`, `Any` ]
            Field names mapped to their values.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes. To avoid having to
    # put "type: ignore", everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """