Coverage for python/lsst/daf/butler/core/dimensions/_records.py: 22%

161 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-21 09:55 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("DimensionRecord", "SerializedDimensionRecord") 

25 

26from typing import TYPE_CHECKING, Any, ClassVar, Optional, Tuple 

27 

28import lsst.sphgeom 

29from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat 

30from lsst.utils.classes import immutable 

31from pydantic import Field, StrictBool, StrictFloat, StrictInt, StrictStr, create_model 

32 

33from ..json import from_json_pydantic, to_json_pydantic 

34from ..persistenceContext import PersistenceContextVars 

35from ..timespan import Timespan, TimespanDatabaseRepresentation 

36from ._elements import Dimension, DimensionElement 

37 

38if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

39 from ...registry import Registry 

40 from ._coordinate import DataCoordinate 

41 from ._graph import DimensionUniverse 

42 from ._schema import DimensionElementFields 

43 

44 

45def _reconstructDimensionRecord(definition: DimensionElement, mapping: dict[str, Any]) -> DimensionRecord: 

46 """Unpickle implementation for `DimensionRecord` subclasses. 

47 

48 For internal use by `DimensionRecord`. 

49 """ 

50 return definition.RecordClass(**mapping) 

51 

52 

def _subclassDimensionRecord(definition: DimensionElement) -> type[DimensionRecord]:
    """Create a dynamic subclass of `DimensionRecord` for the given element.

    For internal use by `DimensionRecord`.
    """
    from ._schema import DimensionElementFields

    fields = DimensionElementFields(definition)
    # Every standard field becomes a slot; spatial/temporal elements also
    # carry a region and/or timespan attribute, respectively.
    slot_names = list(fields.standard.names)
    if definition.spatial:
        slot_names.append("region")
    if definition.temporal:
        slot_names.append(TimespanDatabaseRepresentation.NAME)
    namespace = {
        "definition": definition,
        "__slots__": tuple(slot_names),
        "fields": fields,
    }
    return type(f"{definition.name}.RecordClass", (DimensionRecord,), namespace)

68 

69 

# Concrete subclasses are generated dynamically (one per dimension element)
# by _createSimpleRecordSubclass; extra="forbid" makes unknown record keys a
# validation error rather than silently accepting them.
class SpecificSerializedDimensionRecord(_BaseModelCompat, extra="forbid"):
    """Base model for a specific serialized record content."""

72 

73 

# Process-wide cache of the dynamically-created validation models returned by
# _createSimpleRecordSubclass. Keyed on (element, universe) because an element
# hashes by name alone and identically-named elements can exist in different
# universes.
_SIMPLE_RECORD_CLASS_CACHE: dict[
    tuple[DimensionElement, DimensionUniverse], type[SpecificSerializedDimensionRecord]
] = {}

77 

78 

def _createSimpleRecordSubclass(definition: DimensionElement) -> type[SpecificSerializedDimensionRecord]:
    """Return a pydantic model that validates serialized records of this
    element.

    The model is created on first request and cached; subsequent calls with
    the same element and universe return the cached class.
    """
    from ._schema import DimensionElementFields

    # Cache on the definition (which hashes as the name) and the
    # associated universe.
    cache_key = (definition, definition.universe)
    if (cached := _SIMPLE_RECORD_CLASS_CACHE.get(cache_key)) is not None:
        return cached

    fields = DimensionElementFields(definition)

    # Map lenient python types onto strict pydantic equivalents so that
    # external (possibly JSON) data is validated rather than coerced.
    strict_types = {
        str: StrictStr,
        float: StrictFloat,
        bool: StrictBool,
        int: StrictInt,
    }

    field_specs = {}
    for field in fields.standard:
        python_type = field.getPythonType()
        python_type = strict_types.get(python_type, python_type)
        if field.nullable:
            python_type = Optional[python_type]  # type: ignore
        field_specs[field.name] = (python_type, ...)
    if definition.temporal:
        field_specs["timespan"] = (Tuple[int, int], ...)  # type: ignore
    if definition.spatial:
        field_specs["region"] = (str, ...)

    # create_model's signature confuses mypy, hence the ignore.
    model = create_model(
        f"SpecificSerializedDimensionRecord{definition.name.capitalize()}",
        __base__=SpecificSerializedDimensionRecord,
        **field_specs,  # type: ignore
    )

    _SIMPLE_RECORD_CLASS_CACHE[cache_key] = model
    return model

118 

119 

class SerializedDimensionRecord(_BaseModelCompat):
    """Simplified model for serializing a `DimensionRecord`."""

    # Name of the dimension element this record belongs to; resolved back to
    # a DimensionElement via the universe on deserialization.
    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        example="exposure",
    )

    # Use strict types to prevent casting
    record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]] = Field(
        ...,
        title="Dimension record keys and values.",
        example={
            "definition": "exposure",
            "record": {"instrument": "LATISS", "exposure": 2021050300044, "obs_id": "AT_O_20210503_00044"},
        },
    )

    # Pydantic v1 declares the schema example via a nested Config class;
    # under v2 the Field(example=...) declarations above are used instead.
    if not PYDANTIC_V2:

        class Config:
            """Local configuration overrides for model."""

            schema_extra = {
                "example": {
                    "definition": "detector",
                    "record": {
                        "instrument": "HSC",
                        "id": 72,
                        "full_name": "0_01",
                        "name_in_raft": "01",
                        "raft": "0",
                        "purpose": "SCIENCE",
                    },
                }
            }

    @classmethod
    def direct(
        cls,
        *,
        definition: str,
        record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]],
    ) -> SerializedDimensionRecord:
        """Construct a `SerializedDimensionRecord` directly without validators.

        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.

        Parameters
        ----------
        definition : `str`
            Name of the dimension element for this record.
        record : `dict`
            Field name/value mapping; list values (as produced by JSON
            readers) are converted to tuples.

        Returns
        -------
        node : `SerializedDimensionRecord`
            The possibly-cached model instance.
        """
        _recItems = record.items()
        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        # Lists are converted to tuples in the key so it is hashable.
        key = (
            definition,
            frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems),  # type: ignore
        )
        # Reuse an identical instance from the active persistence context,
        # if one is in effect (cache is None otherwise).
        cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        # This method requires tuples as values of the mapping, but JSON
        # readers will read things in as lists. Be kind and transparently
        # transform to tuples
        serialized_record = {k: v if type(v) != list else tuple(v) for k, v in record.items()}  # type: ignore

        # model_construct skips validation entirely — inputs must be trusted.
        node = cls.model_construct(definition=definition, record=serialized_record)  # type: ignore

        if cache is not None:
            cache[key] = node
        return node

194 

195 

@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record. Unrecognized keys are ignored. If this
        is the record for a `Dimension`, its primary key value may be provided
        with the actual name of the field (e.g. "id" or "name"), the name of
        the `Dimension`, or both. If this record class has a "timespan"
        attribute, "datetime_begin" and "datetime_end" keyword arguments may
        be provided instead of a single "timespan" keyword argument (but are
        ignored if a "timespan" argument is provided).

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute. The `DimensionRecord` base class
    itself is pure abstract, but does not use the `abc` module to indicate this
    because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    _serializedType = SerializedDimensionRecord

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        "Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )
        # Unrecognized kwargs are silently ignored; missing slots become None.
        for name in self.__slots__:
            object.__setattr__(self, name, kwargs.get(name))
        if self.definition.temporal is not None:
            if self.timespan is None:
                # Allow the timespan to be given as separate begin/end kwargs;
                # an explicit "timespan" kwarg takes precedence.
                object.__setattr__(
                    self,
                    "timespan",
                    Timespan(
                        kwargs.get("datetime_begin"),
                        kwargs.get("datetime_end"),
                    ),
                )

        from ._coordinate import DataCoordinate

        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.fromRequiredValues(
                self.definition.graph,
                tuple(kwargs[dimension] for dimension in self.definition.required.names),
            ),
        )

    def __eq__(self, other: Any) -> bool:
        # Record classes are generated per-element, so an exact type match
        # also guarantees the definitions match; comparing the dataId (the
        # primary keys) is then sufficient. Use identity comparison for
        # types rather than "!=".
        if type(other) is not type(self):
            return False
        return self.dataId == other.dataId

    def __hash__(self) -> int:
        return hash(self.dataId)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f"  {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return "{}.RecordClass({})".format(
            self.definition.name, ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        )

    def __reduce__(self) -> tuple:
        # Pickle via the module-level helper so the dynamically-created
        # subclass itself does not need to be importable by pickle.
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))

    def _repr_html_(self) -> str:
        """Override the default representation in IPython/Jupyter notebooks.

        This gives a more readable output that understands embedded newlines.
        """
        # Fixed: the closing tag was previously "<pre>" (an unterminated
        # element in notebook HTML output); it must be "</pre>".
        return f"<pre>{self}</pre>"

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect on for this class.

        Returns
        -------
        simple : `SerializedDimensionRecord`
            Simplified form of this record suitable for serialization.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.

        mapping = {name: getattr(self, name) for name in self.__slots__}
        # If the item in mapping supports simplification update it.
        # Only values are replaced while iterating, so the dict size is
        # stable and in-place mutation is safe.
        for k, v in mapping.items():
            try:
                mapping[k] = v.to_simple(minimal=minimal)
            except AttributeError:
                if isinstance(v, lsst.sphgeom.Region):
                    # YAML serialization specifies the class when it
                    # doesn't have to. This is partly for explicitness
                    # and also history. Here use a different approach.
                    # This code needs to be migrated to sphgeom
                    mapping[k] = v.encode().hex()
                if isinstance(v, bytes):
                    # We actually can't handle serializing out to bytes for
                    # hash objects, encode it here to a hex string
                    mapping[k] = v.hex()
        definition = self.definition.to_simple(minimal=minimal)
        return SerializedDimensionRecord(definition=definition, record=mapping)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDimensionRecord,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
    ) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value return from `to_simple`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")
        _recItems = simple.record.items()
        # Type ignore because the ternary statement seems to confuse mypy
        # based on conflicting inferred types of v.
        key = (
            simple.definition,
            frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems),  # type: ignore
        )
        # Reuse an identical record from the active persistence context,
        # if one is in effect (cache is None otherwise).
        cache = PersistenceContextVars.dimensionRecords.get()
        if cache is not None and (result := cache.get(key)) is not None:
            return result

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Timespan and region have to be converted to native form
        # for now assume that those keys are special
        rec = record_model.dict()

        if (ts := "timespan") in rec:
            rec[ts] = Timespan.from_simple(rec[ts], universe=universe, registry=registry)
        if (reg := "region") in rec:
            encoded = bytes.fromhex(rec[reg])
            rec[reg] = lsst.sphgeom.Region.decode(encoded)
        if (hsh := "hash") in rec:
            # NOTE(review): this assumes rec[hsh] is bytes-like (so that
            # .decode() yields the hex string); if the validation model
            # declares "hash" as `str` this would raise AttributeError —
            # confirm against the element's field types.
            rec[hsh] = bytes.fromhex(rec[hsh].decode())

        dimRec = _reconstructDimensionRecord(definition, rec)
        if cache is not None:
            cache[key] = dimRec
        return dimRec

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def toDict(self, splitTimespan: bool = False) -> dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.

        Returns
        -------
        mapping : `dict`
            Mapping from field name to value for all of this record's fields.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes. To avoid having to
    # put "type: ignore", everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """