Coverage for python/lsst/daf/butler/core/dimensions/_records.py: 22%

147 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-02 02:16 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("DimensionRecord", "SerializedDimensionRecord") 

25 

26from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional, Tuple, Type, Union 

27 

28import lsst.sphgeom 

29from lsst.utils.classes import immutable 

30from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr, create_model 

31 

32from ..json import from_json_pydantic, to_json_pydantic 

33from ..timespan import Timespan, TimespanDatabaseRepresentation 

34from ._elements import Dimension, DimensionElement 

35 

36if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

37 from ...registry import Registry 

38 from ._coordinate import DataCoordinate 

39 from ._graph import DimensionUniverse 

40 from ._schema import DimensionElementFields 

41 

42 

def _reconstructDimensionRecord(definition: DimensionElement, mapping: Dict[str, Any]) -> DimensionRecord:
    """Rebuild a `DimensionRecord` from its element and its field values.

    This is the callable that `DimensionRecord.__reduce__` hands to pickle;
    it is also used by `DimensionRecord.from_simple`.  For internal use by
    `DimensionRecord`.

    Parameters
    ----------
    definition : `DimensionElement`
        Element whose ``RecordClass`` attribute is instantiated.
    mapping : `dict` [`str`, `Any`]
        Keyword arguments forwarded to the record class constructor.

    Returns
    -------
    record : `DimensionRecord`
        Instance of ``definition.RecordClass`` built from ``mapping``.
    """
    record_cls = definition.RecordClass
    return record_cls(**mapping)

49 

50 

def _subclassDimensionRecord(definition: DimensionElement) -> Type[DimensionRecord]:
    """Build the dynamic `DimensionRecord` subclass for one element.

    For internal use by `DimensionRecord`.

    Parameters
    ----------
    definition : `DimensionElement`
        Element the new record class describes.

    Returns
    -------
    cls : `type` [`DimensionRecord`]
        New class named ``"<element name>.RecordClass"`` whose ``__slots__``
        are the element's standard field names, followed by ``"region"`` for
        spatial elements and the timespan attribute name for temporal ones.
    """
    # Imported here rather than at module scope, as in the rest of this file.
    from ._schema import DimensionElementFields

    element_fields = DimensionElementFields(definition)
    slot_names = [*element_fields.standard.names]
    if definition.spatial:
        slot_names += ["region"]
    if definition.temporal:
        slot_names += [TimespanDatabaseRepresentation.NAME]
    namespace = {
        "definition": definition,
        "__slots__": tuple(slot_names),
        "fields": element_fields,
    }
    return type(f"{definition.name}.RecordClass", (DimensionRecord,), namespace)

66 

67 

class SpecificSerializedDimensionRecord(BaseModel, extra="forbid"):
    """Common base for per-element serialized record models.

    Concrete subclasses are generated at run time by
    `_createSimpleRecordSubclass`, one per dimension element; this base only
    supplies the ``extra="forbid"`` model configuration.
    """

70 

71 

# Per-element validation models already built by _createSimpleRecordSubclass,
# keyed on the (element, universe) pair so each model is created only once.
_SIMPLE_RECORD_CLASS_CACHE: Dict[
    Tuple[DimensionElement, DimensionUniverse],
    Type[SpecificSerializedDimensionRecord],
] = {}

75 

76 

def _createSimpleRecordSubclass(definition: DimensionElement) -> Type[SpecificSerializedDimensionRecord]:
    """Return the pydantic model used to validate records of one element.

    Parameters
    ----------
    definition : `DimensionElement`
        Element whose record fields the model should describe.

    Returns
    -------
    model : `type` [`SpecificSerializedDimensionRecord`]
        Model with one required member per standard field, plus ``timespan``
        for temporal elements and ``region`` for spatial ones.  The model is
        cached, so repeated calls for the same element and universe return
        the same class.
    """
    from ._schema import DimensionElementFields

    # The definition hashes as its name, so the universe is part of the key.
    key = (definition, definition.universe)
    cached = _SIMPLE_RECORD_CLASS_CACHE.get(key)
    if cached is not None:
        return cached

    element_fields = DimensionElementFields(definition)

    # External data should be checked strictly, so builtin scalar types are
    # swapped for their pydantic strict counterparts.
    strict_types = {str: StrictStr, float: StrictFloat, bool: StrictBool, int: StrictInt}

    members = {}
    for spec in element_fields.standard:
        python_type = spec.getPythonType()
        annotation = strict_types.get(python_type, python_type)
        # Nullable fields may hold None but are still required (``...``).
        members[spec.name] = (
            Optional[annotation] if spec.nullable else annotation,  # type: ignore
            ...,
        )
    if definition.temporal:
        members["timespan"] = (Tuple[int, int], ...)  # type: ignore
    if definition.spatial:
        members["region"] = (str, ...)

    model_name = f"SpecificSerializedDimensionRecord{definition.name.capitalize()}"
    # mypy does not seem to like create_model
    model = create_model(
        model_name,
        __base__=SpecificSerializedDimensionRecord,
        **members,  # type: ignore
    )

    _SIMPLE_RECORD_CLASS_CACHE[key] = model
    return model

116 

117 

class SerializedDimensionRecord(BaseModel):
    """Simplified model for serializing a `DimensionRecord`.

    The model has two members: ``definition``, the name of the dimension
    element, and ``record``, a mapping of field names to simple values.
    """

    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        example="exposure",
    )

    # Use strict types to prevent casting
    # NOTE(review): the example below shows a whole serialized record
    # (definition + record) rather than just the field mapping -- confirm
    # whether that is intended before changing the published schema.
    record: Dict[str, Union[None, StrictFloat, StrictStr, StrictBool, StrictInt, Tuple[int, int]]] = Field(
        ...,
        title="Dimension record keys and values.",
        example={
            "definition": "exposure",
            "record": {"instrument": "LATISS", "exposure": 2021050300044, "obs_id": "AT_O_20210503_00044"},
        },
    )

    class Config:
        """Local configuration overrides for model."""

        schema_extra = {
            "example": {
                "definition": "detector",
                "record": {
                    "instrument": "HSC",
                    "id": 72,
                    "full_name": "0_01",
                    "name_in_raft": "01",
                    "raft": "0",
                    "purpose": "SCIENCE",
                },
            }
        }

    @classmethod
    def direct(
        cls,
        *,
        definition: str,
        record: Dict[str, Union[None, StrictFloat, StrictStr, StrictBool, StrictInt, Tuple[int, int]]],
    ) -> SerializedDimensionRecord:
        """Construct a `SerializedDimensionRecord` directly without validators.

        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.

        Parameters
        ----------
        definition : `str`
            Name of the dimension element the record belongs to.
        record : `dict`
            Field names and values.  Values that are lists are stored as
            tuples; all other values are stored unchanged.

        Returns
        -------
        node : `SerializedDimensionRecord`
            New instance of ``cls`` populated without running validation.
        """
        # Build the instance by hand.  An earlier revision also called
        # ``cls.construct`` here and then discarded the result, which only
        # added cost to a method whose purpose is to be cheap.
        node = SerializedDimensionRecord.__new__(cls)
        setter = object.__setattr__
        setter(node, "definition", definition)
        # This method requires tuples as values of the mapping, but JSON
        # readers will read things in as lists. Be kind and transparently
        # transform to tuples
        setter(
            node,
            "record",
            {k: tuple(v) if isinstance(v, list) else v for k, v in record.items()},  # type: ignore
        )
        setter(node, "__fields_set__", {"definition", "record"})
        return node

181 

182 

@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record.  Unrecognized keys are ignored.  If this
        is the record for a `Dimension`, its primary key value may be provided
        with the actual name of the field (e.g. "id" or "name"), the name of
        the `Dimension`, or both.  If this record class has a "timespan"
        attribute, "datetime_begin" and "datetime_end" keyword arguments may
        be provided instead of a single "timespan" keyword argument (but are
        ignored if a "timespan" argument is provided).

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute.  The `DimensionRecord` base class
    itself is pure abstract, but does not use the `abc` module to indicate this
    because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    _serializedType = SerializedDimensionRecord

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        "Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )
        # Instances are immutable, so attributes are set through
        # object.__setattr__; missing fields default to None.
        for name in self.__slots__:
            object.__setattr__(self, name, kwargs.get(name))
        if self.definition.temporal is not None:
            if self.timespan is None:
                # No explicit timespan given: build one from the split bounds.
                object.__setattr__(
                    self,
                    "timespan",
                    Timespan(
                        kwargs.get("datetime_begin"),
                        kwargs.get("datetime_end"),
                    ),
                )

        from ._coordinate import DataCoordinate

        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.fromRequiredValues(
                self.definition.graph,
                tuple(kwargs[dimension] for dimension in self.definition.required.names),
            ),
        )

    def __eq__(self, other: Any) -> bool:
        # Records compare equal when they are of exactly the same record
        # class and have equal data IDs; other field values are not compared.
        if type(other) != type(self):
            return False
        return self.dataId == other.dataId

    def __hash__(self) -> int:
        return hash(self.dataId)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f"  {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return "{}.RecordClass({})".format(
            self.definition.name, ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        )

    def __reduce__(self) -> tuple:
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))

    def _repr_html_(self) -> str:
        """Override the default representation in IPython/Jupyter notebooks.

        This gives a more readable output that understands embedded newlines.

        Returns
        -------
        html : `str`
            The `str` form of this record wrapped in a ``<pre>`` element.
        """
        # The element must be closed, otherwise everything rendered after
        # this record is also treated as preformatted text.
        return f"<pre>{self}</pre>"

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization.  No minimal form of the record itself
            is implemented; the flag is only forwarded to the ``to_simple``
            methods of the element definition and of field values that have
            one.

        Returns
        -------
        simple : `SerializedDimensionRecord`
            Model holding the simplified element definition and a mapping of
            field names to simplified values.  Regions and `bytes` values are
            stored as hexadecimal strings.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.

        mapping = {name: getattr(self, name) for name in self.__slots__}
        # If the item in mapping supports simplification update it
        # (only values of existing keys are replaced, so iterating over the
        # dict while assigning is safe).
        for k, v in mapping.items():
            try:
                mapping[k] = v.to_simple(minimal=minimal)
            except AttributeError:
                if isinstance(v, lsst.sphgeom.Region):
                    # YAML serialization specifies the class when it
                    # doesn't have to. This is partly for explicitness
                    # and also history. Here use a different approach.
                    # This code needs to be migrated to sphgeom
                    mapping[k] = v.encode().hex()
                if isinstance(v, bytes):
                    # We actually can't handle serializing out to bytes for
                    # hash objects, encode it here to a hex string
                    mapping[k] = v.hex()
        definition = self.definition.to_simple(minimal=minimal)
        return SerializedDimensionRecord(definition=definition, record=mapping)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDimensionRecord,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
    ) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value return from `to_simple`.
        universe : `DimensionUniverse`
            The universe of all known dimensions, used to resolve the element
            named by ``simple.definition``.  Can be `None` if `Registry` is
            provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Timespan and region have to be converted to native form
        # for now assume that those keys are special
        rec = record_model.dict()

        if (ts := "timespan") in rec:
            rec[ts] = Timespan.from_simple(rec[ts], universe=universe, registry=registry)
        if (reg := "region") in rec:
            encoded = bytes.fromhex(rec[reg])
            rec[reg] = lsst.sphgeom.Region.decode(encoded)
        if (hsh := "hash") in rec:
            # NOTE(review): this assumes the validated value is `bytes`
            # holding the hex text written by to_simple -- confirm.
            rec[hsh] = bytes.fromhex(rec[hsh].decode())

        return _reconstructDimensionRecord(definition, rec)

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def toDict(self, splitTimespan: bool = False) -> Dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.

        Returns
        -------
        results : `dict` [`str`, `Any`]
            Mapping from each name in ``__slots__`` to its value on this
            record.  With ``splitTimespan`` the "timespan" key is removed,
            and the two bound keys are added when the timespan is not `None`.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes.  To avoid having to
    # put "type: ignore", everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """

438 """