Coverage for python/lsst/daf/butler/core/dimensions/_records.py: 24%

153 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-15 02:34 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("DimensionRecord", "SerializedDimensionRecord") 

25 

26from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional, Tuple, Type, Union 

27 

28import lsst.sphgeom 

29from lsst.utils.classes import immutable 

30from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr, create_model 

31 

32from .._topology import SpatialRegionDatabaseRepresentation 

33from ..json import from_json_pydantic, to_json_pydantic 

34from ..timespan import Timespan, TimespanDatabaseRepresentation 

35from ._elements import Dimension, DimensionElement 

36 

37if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 37 ↛ 38line 37 didn't jump to line 38, because the condition on line 37 was never true

38 from ...registry import Registry 

39 from ._coordinate import DataCoordinate 

40 from ._graph import DimensionUniverse 

41 from ._schema import DimensionElementFields 

42 

43 

44def _reconstructDimensionRecord(definition: DimensionElement, mapping: Dict[str, Any]) -> DimensionRecord: 

45 """Unpickle implementation for `DimensionRecord` subclasses. 

46 

47 For internal use by `DimensionRecord`. 

48 """ 

49 return definition.RecordClass(**mapping) 

50 

51 

def _subclassDimensionRecord(definition: DimensionElement) -> Type[DimensionRecord]:
    """Create a dynamic subclass of `DimensionRecord` for the given element.

    For internal use by `DimensionRecord`.
    """
    # Local import avoids a circular dependency at module import time.
    from ._schema import DimensionElementFields

    element_fields = DimensionElementFields(definition)
    # Standard fields become slots; spatial/temporal elements additionally
    # carry a region and/or timespan attribute.
    slot_names = list(element_fields.standard.names)
    if definition.spatial:
        slot_names.append(SpatialRegionDatabaseRepresentation.NAME)
    if definition.temporal:
        slot_names.append(TimespanDatabaseRepresentation.NAME)
    namespace = {
        "definition": definition,
        "__slots__": tuple(slot_names),
        "fields": element_fields,
    }
    return type(definition.name + ".RecordClass", (DimensionRecord,), namespace)

67 

68 

class SpecificSerializedDimensionRecord(BaseModel, extra="forbid"):
    """Base model for a specific serialized record content.

    Concrete subclasses are created dynamically per dimension element by
    `_createSimpleRecordSubclass`; ``extra="forbid"`` makes validation
    reject any keys that are not declared fields of that element.
    """

71 

72 

# Cache of the dynamically-created pydantic validation models, keyed by the
# dimension element *and* its universe: elements with the same name can have
# different schemas in different universes.
_SIMPLE_RECORD_CLASS_CACHE: Dict[
    Tuple[DimensionElement, DimensionUniverse], Type[SpecificSerializedDimensionRecord]
] = {}

76 

77 

def _createSimpleRecordSubclass(definition: DimensionElement) -> Type[SpecificSerializedDimensionRecord]:
    """Return a pydantic model validating serialized records of ``definition``.

    Models are created once per (element, universe) pair and cached.
    """
    from ._schema import DimensionElementFields

    # Cache on the definition (which hashes as the name) and the
    # associated universe.
    cache_key = (definition, definition.universe)
    cached = _SIMPLE_RECORD_CLASS_CACHE.get(cache_key)
    if cached is not None:
        return cached

    fields = DimensionElementFields(definition)
    # Prefer strict typing for external data: never silently coerce.
    strict_types = {
        str: StrictStr,
        float: StrictFloat,
        bool: StrictBool,
        int: StrictInt,
    }

    members = {}
    for field in fields.standard:
        python_type = field.getPythonType()
        python_type = strict_types.get(python_type, python_type)
        if field.nullable:
            python_type = Optional[python_type]  # type: ignore
        members[field.name] = (python_type, ...)
    # Timespans arrive as (begin, end) nanosecond pairs; regions as hex.
    if definition.temporal:
        members["timespan"] = (Tuple[int, int], ...)  # type: ignore
    if definition.spatial:
        members["region"] = (str, ...)

    # mypy does not seem to like create_model
    model = create_model(
        f"SpecificSerializedDimensionRecord{definition.name.capitalize()}",
        __base__=SpecificSerializedDimensionRecord,
        **members,  # type: ignore
    )

    _SIMPLE_RECORD_CLASS_CACHE[cache_key] = model
    return model

117 

118 

class SerializedDimensionRecord(BaseModel):
    """Simplified model for serializing a `DimensionRecord`."""

    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        example="exposure",
    )

    # Use strict types to prevent casting
    record: Dict[str, Union[None, StrictFloat, StrictStr, StrictBool, StrictInt, Tuple[int, int]]] = Field(
        ...,
        title="Dimension record keys and values.",
        example={
            "definition": "exposure",
            "record": {"instrument": "LATISS", "exposure": 2021050300044, "obs_id": "AT_O_20210503_00044"},
        },
    )

    class Config:
        """Local configuration overrides for model."""

        schema_extra = {
            "example": {
                "definition": "detector",
                "record": {
                    "instrument": "HSC",
                    "id": 72,
                    "full_name": "0_01",
                    "name_in_raft": "01",
                    "raft": "0",
                    "purpose": "SCIENCE",
                },
            }
        }

    @classmethod
    def direct(
        cls,
        *,
        definition: str,
        record: Dict[str, Union[None, StrictFloat, StrictStr, StrictBool, StrictInt, Tuple[int, int]]],
    ) -> SerializedDimensionRecord:
        """Construct a `SerializedDimensionRecord` directly without validators.

        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.

        Parameters
        ----------
        definition : `str`
            Name of the dimension element this record belongs to.
        record : `dict`
            Mapping of field names to field values.

        Returns
        -------
        node : `SerializedDimensionRecord`
            New model populated without validation.
        """
        # Bypass __init__ (and therefore all pydantic validation) entirely;
        # attributes are set directly on a bare instance.
        node = SerializedDimensionRecord.__new__(cls)
        setter = object.__setattr__
        setter(node, "definition", definition)
        # This method requires tuples as values of the mapping, but JSON
        # readers will read things in as lists. Be kind and transparently
        # transform to tuples
        setter(
            node, "record", {k: v if type(v) != list else tuple(v) for k, v in record.items()}  # type: ignore
        )
        setter(node, "__fields_set__", {"definition", "record"})
        return node

182 

183 

@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record. Unrecognized keys are ignored. If this
        is the record for a `Dimension`, its primary key value may be provided
        with the actual name of the field (e.g. "id" or "name"), the name of
        the `Dimension`, or both. If this record class has a "timespan"
        attribute, "datetime_begin" and "datetime_end" keyword arguments may
        be provided instead of a single "timespan" keyword argument (but are
        ignored if a "timespan" argument is provided).

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute. The `DimensionRecord` base class
    itself is pure abstract, but does not use the `abc` module to indicate this
    because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    _serializedType = SerializedDimensionRecord

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        f"Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )
        for name in self.__slots__:
            object.__setattr__(self, name, kwargs.get(name))
        if self.definition.temporal is not None:
            if self.timespan is None:
                # Fall back to assembling a Timespan from the split keyword
                # arguments; both may be absent (yielding an unbounded end).
                object.__setattr__(
                    self,
                    "timespan",
                    Timespan(
                        kwargs.get("datetime_begin"),
                        kwargs.get("datetime_end"),
                    ),
                )

        from ._coordinate import DataCoordinate

        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.fromRequiredValues(
                self.definition.graph,
                tuple(kwargs[dimension] for dimension in self.definition.required.names),
            ),
        )

    def __eq__(self, other: Any) -> bool:
        # Records of different elements are never equal; within an element the
        # primary-key data ID fully identifies the record.
        if type(other) != type(self):
            return False
        return self.dataId == other.dataId

    def __hash__(self) -> int:
        return hash(self.dataId)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f"  {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return "{}.RecordClass({})".format(
            self.definition.name, ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        )

    def __reduce__(self) -> tuple:
        # Pickle via the element definition plus a plain field mapping; the
        # dynamic subclass itself is not directly picklable.
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))

    def _repr_html_(self) -> str:
        """Override the default representation in IPython/Jupyter notebooks.

        This gives a more readable output that understands embedded newlines.
        """
        # Fixed: the closing tag was previously "<pre>", producing
        # malformed HTML in notebook output.
        return f"<pre>{self}</pre>"

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect on for this class.

        Returns
        -------
        simple : `SerializedDimensionRecord`
            Simplified form of this record suitable for serialization.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.

        mapping = {name: getattr(self, name) for name in self.__slots__}
        # If the item in mapping supports simplification update it
        for k, v in mapping.items():
            try:
                mapping[k] = v.to_simple(minimal=minimal)
            except AttributeError:
                if isinstance(v, lsst.sphgeom.Region):
                    # YAML serialization specifies the class when it
                    # doesn't have to. This is partly for explicitness
                    # and also history. Here use a different approach.
                    # This code needs to be migrated to sphgeom
                    mapping[k] = v.encode().hex()
                if isinstance(v, bytes):
                    # We actually can't handle serializing out to bytes for
                    # hash objects, encode it here to a hex string
                    mapping[k] = v.hex()
        definition = self.definition.to_simple(minimal=minimal)
        return SerializedDimensionRecord(definition=definition, record=mapping)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDimensionRecord,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
    ) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value return from `to_simple`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Timespan and region have to be converted to native form
        # for now assume that those keys are special
        rec = record_model.dict()

        if (ts := "timespan") in rec:
            rec[ts] = Timespan.from_simple(rec[ts], universe=universe, registry=registry)
        if (reg := "region") in rec:
            encoded = bytes.fromhex(rec[reg])
            rec[reg] = lsst.sphgeom.Region.decode(encoded)
        if (hsh := "hash") in rec:
            rec[hsh] = bytes.fromhex(rec[hsh].decode())

        return _reconstructDimensionRecord(definition, rec)

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)

    def toDict(self, splitTimespan: bool = False) -> Dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes. To avoid having to
    # put "type: ignore", everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """