Coverage for python/lsst/daf/butler/core/dimensions/_records.py: 24%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

149 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("DimensionRecord", "SerializedDimensionRecord") 

25 

26from typing import ( 

27 Any, 

28 ClassVar, 

29 Dict, 

30 Optional, 

31 Tuple, 

32 TYPE_CHECKING, 

33 Type, 

34 Union, 

35) 

36from pydantic import BaseModel, create_model, StrictStr, StrictInt, StrictBool, StrictFloat, Field 

37 

38import lsst.sphgeom 

39from lsst.utils.classes import immutable 

40 

41from .._topology import SpatialRegionDatabaseRepresentation 

42from ..timespan import Timespan, TimespanDatabaseRepresentation 

43from ._elements import Dimension, DimensionElement 

44from ..json import from_json_pydantic, to_json_pydantic 

45 

46if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 46 ↛ 47line 46 didn't jump to line 47, because the condition on line 46 was never true

47 from ._coordinate import DataCoordinate 

48 from ._schema import DimensionElementFields 

49 from ._graph import DimensionUniverse 

50 from ...registry import Registry 

51 

52 

53def _reconstructDimensionRecord(definition: DimensionElement, mapping: Dict[str, Any]) -> DimensionRecord: 

54 """Unpickle implementation for `DimensionRecord` subclasses. 

55 

56 For internal use by `DimensionRecord`. 

57 """ 

58 return definition.RecordClass(**mapping) 

59 

60 

def _subclassDimensionRecord(definition: DimensionElement) -> Type[DimensionRecord]:
    """Build the dynamic `DimensionRecord` subclass for ``definition``.

    For internal use by `DimensionRecord`.
    """
    from ._schema import DimensionElementFields
    element_fields = DimensionElementFields(definition)
    # Standard columns become slots; spatial/temporal elements each get one
    # extra slot for their region/timespan attribute.
    slot_names = list(element_fields.standard.names)
    for is_needed, extra_name in ((definition.spatial, SpatialRegionDatabaseRepresentation.NAME),
                                  (definition.temporal, TimespanDatabaseRepresentation.NAME)):
        if is_needed:
            slot_names.append(extra_name)
    namespace = {
        "definition": definition,
        "__slots__": tuple(slot_names),
        "fields": element_fields,
    }
    return type(definition.name + ".RecordClass", (DimensionRecord,), namespace)

79 

80 

class SpecificSerializedDimensionRecord(BaseModel, extra="forbid"):
    """Base model for a specific serialized record content.

    Concrete subclasses are created dynamically (one per element) by
    `_createSimpleRecordSubclass`; ``extra="forbid"`` makes pydantic reject
    any keys not declared on the subclass.
    """

83 

84 

# Cache of the dynamically-created pydantic validation models, keyed by the
# element definition and its universe so equal-named elements from different
# universes do not collide.
_SIMPLE_RECORD_CLASS_CACHE: Dict[Tuple[DimensionElement, DimensionUniverse],
                                 Type[SpecificSerializedDimensionRecord]] = {}

87 

88 

def _createSimpleRecordSubclass(definition: DimensionElement) -> Type[SpecificSerializedDimensionRecord]:
    """Create (or retrieve from cache) a pydantic model that validates the
    serialized record contents for the given element.

    Parameters
    ----------
    definition : `DimensionElement`
        Element whose standard fields define the model's attributes.

    Returns
    -------
    model : `type` [ `SpecificSerializedDimensionRecord` ]
        A pydantic model with one strictly-typed field per standard field of
        the element, plus ``timespan`` (pair of `int`) for temporal elements
        and ``region`` (hex-encoded `str`) for spatial ones.
    """
    from ._schema import DimensionElementFields
    # Cache on the definition (which hashes as the name) and the
    # associated universe.
    cache_key = (definition, definition.universe)
    if cache_key in _SIMPLE_RECORD_CLASS_CACHE:
        return _SIMPLE_RECORD_CLASS_CACHE[cache_key]

    fields = DimensionElementFields(definition)
    members = {}
    # Prefer strict typing for external data
    type_map = {str: StrictStr,
                float: StrictFloat,
                bool: StrictBool,
                int: StrictInt,
                }

    for field in fields.standard:
        field_type = field.getPythonType()
        field_type = type_map.get(field_type, field_type)
        if field.nullable:
            field_type = Optional[field_type]  # type: ignore
        members[field.name] = (field_type, ...)
    if definition.temporal:
        members["timespan"] = (Tuple[int, int], ...)  # type: ignore
    if definition.spatial:
        members["region"] = (str, ...)

    # mypy does not seem to like create_model
    model = create_model(f"SpecificSerializedDimensionRecord{definition.name.capitalize()}",
                         __base__=SpecificSerializedDimensionRecord, **members)  # type: ignore

    _SIMPLE_RECORD_CLASS_CACHE[cache_key] = model
    return model

123 

124 

class SerializedDimensionRecord(BaseModel):
    """Simplified model for serializing a `DimensionRecord`."""

    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        example="exposure",
    )

    # Use strict types to prevent casting
    record: Dict[str,
                 Union[None, StrictFloat, StrictStr, StrictBool, StrictInt, Tuple[int, int]]] = Field(
        ...,
        title="Dimension record keys and values.",
        example={"definition": "exposure",
                 "record": {"instrument": "LATISS",
                            "exposure": 2021050300044,
                            "obs_id": "AT_O_20210503_00044"}},
    )

    class Config:
        """Local configuration overrides for model."""

        schema_extra = {
            "example": {
                "definition": "detector",
                "record": {
                    "instrument": "HSC",
                    "id": 72,
                    "full_name": "0_01",
                    "name_in_raft": "01",
                    "raft": "0",
                    "purpose": "SCIENCE",
                }
            }
        }

    @classmethod
    def direct(cls, *, definition: str, record: Dict[str, Union[None, StrictFloat, StrictStr, StrictBool,
                                                                StrictInt, Tuple[int, int]]]
               ) -> SerializedDimensionRecord:
        """Construct a `SerializedDimensionRecord` directly without validators.

        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.
        """
        # Allocate the instance directly and fill fields with
        # object.__setattr__ to bypass validation (and any immutability
        # configured on the model). NOTE: a previous revision also called
        # cls.construct() here and immediately discarded the result; that
        # dead statement has been removed.
        node = SerializedDimensionRecord.__new__(cls)
        setter = object.__setattr__
        setter(node, 'definition', definition)
        # This method requires tuples as values of the mapping, but JSON
        # readers will read things in as lists. Be kind and transparently
        # transform to tuples
        setter(node, 'record', {k: tuple(v) if isinstance(v, list) else v  # type: ignore
                                for k, v in record.items()})
        # Pydantic bookkeeping: mark both fields as explicitly set.
        setter(node, '__fields_set__', {'definition', 'record'})
        return node

185 

186 

@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record. Unrecognized keys are ignored. If this
        is the record for a `Dimension`, its primary key value may be provided
        with the actual name of the field (e.g. "id" or "name"), the name of
        the `Dimension`, or both. If this record class has a "timespan"
        attribute, "datetime_begin" and "datetime_end" keyword arguments may
        be provided instead of a single "timespan" keyword argument (but are
        ignored if a "timespan" argument is provided).

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute. The `DimensionRecord` base class
    itself is pure abstract, but does not use the `abc` module to indicate this
    because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    # Pydantic model used by to_simple/from_simple and the to_json/from_json
    # helpers below.
    _serializedType = SerializedDimensionRecord

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        f"Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )
        # Unspecified fields are stored as None rather than raising.
        for name in self.__slots__:
            object.__setattr__(self, name, kwargs.get(name))
        # For temporal records, allow the timespan to be supplied as separate
        # "datetime_begin"/"datetime_end" keywords when no "timespan" kwarg
        # was given.
        if self.definition.temporal is not None:
            if self.timespan is None:
                object.__setattr__(
                    self,
                    "timespan",
                    Timespan(
                        kwargs.get("datetime_begin"),
                        kwargs.get("datetime_end"),
                    )
                )

        from ._coordinate import DataCoordinate
        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.fromRequiredValues(
                self.definition.graph,
                tuple(kwargs[dimension] for dimension in self.definition.required.names)
            )
        )

    def __eq__(self, other: Any) -> bool:
        # Records of different record classes never compare equal; otherwise
        # equality is based solely on the primary-key data ID.
        if type(other) != type(self):
            return False
        return self.dataId == other.dataId

    def __hash__(self) -> int:
        # Consistent with __eq__: hash only the primary-key data ID.
        return hash(self.dataId)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f"  {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return "{}.RecordClass({})".format(
            self.definition.name,
            ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        )

    def __reduce__(self) -> tuple:
        # Pickle via the module-level _reconstructDimensionRecord helper so
        # the dynamically-created subclass itself need not be picklable.
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        simple : `SerializedDimensionRecord`
            The serializable form of this record.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.

        mapping = {name: getattr(self, name) for name in self.__slots__}
        # If the item in mapping supports simplification update it
        for k, v in mapping.items():
            try:
                mapping[k] = v.to_simple(minimal=minimal)
            except AttributeError:
                if isinstance(v, lsst.sphgeom.Region):
                    # YAML serialization specifies the class when it
                    # doesn't have to. This is partly for explicitness
                    # and also history. Here use a different approach.
                    # This code needs to be migrated to sphgeom
                    mapping[k] = v.encode().hex()
                if isinstance(v, bytes):
                    # We actually can't handle serializing out to bytes for
                    # hash objects, encode it here to a hex string
                    mapping[k] = v.hex()
        definition = self.definition.to_simple(minimal=minimal)
        return SerializedDimensionRecord(definition=definition, record=mapping)

    @classmethod
    def from_simple(cls, simple: SerializedDimensionRecord,
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value return from `to_simple`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Timespan and region have to be converted to native form
        # for now assume that those keys are special
        rec = record_model.dict()

        if (ts := "timespan") in rec:
            rec[ts] = Timespan.from_simple(rec[ts], universe=universe, registry=registry)
        if (reg := "region") in rec:
            # Region travels as a hex-encoded string (see to_simple).
            encoded = bytes.fromhex(rec[reg])
            rec[reg] = lsst.sphgeom.Region.decode(encoded)
        if (hsh := "hash") in rec:
            rec[hsh] = bytes.fromhex(rec[hsh].decode())

        return _reconstructDimensionRecord(definition, rec)

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)

    def toDict(self, splitTimespan: bool = False) -> Dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.

        Returns
        -------
        mapping : `dict`
            Mapping from field name to value for every slot of this record.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes. To avoid having to
    # put "type: ignore", everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """