Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("DimensionRecord", "SerializedDimensionRecord") 

25 

26from typing import ( 

27 Any, 

28 ClassVar, 

29 Dict, 

30 Optional, 

31 Tuple, 

32 TYPE_CHECKING, 

33 Type, 

34 Union, 

35) 

36from pydantic import BaseModel, create_model, StrictStr, StrictInt, StrictBool, StrictFloat, Field 

37 

38import lsst.sphgeom 

39from lsst.utils.classes import immutable 

40 

41from .._topology import SpatialRegionDatabaseRepresentation 

42from ..timespan import Timespan, TimespanDatabaseRepresentation 

43from ._elements import Dimension, DimensionElement 

44from ..json import from_json_pydantic, to_json_pydantic 

45 

if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.

47 from ._coordinate import DataCoordinate 

48 from ._schema import DimensionElementFields 

49 from ._graph import DimensionUniverse 

50 from ...registry import Registry 

51 

52 

53def _reconstructDimensionRecord(definition: DimensionElement, mapping: Dict[str, Any]) -> DimensionRecord: 

54 """Unpickle implementation for `DimensionRecord` subclasses. 

55 

56 For internal use by `DimensionRecord`. 

57 """ 

58 return definition.RecordClass(**mapping) 

59 

60 

def _subclassDimensionRecord(definition: DimensionElement) -> Type[DimensionRecord]:
    """Create a dynamic subclass of `DimensionRecord` for the given element.

    For internal use by `DimensionRecord`.
    """
    from ._schema import DimensionElementFields
    fields = DimensionElementFields(definition)
    # Start from the standard column names, then tack on the special
    # spatial/temporal attributes when the element defines them.
    attribute_names = list(fields.standard.names)
    if definition.spatial:
        attribute_names.append(SpatialRegionDatabaseRepresentation.NAME)
    if definition.temporal:
        attribute_names.append(TimespanDatabaseRepresentation.NAME)
    namespace = {
        "definition": definition,
        "__slots__": tuple(attribute_names),
        "fields": fields,
    }
    return type(definition.name + ".RecordClass", (DimensionRecord,), namespace)

79 

80 

class SpecificSerializedDimensionRecord(BaseModel, extra="forbid"):
    """Base model for a specific serialized record content.

    Concrete per-element subclasses are generated dynamically with strictly
    typed fields; ``extra="forbid"`` makes unexpected keys in external data a
    validation error rather than being silently ignored.
    """

83 

84 

# Cache of dynamically-created serialization models.  Keyed by both the
# element definition and its universe because a DimensionElement hashes by
# name alone, and same-named elements from different universes may differ.
_SIMPLE_RECORD_CLASS_CACHE: Dict[Tuple[DimensionElement, DimensionUniverse],
                                 Type[SpecificSerializedDimensionRecord]] = {}

87 

88 

def _createSimpleRecordSubclass(definition: DimensionElement) -> Type[SpecificSerializedDimensionRecord]:
    """Return a strictly-typed pydantic model for one element's records.

    Results are cached; repeated calls for the same element and universe
    return the same model class.
    """
    from ._schema import DimensionElementFields
    # Cache on the definition (which hashes as the name) and the
    # associated universe.
    cache_key = (definition, definition.universe)
    cached = _SIMPLE_RECORD_CLASS_CACHE.get(cache_key)
    if cached is not None:
        return cached

    # Prefer strict typing for external data: map lenient builtins onto
    # their strict pydantic counterparts so values are not silently coerced.
    strict_types = {
        str: StrictStr,
        float: StrictFloat,
        bool: StrictBool,
        int: StrictInt,
    }

    members = {}
    for field in DimensionElementFields(definition).standard:
        python_type = field.getPythonType()
        python_type = strict_types.get(python_type, python_type)
        if field.nullable:
            python_type = Optional[python_type]  # type: ignore
        members[field.name] = (python_type, ...)
    if definition.temporal:
        members["timespan"] = (Tuple[int, int], ...)  # type: ignore
    if definition.spatial:
        members["region"] = (str, ...)

    # mypy does not seem to like create_model
    model = create_model(f"SpecificSerializedDimensionRecord{definition.name.capitalize()}",
                         __base__=SpecificSerializedDimensionRecord, **members)  # type: ignore

    _SIMPLE_RECORD_CLASS_CACHE[cache_key] = model
    return model

123 

124 

class SerializedDimensionRecord(BaseModel):
    """Simplified model for serializing a `DimensionRecord`.

    Carries the name of the dimension element plus a flat mapping of the
    record's field values; `DimensionRecord.from_simple` revalidates the
    mapping against a per-element strict model before reconstruction.
    """

    definition: str = Field(
        ...,
        title="Name of dimension associated with this record.",
        example="exposure",
    )

    # Use strict types to prevent casting.  A (int, int) tuple is a
    # serialized Timespan; a str value may be a hex-encoded region.
    # NOTE(review): this field's ``example`` shows the whole serialized
    # envelope ({"definition": ..., "record": ...}) rather than just the
    # record mapping — looks like it should be only the inner dict; confirm
    # against the generated OpenAPI schema before changing.
    record: Dict[str,
                 Union[None, StrictFloat, StrictStr, StrictBool, StrictInt, Tuple[int, int]]] = Field(
        ...,
        title="Dimension record keys and values.",
        example={"definition": "exposure",
                 "record": {"instrument": "LATISS",
                            "exposure": 2021050300044,
                            "obs_id": "AT_O_20210503_00044"}},
    )

    class Config:
        """Local configuration overrides for model."""

        # Whole-model example shown in generated schema documentation.
        schema_extra = {
            "example": {
                "definition": "detector",
                "record": {
                    "instrument": "HSC",
                    "id": 72,
                    "full_name": "0_01",
                    "name_in_raft": "01",
                    "raft": "0",
                    "purpose": "SCIENCE",
                }
            }
        }

161 

162 

@immutable
class DimensionRecord:
    """Base class for the Python representation of database records.

    Parameters
    ----------
    **kwargs
        Field values for this record. Unrecognized keys are ignored. If this
        is the record for a `Dimension`, its primary key value may be provided
        with the actual name of the field (e.g. "id" or "name"), the name of
        the `Dimension`, or both. If this record class has a "timespan"
        attribute, "datetime_begin" and "datetime_end" keyword arguments may
        be provided instead of a single "timespan" keyword argument (but are
        ignored if a "timespan" argument is provided).

    Notes
    -----
    `DimensionRecord` subclasses are created dynamically for each
    `DimensionElement` in a `DimensionUniverse`, and are accessible via the
    `DimensionElement.RecordClass` attribute. The `DimensionRecord` base class
    itself is pure abstract, but does not use the `abc` module to indicate this
    because it does not have overridable methods.

    Record classes have attributes that correspond exactly to the
    `~DimensionElementFields.standard` fields in the related database table,
    plus "region" and "timespan" attributes for spatial and/or temporal
    elements (respectively).

    Instances are usually obtained from a `Registry`, but can be constructed
    directly from Python as well.

    `DimensionRecord` instances are immutable.
    """

    # Derived classes are required to define __slots__ as well, and it's those
    # derived-class slots that other methods on the base class expect to see
    # when they access self.__slots__.
    __slots__ = ("dataId",)

    # Pydantic model used by to_simple/from_simple (via to_json/from_json).
    _serializedType = SerializedDimensionRecord

    def __init__(self, **kwargs: Any):
        # Accept either the dimension name or the actual name of its primary
        # key field; ensure both are present in the dict for convenience below.
        if isinstance(self.definition, Dimension):
            v = kwargs.get(self.definition.primaryKey.name)
            if v is None:
                v = kwargs.get(self.definition.name)
                if v is None:
                    raise ValueError(
                        f"No value provided for {self.definition.name}.{self.definition.primaryKey.name}."
                    )
                kwargs[self.definition.primaryKey.name] = v
            else:
                v2 = kwargs.setdefault(self.definition.name, v)
                if v != v2:
                    raise ValueError(
                        f"Multiple inconsistent values for "
                        f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
                    )
        # Fields absent from kwargs are stored as None rather than raising;
        # immutability means we must assign via object.__setattr__.
        for name in self.__slots__:
            object.__setattr__(self, name, kwargs.get(name))
        # For temporal elements, allow the timespan to be given as separate
        # "datetime_begin"/"datetime_end" keywords when no explicit
        # "timespan" value was supplied above.
        if self.definition.temporal is not None:
            if self.timespan is None:
                object.__setattr__(
                    self,
                    "timespan",
                    Timespan(
                        kwargs.get("datetime_begin"),
                        kwargs.get("datetime_end"),
                    )
                )

        # Local import avoids a circular import at module load time.
        from ._coordinate import DataCoordinate
        object.__setattr__(
            self,
            "dataId",
            DataCoordinate.fromRequiredValues(
                self.definition.graph,
                tuple(kwargs[dimension] for dimension in self.definition.required.names)
            )
        )

    def __eq__(self, other: Any) -> bool:
        # Exact-type comparison is deliberate: each element has its own
        # dynamic subclass, so records of different elements never compare
        # equal even when their data IDs would.
        if type(other) != type(self):
            return False
        return self.dataId == other.dataId

    def __hash__(self) -> int:
        return hash(self.dataId)

    def __str__(self) -> str:
        lines = [f"{self.definition.name}:"]
        lines.extend(f"  {name}: {getattr(self, name)!r}" for name in self.__slots__)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return "{}.RecordClass({})".format(
            self.definition.name,
            ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        )

    def __reduce__(self) -> tuple:
        # Pickle via the module-level helper so the dynamically-created
        # subclass does not itself need to be importable by name.
        mapping = {name: getattr(self, name) for name in self.__slots__}
        return (_reconstructDimensionRecord, (self.definition, mapping))

    def to_simple(self, minimal: bool = False) -> SerializedDimensionRecord:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        simple : `SerializedDimensionRecord`
            Simplified form of this record suitable for serialization.
        """
        # The DataId is sufficient if you are willing to do a deferred
        # query. This may not be overly useful since to reconstruct
        # a collection of records will require repeated registry queries.
        # For now do not implement minimal form.

        mapping = {name: getattr(self, name) for name in self.__slots__}
        # If the item in mapping supports simplification update it
        for k, v in mapping.items():
            try:
                mapping[k] = v.to_simple(minimal=minimal)
            except AttributeError:
                if isinstance(v, lsst.sphgeom.Region):
                    # YAML serialization specifies the class when it
                    # doesn't have to. This is partly for explicitness
                    # and also history. Here use a different approach.
                    # This code needs to be migrated to sphgeom
                    mapping[k] = v.encode().hex()

        definition = self.definition.to_simple(minimal=minimal)
        return SerializedDimensionRecord(definition=definition, record=mapping)

    @classmethod
    def from_simple(cls, simple: SerializedDimensionRecord,
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DimensionRecord:
        """Construct a new object from the simplified form.

        This is generally data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `SerializedDimensionRecord`
            Value return from `to_simple`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        record : `DimensionRecord`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        definition = DimensionElement.from_simple(simple.definition, universe=universe)

        # Create a specialist subclass model with type validation.
        # This allows us to do simple checks of external data (possibly
        # sent as JSON) since for now _reconstructDimensionRecord does not
        # do any validation.
        record_model_cls = _createSimpleRecordSubclass(definition)
        record_model = record_model_cls(**simple.record)

        # Timespan and region have to be converted to native form;
        # for now assume that those keys are special.
        rec = record_model.dict()

        if (ts := "timespan") in rec:
            rec[ts] = Timespan.from_simple(rec[ts], universe=universe, registry=registry)
        if (reg := "region") in rec:
            # Region travels as a hex-encoded sphgeom byte string.
            encoded = bytes.fromhex(rec[reg])
            rec[reg] = lsst.sphgeom.Region.decode(encoded)

        return _reconstructDimensionRecord(definition, rec)

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)

    def toDict(self, splitTimespan: bool = False) -> Dict[str, Any]:
        """Return a vanilla `dict` representation of this record.

        Parameters
        ----------
        splitTimespan : `bool`, optional
            If `True` (`False` is default) transform any "timespan" key value
            from a `Timespan` instance into a pair of regular
            ("datetime_begin", "datetime_end") fields.

        Returns
        -------
        results : `dict` [ `str`, `Any` ]
            Mapping from field name to field value for this record.
        """
        results = {name: getattr(self, name) for name in self.__slots__}
        if splitTimespan:
            timespan = results.pop("timespan", None)
            if timespan is not None:
                results["datetime_begin"] = timespan.begin
                results["datetime_end"] = timespan.end
        return results

    # DimensionRecord subclasses are dynamically created, so static type
    # checkers can't know about them or their attributes. To avoid having to
    # put "type: ignore", everywhere, add a dummy __getattr__ that tells type
    # checkers not to worry about missing attributes.
    def __getattr__(self, name: str) -> Any:
        raise AttributeError(name)

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    dataId: DataCoordinate
    """A dict-like identifier for this record's primary keys
    (`DataCoordinate`).
    """

    definition: ClassVar[DimensionElement]
    """The `DimensionElement` whose records this class represents
    (`DimensionElement`).
    """

    fields: ClassVar[DimensionElementFields]
    """A categorized view of the fields in this class
    (`DimensionElementFields`).
    """