# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage")

from abc import ABC, abstractmethod
from typing import (
    Any,
    Iterable,
    Iterator,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

from ...core import (
    DataCoordinate,
    DatasetRef,
    DatasetType,
    ddl,
    SimpleQuery,
    Timespan,
)
from ._versioning import VersionedExtension

if TYPE_CHECKING:
    from ._database import Database, StaticTablesContext
    from ._dimensions import DimensionRecordStorageManager
    from ._collections import CollectionManager, CollectionRecord, RunRecord


class DatasetRecordStorage(ABC):
    """An interface that manages the records associated with a particular
    `DatasetType`.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type whose records this object manages.
    """
    def __init__(self, datasetType: DatasetType):
        self.datasetType = datasetType


    @abstractmethod
    def insert(self, run: RunRecord, dataIds: Iterable[DataCoordinate]) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            this dataset will be associated with.
        dataIds : `Iterable` [ `DataCoordinate` ]
            Expanded data IDs (`DataCoordinate` instances) for the
            datasets to be added. The dimensions of all data IDs must be the
            same as ``self.datasetType.dimensions``.

        Returns
        -------
        datasets : `Iterable` [ `DatasetRef` ]
            References to the inserted datasets.
        """
        raise NotImplementedError()
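
    # A minimal usage sketch (illustrative only; ``storage``, ``run``, and
    # ``expanded_data_ids`` are assumed to come from a concrete registry
    # implementation, not from this module):
    #
    #     refs = list(storage.insert(run, expanded_data_ids))
    #     # Every returned DatasetRef is resolved, i.e. carries an ``id``.
    #     assert all(ref.id is not None for ref in refs)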


    @abstractmethod
    def find(self, collection: CollectionRecord, dataId: DataCoordinate,
             timespan: Optional[Timespan] = None) -> Optional[DatasetRef]:
        """Search a collection for a dataset with the given data ID.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to search for the
            dataset. May have any `CollectionType`.
        dataId : `DataCoordinate`
            Complete (but not necessarily expanded) data ID to search with,
            with ``dataId.graph == self.datasetType.dimensions``.
        timespan : `Timespan`, optional
            A timespan that the validity range of the dataset must overlap.
            Required if ``collection.type is CollectionType.CALIBRATION``, and
            ignored otherwise.

        Returns
        -------
        ref : `DatasetRef`
            A resolved `DatasetRef` (without components populated), or `None`
            if no matching dataset was found.
        """
        raise NotImplementedError()
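
    # Hedged usage sketch: looking up a calibration dataset requires a
    # timespan (``storage``, ``calib_collection``, ``data_id``, and ``ts``
    # are assumed names, not part of this interface):
    #
    #     ref = storage.find(calib_collection, data_id, timespan=ts)
    #     if ref is None:
    #         ...  # no dataset's validity range overlaps ``ts``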


    @abstractmethod
    def delete(self, datasets: Iterable[DatasetRef]) -> None:
        """Fully delete the given datasets from the registry.

        Parameters
        ----------
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be deleted. All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()


    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated. All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will remove
        the existing dataset from that collection.

        Associating the same dataset into a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
        """
        raise NotImplementedError()
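
    # Illustrative sketch (``storage``, ``tagged``, and ``refs`` are assumed
    # names): tagging resolved datasets into a TAGGED collection; repeating
    # the call with the same datasets is a no-op, per the notes above.
    #
    #     storage.associate(tagged, refs)
    #     storage.associate(tagged, refs)  # no-op on a writable database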


    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be disassociated. All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()


    @abstractmethod
    def certify(self, collection: CollectionRecord, datasets: Iterable[DatasetRef],
                timespan: Timespan) -> None:
        """Associate one or more datasets with a calibration collection and a
        validity range within it.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.CALIBRATION`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated. All datasets must be resolved and have
            the same `DatasetType` as ``self``.
        timespan : `Timespan`
            The validity range for these datasets within the collection.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset with
            the same `DatasetType` and data ID and an overlapping validity
            range.
        TypeError
            Raised if
            ``collection.type is not CollectionType.CALIBRATION`` or if
            ``self.datasetType.isCalibration() is False``.
        """
        raise NotImplementedError()
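
    # Hedged sketch of certifying a calibration for one validity range
    # (``storage``, ``calib_collection``, ``refs``, and the ``t_start`` /
    # ``t_end`` endpoints are assumptions for illustration):
    #
    #     storage.certify(calib_collection, refs,
    #                     Timespan(begin=t_start, end=t_end))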


    @abstractmethod
    def decertify(self, collection: CollectionRecord, timespan: Timespan, *,
                  dataIds: Optional[Iterable[DataCoordinate]] = None) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.CALIBRATION`.
        timespan : `Timespan`
            The validity range to remove datasets from within the collection.
            Datasets that overlap this range but are not contained by it will
            have their validity ranges adjusted to not overlap it, which may
            split a single dataset validity range into two.
        dataIds : `Iterable` [ `DataCoordinate` ], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``self.datasetType`` will be
            decertified.

        Raises
        ------
        TypeError
            Raised if ``collection.type is not CollectionType.CALIBRATION``.
        """
        raise NotImplementedError()
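
    # Hedged sketch: clearing part of a validity range. A dataset certified
    # over [t0, t3) that overlaps the decertified window [t1, t2) would be
    # left with two ranges, [t0, t1) and [t2, t3), as described above.
    #
    #     storage.decertify(calib_collection, Timespan(begin=t1, end=t2))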


    @abstractmethod
    def select(self, collection: CollectionRecord,
               dataId: SimpleQuery.Select.Or[DataCoordinate] = SimpleQuery.Select,
               id: SimpleQuery.Select.Or[Optional[int]] = SimpleQuery.Select,
               run: SimpleQuery.Select.Or[None] = SimpleQuery.Select,
               timespan: SimpleQuery.Select.Or[Optional[Timespan]] = SimpleQuery.Select,
               ingestDate: SimpleQuery.Select.Or[Optional[Timespan]] = None,
               ) -> Optional[SimpleQuery]:
        """Return a SQLAlchemy object that represents a ``SELECT`` query for
        this `DatasetType`.

        All arguments can either be a value that constrains the query or
        the `SimpleQuery.Select` tag object to indicate that the value should
        be returned in the columns in the ``SELECT`` clause. The default is
        `SimpleQuery.Select`.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to query. May not be
            of type `CollectionType.CHAINED`.
        dataId : `DataCoordinate` or `Select`
            The data ID to restrict results with, or an instruction to return
            the data ID via columns with names
            ``self.datasetType.dimensions.names``.
        id : `int`, `Select`, or `None`
            The integer primary key value for the dataset, an instruction to
            return it via an ``id`` column, or `None` to ignore it entirely.
        run : `None` or `Select`
            If `Select` (default), include the dataset's run key value (as a
            column labeled with the return value of
            ``CollectionManager.getRunForeignKeyName``).
            If `None`, do not include this column (to constrain the run,
            pass a `RunRecord` as the ``collection`` argument instead).
        timespan : `None`, `Select`, or `Timespan`
            If `Select` (default), include the validity range timespan in the
            result columns. If a `Timespan` instance, constrain the results to
            those whose validity ranges overlap that given timespan. Ignored
            unless ``collection.type is CollectionType.CALIBRATION``.
        ingestDate : `None`, `Select`, or `Timespan`
            If `Select`, include the ingest timestamp in the result columns.
            If a `Timespan` instance, constrain the results to those whose
            ingest times are inside the given timespan, and also include the
            timestamp in the result columns. If `None` (default), there is no
            constraint and the timestamp is not returned.

        Returns
        -------
        query : `SimpleQuery` or `None`
            A struct containing the SQLAlchemy object that represents a
            simple ``SELECT`` query, or `None` if it is known that there are
            no datasets of this `DatasetType` that match the given
            constraints.
        """
        raise NotImplementedError()
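
    # Hedged sketch of the tag-vs-value convention (``storage``,
    # ``collection``, ``t1``, and ``t2`` are assumed names): return dataset
    # ID and data ID columns, but constrain rather than return the timespan.
    #
    #     query = storage.select(collection,
    #                            dataId=SimpleQuery.Select,
    #                            id=SimpleQuery.Select,
    #                            run=None,
    #                            timespan=Timespan(begin=t1, end=t2))
    #     if query is None:
    #         ...  # no matching datasets can exist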


    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`).
    """


class DatasetRecordStorageManager(VersionedExtension):
    """An interface that manages the tables that describe datasets.

    `DatasetRecordStorageManager` primarily serves as a container and factory
    for `DatasetRecordStorage` instances, which each provide access to the
    records for a different `DatasetType`.
    """


    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext, *,
        collections: CollectionManager,
        dimensions: DimensionRecordStorageManager,
    ) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager` subclass.
        """
        raise NotImplementedError()
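
    # Hedged sketch of how a registry might call this during schema
    # declaration (``MyDatasetManager`` is an invented concrete subclass;
    # the ``declareStaticTables`` usage follows the `Database` API referenced
    # in the docstring above):
    #
    #     with db.declareStaticTables(create=True) as context:
    #         manager = MyDatasetManager.initialize(
    #             db, context, collections=collections, dimensions=dimensions)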


    @classmethod
    @abstractmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *,
                             name: str = "dataset", constraint: bool = True,
                             onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table. Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the dataset row is deleted. `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the dataset primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
        """
        raise NotImplementedError()
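
    # Illustrative sketch (the empty table spec and ``manager`` instance are
    # assumptions): add a ``dataset_id`` field plus a matching foreign-key
    # constraint to a new table specification.
    #
    #     spec = ddl.TableSpec(fields=[])
    #     idField = manager.addDatasetForeignKey(spec, onDelete="CASCADE")
    #     # ``spec`` is modified in place; ``idField`` describes the new
    #     # ``dataset_id`` field.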


    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.
        """
        raise NotImplementedError()


    def __getitem__(self, name: str) -> DatasetRecordStorage:
        """Return the object that provides access to the records associated
        with the given `DatasetType` name.

        This is simply a convenience wrapper for `find` that raises `KeyError`
        when the dataset type is not found.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.

        Raises
        ------
        KeyError
            Raised if there is no dataset type with the given name.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        result = self.find(name)
        if result is None:
            raise KeyError(f"Dataset type with name '{name}' not found.")
        return result
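
    # Usage sketch (``manager`` is an assumed concrete instance and "flat"
    # an arbitrary dataset type name): the mapping-style lookup raises,
    # while `find` returns `None`.
    #
    #     storage = manager["flat"]       # KeyError if unregistered
    #     storage = manager.find("flat")  # None if unregistered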


    @abstractmethod
    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        """Return an object that provides access to the records associated
        with the given `DatasetType` name, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type, or
            `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()


    @abstractmethod
    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should be created (as necessary)
            and an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
        """
        raise NotImplementedError()
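
    # Hedged sketch (``manager`` and ``datasetType`` are assumed names):
    # `register` is idempotent and returns the storage object either way.
    #
    #     storage, inserted = manager.register(datasetType)
    #     if inserted:
    #         ...  # the dataset type was new to this registry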


    @abstractmethod
    def remove(self, name: str) -> None:
        """Remove the dataset type.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.
        """
        raise NotImplementedError()


    @abstractmethod
    def __iter__(self) -> Iterator[DatasetType]:
        """Return an iterator over the dataset types present in this layer.

        Notes
        -----
        Dataset types registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be included.
        """
        raise NotImplementedError()


    @abstractmethod
    def getDatasetRef(self, id: int) -> Optional[DatasetRef]:
        """Return a `DatasetRef` for the given dataset primary key
        value.

        Parameters
        ----------
        id : `int`
            Autoincrement primary key value for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with the
            given primary key value exists in this layer.
        """
        raise NotImplementedError()
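
    # Hedged round-trip sketch (``manager`` and a resolved ``ref`` are
    # assumed): a reference can be recovered from its primary key value.
    #
    #     same = manager.getDatasetRef(ref.id)
    #     assert same is None or same.id == ref.id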