Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage")

from abc import ABC, abstractmethod
from typing import (
    Any,
    Iterable,
    Iterator,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

from ...core import (
    DataCoordinate,
    DatasetRef,
    DatasetType,
    ddl,
    SimpleQuery,
    Timespan,
)
from ._versioning import VersionedExtension

if TYPE_CHECKING:
    from ..summaries import CollectionSummary
    from ._database import Database, StaticTablesContext
    from ._dimensions import DimensionRecordStorageManager
    from ._collections import CollectionManager, CollectionRecord, RunRecord

52 

class DatasetRecordStorage(ABC):
    """An interface that manages the records associated with a particular
    `DatasetType`.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type whose records this object manages.
    """
    def __init__(self, datasetType: DatasetType):
        self.datasetType = datasetType

    @abstractmethod
    def insert(self, run: RunRecord, dataIds: Iterable[DataCoordinate]) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            this dataset will be associated with.
        dataIds : `Iterable` [ `DataCoordinate` ]
            Expanded data IDs (`DataCoordinate` instances) for the
            datasets to be added.  The dimensions of all data IDs must be the
            same as ``self.datasetType.dimensions``.

        Returns
        -------
        datasets : `Iterable` [ `DatasetRef` ]
            References to the inserted datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, collection: CollectionRecord, dataId: DataCoordinate,
             timespan: Optional[Timespan] = None) -> Optional[DatasetRef]:
        """Search a collection for a dataset with the given data ID.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to search for the
            dataset.  May have any `CollectionType`.
        dataId : `DataCoordinate`
            Complete (but not necessarily expanded) data ID to search with,
            with ``dataId.graph == self.datasetType.dimensions``.
        timespan : `Timespan`, optional
            A timespan that the validity range of the dataset must overlap.
            Required if ``collection.type is CollectionType.CALIBRATION``, and
            ignored otherwise.

        Returns
        -------
        ref : `DatasetRef`
            A resolved `DatasetRef` (without components populated), or `None`
            if no matching dataset was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def delete(self, datasets: Iterable[DatasetRef]) -> None:
        """Fully delete the given datasets from the registry.

        Parameters
        ----------
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be deleted.  All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.  All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will remove
        the existing dataset from that collection.

        Associating the same dataset into a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be disassociated.  All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(self, collection: CollectionRecord, datasets: Iterable[DatasetRef],
                timespan: Timespan) -> None:
        """Associate one or more datasets with a calibration collection and a
        validity range within it.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.CALIBRATION`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.  All datasets must be resolved and have
            the same `DatasetType` as ``self``.
        timespan : `Timespan`
            The validity range for these datasets within the collection.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset with
            the same `DatasetType` and data ID and an overlapping validity
            range.
        TypeError
            Raised if
            ``collection.type is not CollectionType.CALIBRATION`` or if
            ``self.datasetType.isCalibration() is False``.
        """
        raise NotImplementedError()

    @abstractmethod
    def decertify(self, collection: CollectionRecord, timespan: Timespan, *,
                  dataIds: Optional[Iterable[DataCoordinate]] = None) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.CALIBRATION`.
        timespan : `Timespan`
            The validity range to remove datasets from within the collection.
            Datasets that overlap this range but are not contained by it will
            have their validity ranges adjusted to not overlap it, which may
            split a single dataset validity range into two.
        dataIds : `Iterable` [ `DataCoordinate` ], optional
            Data IDs that should be decertified within the given validity
            range.  If `None`, all data IDs for ``self.datasetType`` will be
            decertified.

        Raises
        ------
        TypeError
            Raised if ``collection.type is not CollectionType.CALIBRATION``.
        """
        raise NotImplementedError()

    @abstractmethod
    def select(self, collection: CollectionRecord,
               dataId: SimpleQuery.Select.Or[DataCoordinate] = SimpleQuery.Select,
               id: SimpleQuery.Select.Or[Optional[int]] = SimpleQuery.Select,
               run: SimpleQuery.Select.Or[None] = SimpleQuery.Select,
               timespan: SimpleQuery.Select.Or[Optional[Timespan]] = SimpleQuery.Select,
               ingestDate: SimpleQuery.Select.Or[Optional[Timespan]] = None,
               ) -> Optional[SimpleQuery]:
        """Return a SQLAlchemy object that represents a ``SELECT`` query for
        this `DatasetType`.

        All arguments can either be a value that constrains the query or
        the `SimpleQuery.Select` tag object to indicate that the value should
        be returned in the columns in the ``SELECT`` clause.  The default is
        `SimpleQuery.Select`.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to query.  May not be
            of type `CollectionType.CHAINED`.
        dataId : `DataCoordinate` or `Select`
            The data ID to restrict results with, or an instruction to return
            the data ID via columns with names
            ``self.datasetType.dimensions.names``.
        id : `int`, `Select`, or `None`
            The integer primary key value for the dataset, an instruction to
            return it via an ``id`` column, or `None` to ignore it
            entirely.
        run : `None` or `Select`
            If `Select` (default), include the dataset's run key value (as
            column labeled with the return value of
            ``CollectionManager.getRunForeignKeyName``).
            If `None`, do not include this column (to constrain the run,
            pass a `RunRecord` as the ``collection`` argument instead).
        timespan : `None`, `Select`, or `Timespan`
            If `Select` (default), include the validity range timespan in the
            result columns.  If a `Timespan` instance, constrain the results
            to those whose validity ranges overlap that given timespan.
            Ignored unless ``collection.type is CollectionType.CALIBRATION``.
        ingestDate : `None`, `Select`, or `Timespan`
            If `Select` include the ingest timestamp in the result columns.
            If a `Timespan` instance, constrain the results to those whose
            ingest times which are inside given timespan and also include
            timestamp in the result columns.  If `None` (default) then there
            is no constraint and timestamp is not returned.

        Returns
        -------
        query : `SimpleQuery` or `None`
            A struct containing the SQLAlchemy object that represents a
            simple ``SELECT`` query, or `None` if it is known that there are
            no datasets of this `DatasetType` that match the given
            constraints.
        """
        raise NotImplementedError()

    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`).
    """

297 

298 

class DatasetRecordStorageManager(VersionedExtension):
    """An interface that manages the tables that describe datasets.

    `DatasetRecordStorageManager` primarily serves as a container and factory
    for `DatasetRecordStorage` instances, which each provide access to the
    records for a different `DatasetType`.
    """

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext, *,
        collections: CollectionManager,
        dimensions: DimensionRecordStorageManager,
    ) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *,
                             name: str = "dataset", constraint: bool = True,
                             onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table.  Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the dataset primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.
        """
        raise NotImplementedError()

    def __getitem__(self, name: str) -> DatasetRecordStorage:
        """Return the object that provides access to the records associated
        with the given `DatasetType` name.

        This is simply a convenience wrapper for `find` that raises `KeyError`
        when the dataset type is not found.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.

        Raises
        ------
        KeyError
            Raised if there is no dataset type with the given name.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        result = self.find(name)
        if result is None:
            raise KeyError(f"Dataset type with name '{name}' not found.")
        return result

    @abstractmethod
    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        """Return an object that provides access to the records associated
        with the given `DatasetType` name, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type,
            or `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should be created (as necessary)
            and an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Remove the dataset type.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[DatasetType]:
        """Return an iterator over the dataset types present in this layer.

        Notes
        -----
        Dataset types registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be included.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetRef(self, id: int) -> Optional[DatasetRef]:
        """Return a `DatasetRef` for the given dataset primary key
        value.

        Parameters
        ----------
        id : `int`
            Autoincrement primary key value for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with the
            given primary key value exists in this layer.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionSummary(self, collection: CollectionRecord) -> CollectionSummary:
        """Return a summary for the given collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            Record describing the collection for which a summary is to be
            retrieved.

        Returns
        -------
        summary : `CollectionSummary`
            Summary of the dataset types and governor dimension values in
            this collection.
        """
        raise NotImplementedError()