# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage")

from abc import ABC, abstractmethod
from typing import (
    Any,
    Dict,
    Iterable,
    Iterator,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

from ...core import (
    DataCoordinate,
    DatasetRef,
    DatasetType,
    ddl,
    ExpandedDataCoordinate,
    Quantum,
)
from ..simpleQuery import Select

if TYPE_CHECKING:
    from ...core import DimensionUniverse
    from ._database import Database, StaticTablesContext
    from ._collections import CollectionManager, CollectionRecord, RunRecord
    from ..simpleQuery import SimpleQuery


class DatasetRecordStorage(ABC):
55 """An interface that manages the records associated with a particular 

56 `DatasetType`. 

57 

58 Parameters 

59 ---------- 

60 datasetType : `DatasetType` 

61 Dataset type whose records this object manages. 

62 """ 

63 def __init__(self, datasetType: DatasetType): 

64 self.datasetType = datasetType 

65 

    @abstractmethod
    def insert(self, run: RunRecord, dataIds: Iterable[ExpandedDataCoordinate], *,
               quantum: Optional[Quantum] = None) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            this dataset will be associated with.
        dataIds : `Iterable` [ `ExpandedDataCoordinate` ]
            Expanded data IDs (`ExpandedDataCoordinate` instances) for the
            datasets to be added. The dimensions of all data IDs must be the
            same as ``self.datasetType.dimensions``.
        quantum : `Quantum`, optional
            The `Quantum` instance that should be recorded as responsible for
            producing this dataset.

        Returns
        -------
        datasets : `Iterable` [ `DatasetRef` ]
            References to the inserted datasets.

        Notes
        -----
        This method does not insert component datasets recursively, as those
        have a different `DatasetType` than their parent and hence are managed
        by a different `DatasetRecordStorage` instance.
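
        Examples
        --------
        A minimal sketch, assuming ``storage`` is a concrete
        `DatasetRecordStorage` instance and ``run`` and ``dataIds`` were
        obtained from the registry elsewhere (all three names are
        hypothetical)::

            refs = list(storage.insert(run, dataIds))
            # Each returned DatasetRef is resolved and associated with
            # the given RUN collection.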

94 """ 

95 raise NotImplementedError() 

96 

    @abstractmethod
    def find(self, collection: CollectionRecord, dataId: DataCoordinate) -> Optional[DatasetRef]:
        """Search a collection for a dataset with the given data ID.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to search for the
            dataset. May have any `CollectionType`.
        dataId : `DataCoordinate`
            Complete (but not necessarily expanded) data ID to search with,
            with ``dataId.graph == self.datasetType.dimensions``.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A resolved `DatasetRef` (without components populated), or `None`
            if no matching dataset was found.
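
        Examples
        --------
        A minimal sketch, assuming hypothetical ``storage``, ``collection``,
        and ``dataId`` objects obtained elsewhere::

            ref = storage.find(collection, dataId)
            if ref is None:
                print("no matching dataset in this collection")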

115 """ 

116 raise NotImplementedError() 

117 

    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated. All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will remove
        the existing dataset from that collection.

        Associating the same dataset into a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
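
        Examples
        --------
        A minimal sketch, assuming ``tagged`` is a `CollectionRecord` for a
        `~CollectionType.TAGGED` collection and ``refs`` is an iterable of
        resolved `DatasetRef` instances (both names are hypothetical)::

            storage.associate(tagged, refs)
            # Running the same call again is a no-op (but still requires
            # write access to the database).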

144 """ 

145 raise NotImplementedError() 

146 

    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be disassociated. All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def select(self, collection: CollectionRecord,
               dataId: Select.Or[DataCoordinate] = Select,
               id: Select.Or[Optional[int]] = Select,
               run: Select.Or[None] = Select,
               ) -> Optional[SimpleQuery]:
        """Return a SQLAlchemy object that represents a ``SELECT`` query for
        this `DatasetType`.

        All arguments can either be a value that constrains the query or
        the `Select` tag object to indicate that the value should be returned
        in the columns in the ``SELECT`` clause. The default is `Select`.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to query. May not be
            of type `CollectionType.CHAINED`.
        dataId : `DataCoordinate` or `Select`
            The data ID to restrict results with, or an instruction to return
            the data ID via columns with names
            ``self.datasetType.dimensions.names``.
        id : `int`, `Select`, or `None`
            The integer primary key value for the dataset, an instruction to
            return it via an ``id`` column, or `None` to ignore it entirely.
        run : `None` or `Select`
            If `Select` (default), include the dataset's run key value (as a
            column labeled with the return value of
            ``CollectionManager.getRunForeignKeyName``).
            If `None`, do not include this column (to constrain the run,
            pass a `RunRecord` as the ``collection`` argument instead).

        Returns
        -------
        query : `SimpleQuery` or `None`
            A struct containing a SQLAlchemy object representing a simple
            ``SELECT`` query, or `None` if it is known that there are no
            datasets of this `DatasetType` that match the given constraints.
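
        Examples
        --------
        A minimal sketch, assuming hypothetical ``storage`` and
        ``collection`` objects. Passing a value constrains the query;
        passing the `Select` tag (the default) adds a result column::

            # Return data ID and dataset ID columns, omit the run column.
            query = storage.select(collection, dataId=Select, id=Select,
                                   run=None)
            if query is None:
                pass  # no datasets of this type can match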

206 """ 

207 raise NotImplementedError() 

208 

    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`).
    """


class DatasetRecordStorageManager(ABC):
215 """An interface that manages the tables that describe datasets. 

216 

217 `DatasetRecordStorageManager` primarily serves as a container and factory 

218 for `DatasetRecordStorage` instances, which each provide access to the 

219 records for a different `DatasetType`. 

220 """ 

221 

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        universe : `DimensionUniverse`
            Universe graph containing all dimensions known to this `Registry`.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *,
                             name: str = "dataset", constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table. Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        constraint : `bool`, optional
            If `False` (default is `True`), add a field that can be joined to
            the dataset primary key, but do not add a foreign key constraint.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the dataset row is deleted. `None`
            indicates that this should be an integrity error.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
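
        Examples
        --------
        A minimal sketch, assuming ``manager`` is a concrete
        `DatasetRecordStorageManager` subclass and ``tableSpec`` is a
        `ddl.TableSpec` under construction (both names are hypothetical)::

            fieldSpec = manager.addDatasetForeignKey(tableSpec,
                                                     onDelete="CASCADE")
            # tableSpec now has a "dataset_id" field and a foreign key
            # constraint on the dataset table; deleting a dataset row
            # deletes the referencing rows as well.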

279 """ 

280 raise NotImplementedError() 

281 

    @abstractmethod
    def refresh(self, *, universe: DimensionUniverse) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        """Return an object that provides access to the records associated
        with the given `DatasetType`, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type, or
            `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
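
        Examples
        --------
        A minimal sketch, assuming a hypothetical concrete ``manager``
        instance (the ``"calexp"`` dataset type name is also hypothetical)::

            storage = manager.find("calexp")
            if storage is None:
                pass  # dataset type not registered (or not yet refreshed)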

310 """ 

311 raise NotImplementedError() 

312 

    @abstractmethod
    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should be created (as necessary)
            and an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
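
        Examples
        --------
        A minimal sketch, assuming hypothetical ``manager`` and
        ``datasetType`` objects::

            storage, inserted = manager.register(datasetType)
            if inserted:
                pass  # the dataset type was not previously registered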

335 """ 

336 raise NotImplementedError() 

337 

    @abstractmethod
    def __iter__(self) -> Iterator[DatasetType]:
        """Return an iterator over the dataset types present in this layer.

        Notes
        -----
        Dataset types registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be included.
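
        Examples
        --------
        A minimal sketch, assuming a hypothetical concrete ``manager``
        instance::

            for datasetType in manager:
                print(datasetType.name)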

346 """ 

347 raise NotImplementedError() 

348 

    @abstractmethod
    def getDatasetRef(self, id: int) -> Optional[DatasetRef]:
        """Return a `DatasetRef` for the given dataset primary key value.

        Parameters
        ----------
        id : `int`
            Autoincrement primary key value for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with the
            given primary key value exists in this layer.
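
        Examples
        --------
        A minimal sketch, assuming a hypothetical concrete ``manager``
        instance (the ID value is arbitrary)::

            ref = manager.getDatasetRef(42)
            if ref is None:
                pass  # no dataset with this primary key in this layer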

364 """ 

365 raise NotImplementedError() 

366 

    @abstractmethod
    def attachComponents(self, composites: Iterable[Tuple[DatasetRef, Dict[str, DatasetRef]]]
                         ) -> Iterator[DatasetRef]:
        """Attach components to one or more datasets.

        Parameters
        ----------
        composites : `Iterable` [ `tuple` [ `DatasetRef`, `dict` ] ]
            Iterable over parents and dictionaries of components. Both parent
            and child `DatasetRef` instances must be resolved, and dict keys
            are assumed (not necessarily checked) to match the component names
            in the parent's storage class.

        Yields
        ------
        parent : `DatasetRef`
            Parent `DatasetRef` instances with `DatasetRef.components`
            dictionaries updated to include new components.
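
        Examples
        --------
        A minimal sketch, assuming hypothetical resolved ``parent``,
        ``imageRef``, and ``maskRef`` datasets (the component names are
        also hypothetical)::

            composites = [(parent, {"image": imageRef, "mask": maskRef})]
            for updated in manager.attachComponents(composites):
                pass  # updated.components now includes the new components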

385 """ 

386 raise NotImplementedError() 

387 

    @abstractmethod
    def fetchComponents(self, ref: DatasetRef) -> DatasetRef:
        """Load references for all components of a dataset and attach them
        to a `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the parent dataset. If this dataset is not a
            composite, it will be returned unmodified.

        Returns
        -------
        parent : `DatasetRef`
            Version of ``ref`` with components attached.

        Raises
        ------
        AmbiguousDatasetError
            Raised if the given `DatasetRef` is unresolved.
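
        Examples
        --------
        A minimal sketch, assuming a hypothetical resolved ``ref`` for a
        composite dataset::

            parent = manager.fetchComponents(ref)
            # parent.components maps component names to child DatasetRefs.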

407 """ 

408 raise NotImplementedError()