Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage") 

25 

26from abc import ABC, abstractmethod 

27from typing import ( 

28 Dict, 

29 Iterable, 

30 Iterator, 

31 Optional, 

32 Tuple, 

33 TYPE_CHECKING, 

34) 

35 

36from ...core import ( 

37 DataCoordinate, 

38 DatasetRef, 

39 DatasetType, 

40 ddl, 

41 ExpandedDataCoordinate, 

42 Quantum, 

43) 

44from ..simpleQuery import Select 

45 

if TYPE_CHECKING:

47 from ...core import DimensionUniverse 

48 from ._database import Database, StaticTablesContext 

49 from ._collections import CollectionManager, CollectionRecord, RunRecord 

50 from ..simpleQuery import SimpleQuery 

51 

52 

class DatasetRecordStorage(ABC):
    """An interface that manages the records associated with a particular
    `DatasetType`.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type whose records this object manages.
    """
    def __init__(self, datasetType: DatasetType) -> None:
        self.datasetType = datasetType

    @abstractmethod
    def insert(self, run: RunRecord, dataIds: Iterable[ExpandedDataCoordinate], *,
               quantum: Optional[Quantum] = None) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            this dataset will be associated with.
        dataIds : `Iterable` [ `ExpandedDataCoordinate` ]
            Expanded data IDs (`ExpandedDataCoordinate` instances) for the
            datasets to be added. The dimensions of all data IDs must be the
            same as ``self.datasetType.dimensions``.
        quantum : `Quantum`, optional
            The `Quantum` instance that should be recorded as responsible for
            producing this dataset.

        Returns
        -------
        datasets : `Iterable` [ `DatasetRef` ]
            References to the inserted datasets.

        Notes
        -----
        This method does not insert component datasets recursively, as those
        have a different `DatasetType` than their parent and hence are managed
        by a different `DatasetRecordStorage` instance.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, collection: CollectionRecord, dataId: DataCoordinate) -> Optional[DatasetRef]:
        """Search a collection for a dataset with the given data ID.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to search for the
            dataset. May have any `CollectionType`.
        dataId : `DataCoordinate`
            Complete (but not necessarily expanded) data ID to search with,
            with ``dataId.graph == self.datasetType.dimensions``.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A resolved `DatasetRef` (without components populated), or `None`
            if no matching dataset was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated. All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will remove
        the existing dataset from that collection.

        Associating the same dataset into a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be disassociated. All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def select(self, collection: CollectionRecord,
               dataId: Select.Or[DataCoordinate] = Select,
               id: Select.Or[Optional[int]] = Select,
               run: Select.Or[None] = Select,
               ) -> Optional[SimpleQuery]:
        """Return a SQLAlchemy object that represents a ``SELECT`` query for
        this `DatasetType`.

        All arguments can either be a value that constrains the query or
        the `Select` tag object to indicate that the value should be returned
        in the columns in the ``SELECT`` clause. The default is `Select`.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to query. May not be
            of type `CollectionType.CHAINED`.
        dataId : `DataCoordinate` or `Select`
            The data ID to restrict results with, or an instruction to return
            the data ID via columns with names
            ``self.datasetType.dimensions.names``.
        id : `int`, `Select`, or `None`
            The integer primary key value for the dataset, an instruction to
            return it via a ``id`` column, or `None` to ignore it
            entirely.
        run : `None` or `Select`
            If `Select` (default), include the dataset's run key value (as
            column labeled with the return value of
            ``CollectionManager.getRunForeignKeyName``).
            If `None`, do not include this column (to constrain the run,
            pass a `RunRecord` as the ``collection`` argument instead.)

        Returns
        -------
        query : `SimpleQuery` or `None`
            A struct containing the SQLAlchemy object representing a
            simple ``SELECT`` query, or `None` if it is known that there are
            no datasets of this `DatasetType` that match the given constraints.
        """
        raise NotImplementedError()

    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`).
    """

211 

212 

class DatasetRecordStorageManager(ABC):
    """An interface that manages the tables that describe datasets.

    `DatasetRecordStorageManager` primarily serves as a container and factory
    for `DatasetRecordStorage` instances, which each provide access to the
    records for a different `DatasetType`.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        universe : `DimensionUniverse`
            Universe graph containing all dimensions known to this `Registry`.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *, name: str = "dataset",
                             constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table. Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted. `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the dataset primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self, *, universe: DimensionUniverse) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        """Return an object that provides access to the records associated with
        the given `DatasetType`, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type, or
            `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should created (as necessary) and
            an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[DatasetType]:
        """Return an iterator over the dataset types present in this layer.

        Notes
        -----
        Dataset types registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be included.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetRef(self, id: int) -> Optional[DatasetRef]:
        """Return a `DatasetRef` for the given dataset primary key
        value.

        Parameters
        ----------
        id : `int`
            Autoincrement primary key value for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with the
            given primary key value exists in this layer.
        """
        raise NotImplementedError()

    @abstractmethod
    def attachComponents(self, composites: Iterable[Tuple[DatasetRef, Dict[str, DatasetRef]]]
                         ) -> Iterator[DatasetRef]:
        """Attach components to one or more datasets.

        Parameters
        ----------
        composites : `Iterable` [ `tuple` [ `DatasetRef`, `dict` ] ]
            Iterable over parents and dictionaries of components. Both parent
            and child `DatasetRef` instances must be resolved, and dict keys
            are assumed (not necessarily checked) to match the component names
            in the parent's storage class.

        Yields
        ------
        parent : `DatasetRef`
            Parent `DatasetRef` instances with `DatasetRef.components`
            dictionaries updated to include new components.
        """
        raise NotImplementedError()

    @abstractmethod
    def fetchComponents(self, ref: DatasetRef) -> DatasetRef:
        """Load references for all components to a `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the parent dataset. If this dataset is not a
            composite it will be returned unmodified.

        Returns
        -------
        parent : `DatasetRef`
            Version of ``ref`` with components attached.

        Raises
        ------
        AmbiguousDatasetError
            Raised if the given `DatasetRef` is unresolved.
        """
        raise NotImplementedError()