Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage") 

25 

26from abc import ABC, abstractmethod 

27from typing import ( 

28 Any, 

29 Iterable, 

30 Iterator, 

31 Optional, 

32 Tuple, 

33 TYPE_CHECKING, 

34) 

35 

36from ...core import ( 

37 DataCoordinate, 

38 DatasetRef, 

39 DatasetType, 

40 ddl, 

41 ExpandedDataCoordinate, 

42 Quantum, 

43) 

44from ..simpleQuery import Select 

45 

46if TYPE_CHECKING: 46 ↛ 47line 46 didn't jump to line 47, because the condition on line 46 was never true

47 from ...core import DimensionUniverse 

48 from ._database import Database, StaticTablesContext 

49 from ._collections import CollectionManager, CollectionRecord, RunRecord 

50 from ..simpleQuery import SimpleQuery 

51 

52 

53class DatasetRecordStorage(ABC): 

54 """An interface that manages the records associated with a particular 

55 `DatasetType`. 

56 

57 Parameters 

58 ---------- 

59 datasetType : `DatasetType` 

60 Dataset type whose records this object manages. 

61 """ 

62 def __init__(self, datasetType: DatasetType): 

63 self.datasetType = datasetType 

64 

65 @abstractmethod 

66 def insert(self, run: RunRecord, dataIds: Iterable[ExpandedDataCoordinate], *, 

67 quantum: Optional[Quantum] = None) -> Iterator[DatasetRef]: 

68 """Insert one or more dataset entries into the database. 

69 

70 Parameters 

71 ---------- 

72 run : `RunRecord` 

73 The record object describing the `~CollectionType.RUN` collection 

74 this dataset will be associated with. 

75 dataIds : `Iterable` [ `ExpandedDataCoordinate` ] 

76 Expanded data IDs (`ExpandedDataCoordinate` instances) for the 

77 datasets to be added. The dimensions of all data IDs must be the 

78 same as ``self.datasetType.dimensions``. 

79 quantum : `Quantum`, optional 

80 The `Quantum` instance that should be recorded as responsible for 

81 producing this dataset. 

82 

83 Returns 

84 ------- 

85 datasets : `Iterable` [ `DatasetRef` ] 

86 References to the inserted datasets. 

87 """ 

88 raise NotImplementedError() 

89 

90 @abstractmethod 

91 def find(self, collection: CollectionRecord, dataId: DataCoordinate) -> Optional[DatasetRef]: 

92 """Search a collection for a dataset with the given data ID. 

93 

94 Parameters 

95 ---------- 

96 collection : `CollectionRecord` 

97 The record object describing the collection to search for the 

98 dataset. May have any `CollectionType`. 

99 dataId: `DataCoordinate` 

100 Complete (but not necessarily expanded) data ID to search with, 

101 with ``dataId.graph == self.datasetType.dimensions``. 

102 

103 Returns 

104 ------- 

105 ref : `DatasetRef` 

106 A resolved `DatasetRef` (without components populated), or `None` 

107 if no matching dataset was found. 

108 """ 

109 raise NotImplementedError() 

110 

111 @abstractmethod 

112 def delete(self, datasets: Iterable[DatasetRef]) -> None: 

113 """Fully delete the given datasets from the registry. 

114 

115 Parameters 

116 ---------- 

117 datasets : `Iterable` [ `DatasetRef` ] 

118 Datasets to be deleted. All datasets must be resolved and have 

119 the same `DatasetType` as ``self``. 

120 

121 Raises 

122 ------ 

123 AmbiguousDatasetError 

124 Raised if any of the given `DatasetRef` instances is unresolved. 

125 """ 

126 raise NotImplementedError() 

127 

128 @abstractmethod 

129 def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None: 

130 """Associate one or more datasets with a collection. 

131 

132 Parameters 

133 ---------- 

134 collection : `CollectionRecord` 

135 The record object describing the collection. ``collection.type`` 

136 must be `~CollectionType.TAGGED`. 

137 datasets : `Iterable` [ `DatasetRef` ] 

138 Datasets to be associated. All datasets must be resolved and have 

139 the same `DatasetType` as ``self``. 

140 

141 Raises 

142 ------ 

143 AmbiguousDatasetError 

144 Raised if any of the given `DatasetRef` instances is unresolved. 

145 

146 Notes 

147 ----- 

148 Associating a dataset with into collection that already contains a 

149 different dataset with the same `DatasetType` and data ID will remove 

150 the existing dataset from that collection. 

151 

152 Associating the same dataset into a collection multiple times is a 

153 no-op, but is still not permitted on read-only databases. 

154 """ 

155 raise NotImplementedError() 

156 

157 @abstractmethod 

158 def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None: 

159 """Remove one or more datasets from a collection. 

160 

161 Parameters 

162 ---------- 

163 collection : `CollectionRecord` 

164 The record object describing the collection. ``collection.type`` 

165 must be `~CollectionType.TAGGED`. 

166 datasets : `Iterable` [ `DatasetRef` ] 

167 Datasets to be disassociated. All datasets must be resolved and 

168 have the same `DatasetType` as ``self``. 

169 

170 Raises 

171 ------ 

172 AmbiguousDatasetError 

173 Raised if any of the given `DatasetRef` instances is unresolved. 

174 """ 

175 raise NotImplementedError() 

176 

177 @abstractmethod 

178 def select(self, collection: CollectionRecord, 

179 dataId: Select.Or[DataCoordinate] = Select, 

180 id: Select.Or[Optional[int]] = Select, 

181 run: Select.Or[None] = Select, 

182 ) -> Optional[SimpleQuery]: 

183 """Return a SQLAlchemy object that represents a ``SELECT`` query for 

184 this `DatasetType`. 

185 

186 All arguments can either be a value that constrains the query or 

187 the `Select` tag object to indicate that the value should be returned 

188 in the columns in the ``SELECT`` clause. The default is `Select`. 

189 

190 Parameters 

191 ---------- 

192 collection : `CollectionRecord` 

193 The record object describing the collection to query. May not be 

194 of type `CollectionType.CHAINED`. 

195 dataId : `DataCoordinate` or `Select` 

196 The data ID to restrict results with, or an instruction to return 

197 the data ID via columns with names 

198 ``self.datasetType.dimensions.names``. 

199 id : `int`, `Select` or None, 

200 The integer primary key value for the dataset, an instruction to 

201 return it via a ``id`` column, or `None` to ignore it 

202 entirely. 

203 run : `None` or `Select` 

204 If `Select` (default), include the dataset's run key value (as 

205 column labeled with the return value of 

206 ``CollectionManager.getRunForiegnKeyName``). 

207 If `None`, do not include this column (to constrain the run, 

208 pass a `RunRecord` as the ``collection`` argument instead.) 

209 

210 Returns 

211 ------- 

212 query : `SimpleQuery` or `None` 

213 A struct containing the SQLAlchemy object that representing a 

214 simple ``SELECT`` query, or `None` if it is known that there are 

215 no datasets of this `DatasetType` that match the given constraints. 

216 """ 

217 raise NotImplementedError() 

218 

219 datasetType: DatasetType 

220 """Dataset type whose records this object manages (`DatasetType`). 

221 """ 

222 

223 

224class DatasetRecordStorageManager(ABC): 

225 """An interface that manages the tables that describe datasets. 

226 

227 `DatasetRecordStorageManager` primarily serves as a container and factory 

228 for `DatasetRecordStorage` instances, which each provide access to the 

229 records for a different `DatasetType`. 

230 """ 

231 

232 @classmethod 

233 @abstractmethod 

234 def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager, 

235 universe: DimensionUniverse) -> DatasetRecordStorageManager: 

236 """Construct an instance of the manager. 

237 

238 Parameters 

239 ---------- 

240 db : `Database` 

241 Interface to the underlying database engine and namespace. 

242 context : `StaticTablesContext` 

243 Context object obtained from `Database.declareStaticTables`; used 

244 to declare any tables that should always be present. 

245 collections: `CollectionManager` 

246 Manager object for the collections in this `Registry`. 

247 universe : `DimensionUniverse` 

248 Universe graph containing all dimensions known to this `Registry`. 

249 

250 Returns 

251 ------- 

252 manager : `DatasetRecordStorageManager` 

253 An instance of a concrete `DatasetRecordStorageManager` subclass. 

254 """ 

255 raise NotImplementedError() 

256 

257 @classmethod 

258 @abstractmethod 

259 def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *, 

260 name: str = "dataset", constraint: bool = True, onDelete: Optional[str] = None, 

261 **kwargs: Any) -> ddl.FieldSpec: 

262 """Add a foreign key (field and constraint) referencing the dataset 

263 table. 

264 

265 Parameters 

266 ---------- 

267 tableSpec : `ddl.TableSpec` 

268 Specification for the table that should reference the dataset 

269 table. Will be modified in place. 

270 name: `str`, optional 

271 A name to use for the prefix of the new field; the full name is 

272 ``{name}_id``. 

273 onDelete: `str`, optional 

274 One of "CASCADE" or "SET NULL", indicating what should happen to 

275 the referencing row if the collection row is deleted. `None` 

276 indicates that this should be an integrity error. 

277 constraint: `bool`, optional 

278 If `False` (`True` is default), add a field that can be joined to 

279 the dataset primary key, but do not add a foreign key constraint. 

280 **kwargs 

281 Additional keyword arguments are forwarded to the `ddl.FieldSpec` 

282 constructor (only the ``name`` and ``dtype`` arguments are 

283 otherwise provided). 

284 

285 Returns 

286 ------- 

287 idSpec : `ddl.FieldSpec` 

288 Specification for the ID field. 

289 """ 

290 raise NotImplementedError() 

291 

292 @abstractmethod 

293 def refresh(self, *, universe: DimensionUniverse) -> None: 

294 """Ensure all other operations on this manager are aware of any 

295 dataset types that may have been registered by other clients since 

296 it was initialized or last refreshed. 

297 """ 

298 raise NotImplementedError() 

299 

300 @abstractmethod 

301 def find(self, name: str) -> Optional[DatasetRecordStorage]: 

302 """Return an object that provides access to the records associated with 

303 the given `DatasetType`, if one exists. 

304 

305 Parameters 

306 ---------- 

307 name : `str` 

308 Name of the dataset type. 

309 

310 Returns 

311 ------- 

312 records : `DatasetRecordStorage` or `None` 

313 The object representing the records for the given dataset type, or 

314 `None` if there are no records for that dataset type. 

315 

316 Notes 

317 ----- 

318 Dataset types registered by another client of the same repository since 

319 the last call to `initialize` or `refresh` may not be found. 

320 """ 

321 raise NotImplementedError() 

322 

323 @abstractmethod 

324 def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]: 

325 """Ensure that this `Registry` can hold records for the given 

326 `DatasetType`, creating new tables as necessary. 

327 

328 Parameters 

329 ---------- 

330 datasetType : `DatasetType` 

331 Dataset type for which a table should created (as necessary) and 

332 an associated `DatasetRecordStorage` returned. 

333 

334 Returns 

335 ------- 

336 records : `DatasetRecordStorage` 

337 The object representing the records for the given dataset type. 

338 inserted : `bool` 

339 `True` if the dataset type did not exist in the registry before. 

340 

341 Notes 

342 ----- 

343 This operation may not be invoked within a `Database.transaction` 

344 context. 

345 """ 

346 raise NotImplementedError() 

347 

348 @abstractmethod 

349 def __iter__(self) -> Iterator[DatasetType]: 

350 """Return an iterator over the the dataset types present in this layer. 

351 

352 Notes 

353 ----- 

354 Dataset types registered by another client of the same layer since 

355 the last call to `initialize` or `refresh` may not be included. 

356 """ 

357 raise NotImplementedError() 

358 

359 @abstractmethod 

360 def getDatasetRef(self, id: int, *, universe: DimensionUniverse) -> Optional[DatasetRef]: 

361 """Return a `DatasetRef` for the given dataset primary key 

362 value. 

363 

364 Parameters 

365 ---------- 

366 id : `int` 

367 Autoincrement primary key value for the dataset. 

368 universe : `DimensionUniverse` 

369 All known dimensions. 

370 

371 Returns 

372 ------- 

373 ref : `DatasetRef` or `None` 

374 Object representing the dataset, or `None` if no dataset with the 

375 given primary key values exists in this layer. 

376 """ 

377 raise NotImplementedError()