Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage") 

25 

26from abc import ABC, abstractmethod 

27from typing import ( 

28 Any, 

29 Iterable, 

30 Iterator, 

31 Optional, 

32 Tuple, 

33 TYPE_CHECKING, 

34) 

35 

36from ...core import ( 

37 DataCoordinate, 

38 DatasetRef, 

39 DatasetType, 

40 ddl, 

41 ExpandedDataCoordinate, 

42 Quantum, 

43) 

44from ..simpleQuery import Select 

45 

46if TYPE_CHECKING: 

47 from ...core import DimensionUniverse 

48 from ._database import Database, StaticTablesContext 

49 from ._collections import CollectionManager, CollectionRecord, RunRecord 

50 from ..simpleQuery import SimpleQuery 

51 

52 

class DatasetRecordStorage(ABC):
    """An interface that manages the records associated with a particular
    `DatasetType`.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type whose records this object manages.
    """
    def __init__(self, datasetType: DatasetType):
        self.datasetType = datasetType

    @abstractmethod
    def insert(self, run: RunRecord, dataIds: Iterable[ExpandedDataCoordinate], *,
               quantum: Optional[Quantum] = None) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            this dataset will be associated with.
        dataIds : `Iterable` [ `ExpandedDataCoordinate` ]
            Expanded data IDs (`ExpandedDataCoordinate` instances) for the
            datasets to be added.  The dimensions of all data IDs must be the
            same as ``self.datasetType.dimensions``.
        quantum : `Quantum`, optional
            The `Quantum` instance that should be recorded as responsible for
            producing this dataset.

        Returns
        -------
        datasets : `Iterable` [ `DatasetRef` ]
            References to the inserted datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, collection: CollectionRecord, dataId: DataCoordinate) -> Optional[DatasetRef]:
        """Search a collection for a dataset with the given data ID.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to search for the
            dataset.  May have any `CollectionType`.
        dataId : `DataCoordinate`
            Complete (but not necessarily expanded) data ID to search with,
            with ``dataId.graph == self.datasetType.dimensions``.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A resolved `DatasetRef` (without components populated), or `None`
            if no matching dataset was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.  All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will remove
        the existing dataset from that collection.

        Associating the same dataset into a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be disassociated.  All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def select(self, collection: CollectionRecord,
               dataId: Select.Or[DataCoordinate] = Select,
               id: Select.Or[Optional[int]] = Select,
               run: Select.Or[None] = Select,
               ) -> Optional[SimpleQuery]:
        """Return a SQLAlchemy object that represents a ``SELECT`` query for
        this `DatasetType`.

        All arguments can either be a value that constrains the query or
        the `Select` tag object to indicate that the value should be returned
        in the columns in the ``SELECT`` clause.  The default is `Select`.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to query.  May not be
            of type `CollectionType.CHAINED`.
        dataId : `DataCoordinate` or `Select`
            The data ID to restrict results with, or an instruction to return
            the data ID via columns with names
            ``self.datasetType.dimensions.names``.
        id : `int`, `Select` or `None`
            The integer primary key value for the dataset, an instruction to
            return it via an ``id`` column, or `None` to ignore it entirely.
        run : `None` or `Select`
            If `Select` (default), include the dataset's run key value (as a
            column labeled with the return value of
            ``CollectionManager.getRunForeignKeyName``).
            If `None`, do not include this column (to constrain the run,
            pass a `RunRecord` as the ``collection`` argument instead.)

        Returns
        -------
        query : `SimpleQuery` or `None`
            A struct containing the SQLAlchemy object that represents a
            simple ``SELECT`` query, or `None` if it is known that there are
            no datasets of this `DatasetType` that match the given
            constraints.
        """
        raise NotImplementedError()

    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`).
    """

205 

206 

class DatasetRecordStorageManager(ABC):
    """An interface that manages the tables that describe datasets.

    `DatasetRecordStorageManager` primarily serves as a container and factory
    for `DatasetRecordStorage` instances, which each provide access to the
    records for a different `DatasetType`.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        universe : `DimensionUniverse`
            Universe graph containing all dimensions known to this `Registry`.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *,
                             name: str = "dataset", constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table.  Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the dataset primary key, but do not add a foreign key constraint.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self, *, universe: DimensionUniverse) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Universe graph containing all dimensions known to this `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        """Return an object that provides access to the records associated with
        the given `DatasetType`, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type, or
            `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should be created (as necessary)
            and an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[DatasetType]:
        """Return an iterator over the dataset types present in this layer.

        Notes
        -----
        Dataset types registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be included.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetRef(self, id: int) -> Optional[DatasetRef]:
        """Return a `DatasetRef` for the given dataset primary key
        value.

        Parameters
        ----------
        id : `int`
            Autoincrement primary key value for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with the
            given primary key values exists in this layer.
        """
        raise NotImplementedError()