Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage") 

25 

26from abc import ABC, abstractmethod 

27from typing import ( 

28 Any, 

29 Iterable, 

30 Iterator, 

31 Optional, 

32 Tuple, 

33 TYPE_CHECKING, 

34) 

35 

36from ...core import ( 

37 DataCoordinate, 

38 DatasetRef, 

39 DatasetType, 

40 ddl, 

41 SimpleQuery, 

42) 

43from ._versioning import VersionedExtension 

44 

# These imports are needed only for static type annotations; guarding them
# keeps runtime import cost down and avoids circular imports.
if TYPE_CHECKING:
    from ...core import DimensionUniverse
    from ._collections import CollectionManager, CollectionRecord, RunRecord
    from ._database import Database, StaticTablesContext

49 

50 

class DatasetRecordStorage(ABC):
    """An interface that manages the records associated with a particular
    `DatasetType`.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type whose records this object manages.
    """
    def __init__(self, datasetType: DatasetType):
        self.datasetType = datasetType

    @abstractmethod
    def insert(self, run: RunRecord, dataIds: Iterable[DataCoordinate]) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            this dataset will be associated with.
        dataIds : `Iterable` [ `DataCoordinate` ]
            Expanded data IDs (`DataCoordinate` instances) for the
            datasets to be added.  The dimensions of all data IDs must be the
            same as ``self.datasetType.dimensions``.

        Returns
        -------
        datasets : `Iterable` [ `DatasetRef` ]
            References to the inserted datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, collection: CollectionRecord, dataId: DataCoordinate) -> Optional[DatasetRef]:
        """Search a collection for a dataset with the given data ID.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to search for the
            dataset.  May have any `CollectionType`.
        dataId : `DataCoordinate`
            Complete (but not necessarily expanded) data ID to search with,
            with ``dataId.graph == self.datasetType.dimensions``.

        Returns
        -------
        ref : `DatasetRef`
            A resolved `DatasetRef` (without components populated), or `None`
            if no matching dataset was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def delete(self, datasets: Iterable[DatasetRef]) -> None:
        """Fully delete the given datasets from the registry.

        Parameters
        ----------
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be deleted.  All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.  All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will remove
        the existing dataset from that collection.

        Associating the same dataset into a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be disassociated.  All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def select(self, collection: CollectionRecord,
               dataId: SimpleQuery.Select.Or[DataCoordinate] = SimpleQuery.Select,
               id: SimpleQuery.Select.Or[Optional[int]] = SimpleQuery.Select,
               run: SimpleQuery.Select.Or[None] = SimpleQuery.Select,
               ) -> Optional[SimpleQuery]:
        """Return a SQLAlchemy object that represents a ``SELECT`` query for
        this `DatasetType`.

        All arguments can either be a value that constrains the query or
        the `SimpleQuery.Select` tag object to indicate that the value should
        be returned in the columns in the ``SELECT`` clause.  The default is
        `SimpleQuery.Select`.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to query.  May not be
            of type `CollectionType.CHAINED`.
        dataId : `DataCoordinate` or `Select`
            The data ID to restrict results with, or an instruction to return
            the data ID via columns with names
            ``self.datasetType.dimensions.names``.
        id : `int`, `Select` or `None`
            The integer primary key value for the dataset, an instruction to
            return it via an ``id`` column, or `None` to ignore it
            entirely.
        run : `None` or `Select`
            If `Select` (default), include the dataset's run key value (as
            a column labeled with the return value of
            ``CollectionManager.getRunForeignKeyName``).
            If `None`, do not include this column (to constrain the run,
            pass a `RunRecord` as the ``collection`` argument instead).

        Returns
        -------
        query : `SimpleQuery` or `None`
            A struct containing the SQLAlchemy object representing a
            simple ``SELECT`` query, or `None` if it is known that there are
            no datasets of this `DatasetType` that match the given constraints.
        """
        raise NotImplementedError()

    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`).
    """

217 

218 

class DatasetRecordStorageManager(VersionedExtension):
    """An interface that manages the tables that describe datasets.

    `DatasetRecordStorageManager` primarily serves as a container and factory
    for `DatasetRecordStorage` instances, which each provide access to the
    records for a different `DatasetType`.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        universe : `DimensionUniverse`
            Universe graph containing all dimensions known to this `Registry`.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *,
                             name: str = "dataset", constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table.  Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the dataset primary key, but do not add a foreign key constraint.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self, *, universe: DimensionUniverse) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        """Return an object that provides access to the records associated
        with the given `DatasetType`, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type,
            or `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should be created (as necessary)
            and an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[DatasetType]:
        """Return an iterator over the dataset types present in this layer.

        Notes
        -----
        Dataset types registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be included.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetRef(self, id: int, *, universe: DimensionUniverse) -> Optional[DatasetRef]:
        """Return a `DatasetRef` for the given dataset primary key
        value.

        Parameters
        ----------
        id : `int`
            Autoincrement primary key value for the dataset.
        universe : `DimensionUniverse`
            All known dimensions.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with the
            given primary key values exists in this layer.
        """
        raise NotImplementedError()