Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage") 

25 

26from abc import ABC, abstractmethod 

27from typing import ( 

28 Any, 

29 Iterable, 

30 Iterator, 

31 Optional, 

32 Tuple, 

33 TYPE_CHECKING, 

34) 

35 

36from ...core import ( 

37 DataCoordinate, 

38 DatasetRef, 

39 DatasetType, 

40 ddl, 

41 SimpleQuery, 

42) 

43from ._versioning import VersionedExtension 

44 

45if TYPE_CHECKING: 45 ↛ 46line 45 didn't jump to line 46, because the condition on line 45 was never true

46 from ...core import DimensionUniverse 

47 from ._database import Database, StaticTablesContext 

48 from ._collections import CollectionManager, CollectionRecord, RunRecord 

49 

50 

class DatasetRecordStorage(ABC):
    """An interface that manages the records associated with a particular
    `DatasetType`.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type whose records this object manages.
    """
    def __init__(self, datasetType: DatasetType):
        self.datasetType = datasetType

    @abstractmethod
    def insert(self, run: RunRecord, dataIds: Iterable[DataCoordinate]) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            this dataset will be associated with.
        dataIds : `Iterable` [ `DataCoordinate` ]
            Expanded data IDs (`DataCoordinate` instances) for the
            datasets to be added.  The dimensions of all data IDs must be the
            same as ``self.datasetType.dimensions``.

        Returns
        -------
        datasets : `Iterable` [ `DatasetRef` ]
            References to the inserted datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, collection: CollectionRecord, dataId: DataCoordinate) -> Optional[DatasetRef]:
        """Search a collection for a dataset with the given data ID.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to search for the
            dataset.  May have any `CollectionType`.
        dataId : `DataCoordinate`
            Complete (but not necessarily expanded) data ID to search with,
            with ``dataId.graph == self.datasetType.dimensions``.

        Returns
        -------
        ref : `DatasetRef`
            A resolved `DatasetRef` (without components populated), or `None`
            if no matching dataset was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def delete(self, datasets: Iterable[DatasetRef]) -> None:
        """Fully delete the given datasets from the registry.

        Parameters
        ----------
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be deleted.  All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.  All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will remove
        the existing dataset from that collection.

        Associating the same dataset into a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be disassociated.  All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def select(self, collection: CollectionRecord,
               dataId: SimpleQuery.Select.Or[DataCoordinate] = SimpleQuery.Select,
               id: SimpleQuery.Select.Or[Optional[int]] = SimpleQuery.Select,
               run: SimpleQuery.Select.Or[None] = SimpleQuery.Select,
               ) -> Optional[SimpleQuery]:
        """Return a SQLAlchemy object that represents a ``SELECT`` query for
        this `DatasetType`.

        All arguments can either be a value that constrains the query or
        the `SimpleQuery.Select` tag object to indicate that the value should
        be returned in the columns in the ``SELECT`` clause.  The default is
        `SimpleQuery.Select`.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to query.  May not be
            of type `CollectionType.CHAINED`.
        dataId : `DataCoordinate` or `Select`
            The data ID to restrict results with, or an instruction to return
            the data ID via columns with names
            ``self.datasetType.dimensions.names``.
        id : `int`, `Select`, or `None`
            The integer primary key value for the dataset, an instruction to
            return it via an ``id`` column, or `None` to ignore it
            entirely.
        run : `None` or `Select`
            If `Select` (default), include the dataset's run key value (as
            a column labeled with the return value of
            ``CollectionManager.getRunForeignKeyName``).
            If `None`, do not include this column (to constrain the run,
            pass a `RunRecord` as the ``collection`` argument instead.)

        Returns
        -------
        query : `SimpleQuery` or `None`
            A struct containing the SQLAlchemy object representing a
            simple ``SELECT`` query, or `None` if it is known that there are
            no datasets of this `DatasetType` that match the given
            constraints.
        """
        raise NotImplementedError()

    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`).
    """

217 

218 

class DatasetRecordStorageManager(VersionedExtension):
    """An interface that manages the tables that describe datasets.

    `DatasetRecordStorageManager` primarily serves as a container and factory
    for `DatasetRecordStorage` instances, which each provide access to the
    records for a different `DatasetType`.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        universe : `DimensionUniverse`
            Universe graph containing all dimensions known to this `Registry`.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *,
                             name: str = "dataset", constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table.  Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the dataset primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self, *, universe: DimensionUniverse) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.
        """
        raise NotImplementedError()

    def __getitem__(self, name: str) -> DatasetRecordStorage:
        """Return the object that provides access to the records associated
        with the given `DatasetType` name.

        This is simply a convenience wrapper for `find` that raises `KeyError`
        when the dataset type is not found.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.

        Raises
        ------
        KeyError
            Raised if there is no dataset type with the given name.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        result = self.find(name)
        if result is None:
            raise KeyError(f"Dataset type with name '{name}' not found.")
        return result

    @abstractmethod
    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        """Return an object that provides access to the records associated
        with the given `DatasetType` name, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type,
            or `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should be created (as necessary)
            and an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[DatasetType]:
        """Return an iterator over the dataset types present in this layer.

        Notes
        -----
        Dataset types registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be included.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetRef(self, id: int, *, universe: DimensionUniverse) -> Optional[DatasetRef]:
        """Return a `DatasetRef` for the given dataset primary key
        value.

        Parameters
        ----------
        id : `int`
            Autoincrement primary key value for the dataset.
        universe : `DimensionUniverse`
            All known dimensions.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with the
            given primary key values exists in this layer.
        """
        raise NotImplementedError()