Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage") 

25 

26from abc import ABC, abstractmethod 

27from typing import ( 

28 Any, 

29 Iterable, 

30 Iterator, 

31 Optional, 

32 Tuple, 

33 TYPE_CHECKING, 

34) 

35 

36from ...core import ( 

37 DataCoordinate, 

38 DatasetRef, 

39 DatasetType, 

40 ddl, 

41 SimpleQuery, 

42) 

43 

44if TYPE_CHECKING: 

45 from ...core import DimensionUniverse 

46 from ._database import Database, StaticTablesContext 

47 from ._collections import CollectionManager, CollectionRecord, RunRecord 

48 

49 

class DatasetRecordStorage(ABC):
    """An interface that manages the records associated with a particular
    `DatasetType`.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type whose records this object manages.
    """
    def __init__(self, datasetType: DatasetType):
        self.datasetType = datasetType

    @abstractmethod
    def insert(self, run: RunRecord, dataIds: Iterable[DataCoordinate]) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            this dataset will be associated with.
        dataIds : `Iterable` [ `DataCoordinate` ]
            Expanded data IDs (`DataCoordinate` instances) for the
            datasets to be added. The dimensions of all data IDs must be the
            same as ``self.datasetType.dimensions``.

        Returns
        -------
        datasets : `Iterable` [ `DatasetRef` ]
            References to the inserted datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, collection: CollectionRecord, dataId: DataCoordinate) -> Optional[DatasetRef]:
        """Search a collection for a dataset with the given data ID.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to search for the
            dataset. May have any `CollectionType`.
        dataId : `DataCoordinate`
            Complete (but not necessarily expanded) data ID to search with,
            with ``dataId.graph == self.datasetType.dimensions``.

        Returns
        -------
        ref : `DatasetRef`
            A resolved `DatasetRef` (without components populated), or `None`
            if no matching dataset was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def delete(self, datasets: Iterable[DatasetRef]) -> None:
        """Fully delete the given datasets from the registry.

        Parameters
        ----------
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be deleted. All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated. All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will remove
        the existing dataset from that collection.

        Associating the same dataset into a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be disassociated. All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def select(self, collection: CollectionRecord,
               dataId: SimpleQuery.Select.Or[DataCoordinate] = SimpleQuery.Select,
               id: SimpleQuery.Select.Or[Optional[int]] = SimpleQuery.Select,
               run: SimpleQuery.Select.Or[None] = SimpleQuery.Select,
               ) -> Optional[SimpleQuery]:
        """Return a SQLAlchemy object that represents a ``SELECT`` query for
        this `DatasetType`.

        All arguments can either be a value that constrains the query or
        the `SimpleQuery.Select` tag object to indicate that the value should
        be returned in the columns in the ``SELECT`` clause. The default is
        `SimpleQuery.Select`.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to query. May not be
            of type `CollectionType.CHAINED`.
        dataId : `DataCoordinate` or `Select`
            The data ID to restrict results with, or an instruction to return
            the data ID via columns with names
            ``self.datasetType.dimensions.names``.
        id : `int`, `Select`, or `None`
            The integer primary key value for the dataset, an instruction to
            return it via an ``id`` column, or `None` to ignore it
            entirely.
        run : `None` or `Select`
            If `Select` (default), include the dataset's run key value (as
            a column labeled with the return value of
            ``CollectionManager.getRunForeignKeyName``).
            If `None`, do not include this column (to constrain the run,
            pass a `RunRecord` as the ``collection`` argument instead.)

        Returns
        -------
        query : `SimpleQuery` or `None`
            A struct containing the SQLAlchemy object representing a
            simple ``SELECT`` query, or `None` if it is known that there are
            no datasets of this `DatasetType` that match the given constraints.
        """
        raise NotImplementedError()

    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`).
    """

216 

217 

class DatasetRecordStorageManager(ABC):
    """An interface that manages the tables that describe datasets.

    `DatasetRecordStorageManager` primarily serves as a container and factory
    for `DatasetRecordStorage` instances, which each provide access to the
    records for a different `DatasetType`.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        universe : `DimensionUniverse`
            Universe graph containing all dimensions known to this `Registry`.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *,
                             name: str = "dataset", constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table. Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted. `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the dataset primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self, *, universe: DimensionUniverse) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Universe graph containing all dimensions known to this `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        """Return an object that provides access to the records associated
        with the given `DatasetType`, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type, or
            `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should be created (as necessary)
            and an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[DatasetType]:
        """Return an iterator over the dataset types present in this layer.

        Notes
        -----
        Dataset types registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be included.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetRef(self, id: int, *, universe: DimensionUniverse) -> Optional[DatasetRef]:
        """Return a `DatasetRef` for the given dataset primary key
        value.

        Parameters
        ----------
        id : `int`
            Autoincrement primary key value for the dataset.
        universe : `DimensionUniverse`
            All known dimensions.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with the
            given primary key values exists in this layer.
        """
        raise NotImplementedError()