Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 79%

65 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-28 10:10 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef") 

24 

25from abc import ABC, abstractmethod 

26from collections.abc import Iterable 

27from typing import TYPE_CHECKING, Any, ContextManager 

28 

29from lsst.utils.classes import immutable 

30 

31from ...core import DatasetId, DatasetRef 

32from ._versioning import VersionedExtension, VersionTuple 

33 

34if TYPE_CHECKING: 

35 from ...core import DatasetType, DimensionUniverse, StoredDatastoreItemInfo 

36 from ...core.datastore import DatastoreTransaction 

37 from ._database import Database, StaticTablesContext 

38 from ._datasets import DatasetRecordStorageManager 

39 from ._opaque import OpaqueTableStorage, OpaqueTableStorageManager 

40 

41 

@immutable
class FakeDatasetRef:
    """Stand-in for a `DatasetRef` that carries nothing but a dataset ID.

    Intended for butler-internal code paths in which the registry cannot be
    asked to build a complete `DatasetRef` from the ID; dataset deletion,
    where only the ID is left, is the motivating case.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
    """

    __slots__ = ("id",)

    id: DatasetId
    """Unique identifier for this dataset.
    """

    def __init__(self, id: DatasetId):
        self.id = id

    @property
    def datasetType(self) -> DatasetType:
        # Only the ID is stored, so there is never a dataset type to hand
        # back; the property always raises.
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")

    def __hash__(self) -> int:
        # Hash on the ID alone, consistent with ``__eq__``.
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        # Anything exposing an ``id`` attribute is comparable; the whole
        # comparison stays inside the ``try`` so that an `AttributeError`
        # from any part of it results in `NotImplemented`.
        try:
            same_id = self.id == other.id
        except AttributeError:
            return NotImplemented
        return same_id

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

84 

85 

# This union is evaluated when the module is imported (it is a plain
# assignment, not an annotation), so both operands must exist at runtime;
# ``DatasetRef`` is imported outside the ``TYPE_CHECKING`` guard.
DatasetIdRef = DatasetRef | FakeDatasetRef
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.
"""

89 

90 

class DatastoreRegistryBridge(ABC):
    """Abstract interface through which a `Datastore` talks to a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Drop dataset location information immediately, bypassing the
        trash.

        No attempt is made to park the entries in the trash while external
        deletes are pending.  Use this only to implement `Datastore.forget`,
        or in cases where deleting the actual datastore artifacts cannot
        fail.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef], transaction: DatastoreTransaction | None) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        transaction : `DatastoreTransaction` or `None`
            Transaction object.  Can be `None` in some bridges or if no
            rollback is required.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `~collections.abc.Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(
        self,
        records_table: OpaqueTableStorage | None = None,
        record_class: type[StoredDatastoreItemInfo] | None = None,
        record_column: str | None = None,
    ) -> ContextManager[
        tuple[Iterable[tuple[DatasetIdRef, StoredDatastoreItemInfo | None]], set[str] | None]
    ]:
        """Retrieve all the dataset ref IDs that are in the trash associated
        with this datastore, and then remove them if the context exits
        without an exception being raised.

        Parameters
        ----------
        records_table : `OpaqueTableStorage`, optional
            Table of records to query with the trash records.
        record_class : `type` of `StoredDatastoreItemInfo`, optional
            Class to use when reading records from ``records_table``.
        record_column : `str`, optional
            Name of the column in ``records_table`` that refers to the
            artifact.

        Yields
        ------
        matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo` or \
                `None`)
            The IDs of datasets that can be safely removed from this
            datastore and the corresponding information from the records
            table.  Can be empty.
        artifacts_to_keep : `set` of `str`, or `None`
            Any external artifacts that are known to the table but which
            should not be deleted.  If `None`, the caller should check
            themselves.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                matches, to_keep = trashed
                for ref, info in matches:
                    # Remove artifacts associated with id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator.  If multiple passes are required, it should be converted
        to a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.

        If a records table is provided the trashed records will be deleted
        when the context manager completes.
        """
        raise NotImplementedError()

224 

225 

class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for dataset ID column.
    registry_schema_version : `VersionTuple` or `None`, optional
        Schema version of this extension as defined in registry; passed
        through unchanged to the `VersionedExtension` constructor.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instance if they wish to store internal
      records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.
    """

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """

    def __init__(
        self,
        *,
        opaque: OpaqueTableStorageManager,
        universe: DimensionUniverse,
        datasetIdColumnType: type,
        registry_schema_version: VersionTuple | None = None,
    ):
        super().__init__(registry_schema_version=registry_schema_version)
        self.opaque = opaque
        self.universe = universe
        # Stored as a plain instance attribute; unlike ``opaque`` and
        # ``universe`` it has no class-level declaration.
        self.datasetIdColumnType = datasetIdColumnType

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        opaque: OpaqueTableStorageManager,
        datasets: type[DatasetRecordStorageManager],
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True`, return a bridge object that is backed by storage that
            will not last past the end of the current process.  This should
            be used whenever the same is true of the dataset's artifacts.
            The default declared by this abstract signature is `True`.
            NOTE(review): this docstring previously described `False` as the
            default; confirm which default is intended.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetIdRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `~collections.abc.Iterable` [ `str` ]
            All the matching datastores holding this dataset.  Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()