Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 85%

66 statements  

coverage.py v7.2.7, created at 2023-08-12 09:20 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef")

from abc import ABC, abstractmethod
from collections.abc import Iterable
from contextlib import AbstractContextManager
from typing import TYPE_CHECKING, Any

from lsst.utils.classes import immutable

from ...core import DatasetId, DatasetRef
from ._versioning import VersionedExtension, VersionTuple

if TYPE_CHECKING:
    from ...core import DatasetType, DimensionUniverse, StoredDatastoreItemInfo
    from ...core.datastore import DatastoreTransaction
    from ._database import Database, StaticTablesContext
    from ._datasets import DatasetRecordStorageManager
    from ._opaque import OpaqueTableStorage, OpaqueTableStorageManager


@immutable
class FakeDatasetRef:
    """A fake `DatasetRef` that can be used internally by butler where
    only the dataset ID is available.

    Should only be used when registry can not be used to create a full
    `DatasetRef` from the ID. A particular use case is during dataset
    deletion when solely the ID is available.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
    """

    __slots__ = ("id",)

    def __init__(self, id: DatasetId):
        self.id = id

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __eq__(self, other: Any) -> bool:
        try:
            return self.id == other.id
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.id)

    id: DatasetId
    """Unique identifier for this dataset.
    """

    @property
    def datasetType(self) -> DatasetType:
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")


DatasetIdRef = DatasetRef | FakeDatasetRef
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.
"""
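
# Illustrative sketch only -- not part of daf_butler.  It shows how code that
# has nothing but a dataset ID (e.g. a deletion path) can wrap it in
# `FakeDatasetRef` and still pass it around as a `DatasetIdRef`.  The helper
# name below is hypothetical.
def _example_wrap_dataset_id(dataset_id: DatasetId) -> DatasetIdRef:
    ref = FakeDatasetRef(dataset_id)
    # Equality and hashing are by ID only, so the fake ref can stand in for a
    # full `DatasetRef` in datastore location bookkeeping.
    assert ref == FakeDatasetRef(dataset_id)
    assert hash(ref) == hash(dataset_id)
    return ref
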

class DatastoreRegistryBridge(ABC):
    """An abstract base class that defines the interface that a `Datastore`
    uses to communicate with a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Remove dataset location information without any attempt to put it
        in the trash while waiting for external deletes.

        This should be used only to implement `Datastore.forget`, or in cases
        where deleting the actual datastore artifacts cannot fail.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef], transaction: DatastoreTransaction | None) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        transaction : `DatastoreTransaction` or `None`
            Transaction object. Can be `None` in some bridges or if no
            rollback is required.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `~collections.abc.Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(
        self,
        records_table: OpaqueTableStorage | None = None,
        record_class: type[StoredDatastoreItemInfo] | None = None,
        record_column: str | None = None,
    ) -> AbstractContextManager[
        tuple[Iterable[tuple[DatasetIdRef, StoredDatastoreItemInfo | None]], set[str] | None]
    ]:
        """Retrieve all the dataset ref IDs that are in the trash
        associated with this datastore, and then remove them if the context
        exits without an exception being raised.

        Parameters
        ----------
        records_table : `OpaqueTableStorage`, optional
            Table of records to query alongside the trash records.
        record_class : `type` of `StoredDatastoreItemInfo`, optional
            Class to use when reading records from ``records_table``.
        record_column : `str`, optional
            Name of the column in ``records_table`` that refers to the
            artifact.

        Yields
        ------
        matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo`)
            The IDs of datasets that can be safely removed from this datastore
            and the corresponding information from the records table.
            Can be empty.
        artifacts_to_keep : `set` of `str`, optional
            Any external artifacts that are known to the table but which
            should not be deleted. If `None`, the caller must do this check
            themselves.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                iter, to_keep = trashed
                for ref, info in iter:
                    # Remove artifacts associated with id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator. If multiple passes are required, it should be converted to
        a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone; this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error if such operations are to be safe.

        If a records table is provided, the trashed records will be deleted
        when the context manager completes.
        """
        raise NotImplementedError()

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """
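
# --------------------------------------------------------------------------
# Illustrative sketch only -- NOT part of daf_butler.  It shows one way the
# abstract `DatastoreRegistryBridge` interface above could be satisfied by a
# purely in-memory implementation (e.g. for unit tests), plus a hypothetical
# round trip through it.  All names introduced here (`_InMemoryBridgeSketch`,
# `_example_bridge_round_trip`, "exampleStore") are assumptions, not API.
import contextlib
from collections.abc import Iterator


class _InMemoryBridgeSketch(DatastoreRegistryBridge):
    """Hypothetical bridge that keeps dataset location records in plain sets."""

    def __init__(self, datastoreName: str):
        super().__init__(datastoreName)
        self._records: set[DatasetIdRef] = set()
        self._trash: set[DatasetIdRef] = set()

    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        # Record that this datastore now holds the given datasets.
        self._records.update(refs)

    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        # Drop location records outright, bypassing the trash entirely.
        self._records.difference_update(refs)

    def moveToTrash(self, refs: Iterable[DatasetIdRef], transaction: DatastoreTransaction | None) -> None:
        # A real implementation would register undo actions with
        # ``transaction``; this sketch simply ignores it.
        refs = set(refs)
        self._records.difference_update(refs)
        self._trash.update(refs)

    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        # Report which of the given refs this datastore currently holds.
        return [ref for ref in refs if ref in self._records]

    @contextlib.contextmanager
    def emptyTrash(
        self,
        records_table: OpaqueTableStorage | None = None,
        record_class: type[StoredDatastoreItemInfo] | None = None,
        record_column: str | None = None,
    ) -> Iterator[tuple[Iterable[tuple[DatasetIdRef, StoredDatastoreItemInfo | None]], set[str] | None]]:
        # Yield the trashed refs (with no per-record info in this sketch) and
        # no artifacts-to-keep set; the trash is emptied only if the caller's
        # block finished without raising.
        yield [(ref, None) for ref in self._trash], None
        self._trash.clear()


def _example_bridge_round_trip(refs: list[DatasetIdRef]) -> None:
    # Hypothetical end-to-end use of the sketch above: record the datasets,
    # confirm they are present, trash them, then empty the trash.
    bridge = _InMemoryBridgeSketch("exampleStore")
    bridge.insert(refs)
    assert set(bridge.check(refs)) == set(refs)
    bridge.moveToTrash(refs, transaction=None)
    with bridge.emptyTrash() as (matches, _artifacts_to_keep):
        for ref, _info in matches:
            pass  # a real datastore would delete the artifact for ``ref`` here
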

class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for the dataset ID column.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following
    (see the example below):

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instances if they wish to store
      internal records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.
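
    Examples
    --------
    An illustrative sketch only, where ``manager`` is an instance of this
    class held by a datastore, and ``"myStore"`` and ``refs`` are hypothetical
    names::

        bridge = manager.register("myStore", ephemeral=False)
        universe = manager.universe
        # Later, record what the datastore holds:
        bridge.insert(refs)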

    """

    def __init__(
        self,
        *,
        opaque: OpaqueTableStorageManager,
        universe: DimensionUniverse,
        datasetIdColumnType: type,
        registry_schema_version: VersionTuple | None = None,
    ):
        super().__init__(registry_schema_version=registry_schema_version)
        self.opaque = opaque
        self.universe = universe
        self.datasetIdColumnType = datasetIdColumnType

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        opaque: OpaqueTableStorageManager,
        datasets: type[DatasetRecordStorageManager],
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default), return a bridge object that is backed by
            storage that will not last past the end of the current process.
            This should be used whenever the same is true of the datasets'
            artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetIdRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `~collections.abc.Iterable` [ `str` ]
            All the matching datastores holding this dataset. Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
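
        Examples
        --------
        An illustrative sketch only, where ``manager`` is an instance of this
        class and ``ref`` is a hypothetical `DatasetIdRef`::

            names = list(manager.findDatastores(ref))
            if not names:
                raise LookupError(f"Dataset {ref} is not stored in any datastore.")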

        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """