Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 79%

64 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-02 02:16 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef") 

24 

25from abc import ABC, abstractmethod 

26from typing import TYPE_CHECKING, Any, ContextManager, Iterable, Optional, Set, Tuple, Type, Union 

27 

28from lsst.utils.classes import immutable 

29 

30from ...core import DatasetId, DatasetRef 

31from ._versioning import VersionedExtension, VersionTuple 

32 

33if TYPE_CHECKING: 

34 from ...core import DatasetType, DimensionUniverse, StoredDatastoreItemInfo 

35 from ...core.datastore import DatastoreTransaction 

36 from ._database import Database, StaticTablesContext 

37 from ._datasets import DatasetRecordStorageManager 

38 from ._opaque import OpaqueTableStorage, OpaqueTableStorageManager 

39 

40 

@immutable
class FakeDatasetRef:
    """Minimal stand-in for a `DatasetRef` that carries only a dataset ID.

    Intended for internal butler use in situations where the registry can
    not be used to build a complete `DatasetRef` from the ID; dataset
    deletion, where only the ID is available, is one such case.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
    """

    __slots__ = ("id",)

    id: DatasetId
    """Unique identifier for this dataset.
    """

    def __init__(self, id: DatasetId):
        self.id = id

    @property
    def datasetType(self) -> DatasetType:
        # Only the ID is stored, so there is never a dataset type to return.
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")

    def __hash__(self) -> int:
        # Hash on the ID alone, consistent with ``__eq__``.
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        # Anything exposing an ``id`` attribute is comparable; for anything
        # else defer to the other operand by returning `NotImplemented`.
        try:
            same = self.id == other.id
        except AttributeError:
            return NotImplemented
        return same

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

83 

84 

# Union of the full `DatasetRef` and the ID-only `FakeDatasetRef`; the bridge
# interfaces in this module annotate their dataset arguments with it.
DatasetIdRef = Union[DatasetRef, FakeDatasetRef]
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.
"""

88 

89 

class DatastoreRegistryBridge(ABC):
    """An abstract base class that defines the interface that a `Datastore`
    uses to communicate with a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Remove dataset location information without any attempt to put it
        in the trash while waiting for external deletes.

        This should be used only to implement `Datastore.forget`, or in cases
        where deleting the actual datastore artifacts cannot fail.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef], transaction: Optional[DatastoreTransaction]) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        transaction : `DatastoreTransaction` or `None`
            Transaction object. Can be `None` in some bridges or if no rollback
            is required.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `~collections.abc.Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(
        self,
        records_table: Optional[OpaqueTableStorage] = None,
        record_class: Optional[Type[StoredDatastoreItemInfo]] = None,
        record_column: Optional[str] = None,
    ) -> ContextManager[
        Tuple[Iterable[Tuple[DatasetIdRef, Optional[StoredDatastoreItemInfo]]], Optional[Set[str]]]
    ]:
        """Retrieve all the dataset ref IDs that are in the trash
        associated with this datastore, and then remove them if the context
        exits without an exception being raised.

        Parameters
        ----------
        records_table : `OpaqueTableStorage`, optional
            Table of records to query with the trash records.
        record_class : `type` of `StoredDatastoreItemInfo`, optional
            Class to use when reading records from ``records_table``.
        record_column : `str`, optional
            Name of the column in ``records_table`` that refers to the
            artifact.

        Yields
        ------
        matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo`)
            The IDs of datasets that can be safely removed from this datastore
            and the corresponding information from the records table (the
            second element is annotated as optional and so may be `None`).
            Can be empty.
        artifacts_to_keep : `set` of `str`, optional
            Any external artifacts that are known to the table but which should
            not be deleted. If `None`, the caller should check themselves.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                matches, to_keep = trashed
                for ref, info in matches:
                    # Remove artifacts associated with ref.id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The iterable of matches yielded by the context manager may be a
        single-pass iterator. If multiple passes are required, it should be
        converted to a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.

        If a records table is provided the trashed records will be deleted
        when the context manager completes.
        """
        raise NotImplementedError()

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """

223 

224 

class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for dataset ID column.
    registry_schema_version : `VersionTuple` or `None`, optional
        Schema version of this extension as defined in registry.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instances if they wish to store
      internal records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.

    """

    def __init__(
        self,
        *,
        opaque: OpaqueTableStorageManager,
        universe: DimensionUniverse,
        datasetIdColumnType: type,
        registry_schema_version: VersionTuple | None = None,
    ):
        super().__init__(registry_schema_version=registry_schema_version)
        self.opaque = opaque
        self.universe = universe
        self.datasetIdColumnType = datasetIdColumnType

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        opaque: OpaqueTableStorageManager,
        datasets: Type[DatasetRecordStorageManager],
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        changes that may have been made by other clients since it was
        initialized or last refreshed.
        """
        # NOTE(review): the earlier wording spoke of "collections" being
        # registered by other clients, which looks copied from another
        # manager; this class only registers datastores. Confirm the intended
        # scope against the concrete implementations.
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default), return a bridge object that is
            backed by storage that will not last past the end of the current
            process. This should be used whenever the same is true of the
            dataset's artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetIdRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `~collections.abc.Iterable` [ `str` ]
            All the matching datastores holding this dataset. Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """

    datasetIdColumnType: type
    """Type for dataset ID column, as passed at construction (`type`).
    """