Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 85%

68 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-25 15:14 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef") 

24 

25from abc import ABC, abstractmethod 

26from collections.abc import Iterable 

27from contextlib import AbstractContextManager 

28from typing import TYPE_CHECKING, Any 

29 

30from lsst.utils.classes import immutable 

31 

32from ...core import DatasetId, DatasetRef 

33from ._versioning import VersionedExtension, VersionTuple 

34 

35if TYPE_CHECKING: 

36 from ...core import DatasetType, DimensionUniverse, StoredDatastoreItemInfo 

37 from ...core.datastore import DatastoreTransaction 

38 from ._database import Database, StaticTablesContext 

39 from ._datasets import DatasetRecordStorageManager 

40 from ._opaque import OpaqueTableStorage, OpaqueTableStorageManager 

41 

42 

@immutable
class FakeDatasetRef:
    """A stand-in for `DatasetRef` used internally by butler when nothing
    but the dataset ID is known.

    Intended only for situations in which registry cannot be consulted to
    build a complete `DatasetRef` from the ID; dataset deletion, where just
    the ID is at hand, is a typical use case.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
    """

    __slots__ = ("id",)

    # Unique identifier for this dataset; the only state this fake carries.
    id: DatasetId

    def __init__(self, id: DatasetId):
        self.id = id

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __eq__(self, other: Any) -> bool:
        # Duck-typed comparison by ID only, so a FakeDatasetRef compares
        # equal to anything exposing a matching ``id`` attribute (including
        # a real DatasetRef); defer to the other operand otherwise.
        try:
            return self.id == other.id
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.id)

    @property
    def datasetType(self) -> DatasetType:
        # A fake ref deliberately has no dataset type; surface the access
        # as a missing attribute rather than returning something bogus.
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")

85 

86 

# PEP 604 runtime union (types.UnionType), so the alias is usable both in
# annotations and at runtime.
DatasetIdRef = DatasetRef | FakeDatasetRef
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.
"""

90 

91 

class DatastoreRegistryBridge(ABC):
    """An abstract base class that defines the interface that a `Datastore`
    uses to communicate with a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def ensure(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets, skipping if
        the ref is already registered.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Remove dataset location information without any attempt to put it
        in the trash while waiting for external deletes.

        This should be used only to implement `Datastore.forget`, or in cases
        where deleting the actual datastore artifacts cannot fail.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef], transaction: DatastoreTransaction | None) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        transaction : `DatastoreTransaction` or `None`
            Transaction object. Can be `None` in some bridges or if no rollback
            is required.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `~collections.abc.Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(
        self,
        records_table: OpaqueTableStorage | None = None,
        record_class: type[StoredDatastoreItemInfo] | None = None,
        record_column: str | None = None,
    ) -> AbstractContextManager[
        tuple[Iterable[tuple[DatasetIdRef, StoredDatastoreItemInfo | None]], set[str] | None]
    ]:
        """Retrieve all the dataset ref IDs that are in the trash
        associated with this datastore, and then remove them if the context
        exits without an exception being raised.

        Parameters
        ----------
        records_table : `OpaqueTableStorage`, optional
            Table of records to query with the trash records.
        record_class : `type` of `StoredDatastoreItemInfo`, optional
            Class to use when reading records from ``records_table``.
        record_column : `str`, optional
            Name of the column in ``records_table`` that refers to the
            artifact.

        Yields
        ------
        matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo`)
            The IDs of datasets that can be safely removed from this datastore
            and the corresponding information from the records table.
            Can be empty.
        artifacts_to_keep : `set` of `str`, optional
            Any external artifacts that are known to the table but which should
            not be deleted. If `None`, the caller should check themselves.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                iter, to_keep = trashed
                for ref, info in iter:
                    # Remove artifacts associated with id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator. If multiple passes are required, it should be converted to
        a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.

        If a table record is provided the trashed records will be deleted
        when the context manager completes.
        """
        raise NotImplementedError()

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """

237 

238 

class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for dataset ID column.
    registry_schema_version : `VersionTuple` or `None`, optional
        Schema version of this extension as defined in registry.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instance if they wish to store internal
      records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.

    """

    def __init__(
        self,
        *,
        opaque: OpaqueTableStorageManager,
        universe: DimensionUniverse,
        datasetIdColumnType: type,
        registry_schema_version: VersionTuple | None = None,
    ):
        super().__init__(registry_schema_version=registry_schema_version)
        self.opaque = opaque
        self.universe = universe
        self.datasetIdColumnType = datasetIdColumnType

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        opaque: OpaqueTableStorageManager,
        datasets: type[DatasetRecordStorageManager],
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    # NOTE(review): the docstring previously stated "`False` is default",
    # contradicting the ``ephemeral: bool = True`` signature; the text below
    # now matches the code — confirm that `True` is the intended default.
    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default), return a bridge object that is
            backed by storage that will not last past the end of the current
            process. This should be used whenever the same is true of the
            dataset's artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetIdRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `~collections.abc.Iterable` [ `str` ]
            All the matching datastores holding this dataset. Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """