Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 86%

73 statements  

coverage.py v7.4.3, created at 2024-03-07 11:04 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef")

from abc import ABC, abstractmethod
from collections.abc import Iterable
from contextlib import AbstractContextManager
from typing import TYPE_CHECKING, Any

from lsst.utils.classes import immutable

from ..._dataset_ref import DatasetId, DatasetRef
from ._versioning import VersionedExtension, VersionTuple

if TYPE_CHECKING:
    from ..._dataset_ref import DatasetDatastoreRecords
    from ..._dataset_type import DatasetType
    from ...datastore import DatastoreTransaction
    from ...datastore.stored_file_info import StoredDatastoreItemInfo
    from ...dimensions import DimensionUniverse
    from ._database import Database, StaticTablesContext
    from ._datasets import DatasetRecordStorageManager
    from ._opaque import OpaqueTableStorage, OpaqueTableStorageManager


@immutable
class FakeDatasetRef:
    """A fake `DatasetRef` that can be used internally by butler where
    only the dataset ID is available.

    Should only be used when the registry cannot be used to create a full
    `DatasetRef` from the ID.  A particular use case is during dataset
    deletion when solely the ID is available.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
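
    Examples
    --------
    A minimal sketch of intended use, assuming (as in current daf_butler)
    that `DatasetId` values are UUIDs::

        import uuid

        ref = FakeDatasetRef(uuid.uuid4())
        print(ref)                            # dataset_id=<uuid>
        assert ref == FakeDatasetRef(ref.id)  # equality is by ID only
        assert hash(ref) == hash(ref.id)

    Accessing `datasetType` or ``_datastore_records`` raises
    `AttributeError`, because only the ID is known.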

    """

    __slots__ = ("id",)

    def __init__(self, id: DatasetId):
        self.id = id

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __eq__(self, other: Any) -> bool:
        try:
            return self.id == other.id
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.id)

    id: DatasetId
    """Unique identifier for this dataset.
    """

    @property
    def datasetType(self) -> DatasetType:
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")

    @property
    def _datastore_records(self) -> DatasetDatastoreRecords | None:
        raise AttributeError("A FakeDatasetRef can not be associated with datastore records")


DatasetIdRef = DatasetRef | FakeDatasetRef
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.

102""" 

103 

104 

105class DatastoreRegistryBridge(ABC): 

106 """An abstract base class that defines the interface that a `Datastore` 

107 uses to communicate with a `Registry`. 

108 

109 Parameters 

110 ---------- 

111 datastoreName : `str` 

112 Name of the `Datastore` as it should appear in `Registry` tables 

113 referencing it. 
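
    Examples
    --------
    A hedged sketch of how a concrete `Datastore` typically drives a bridge
    obtained from `DatastoreRegistryBridgeManager.register`; ``bridge`` and
    ``refs`` are assumed to already exist, and the concrete bridge class is
    implementation-defined::

        # Record that this datastore now holds the datasets.
        bridge.insert(refs)

        # Ask the registry which of them it still lists for this datastore.
        present = set(bridge.check(refs))

        # Schedule deletion: move to trash, then empty the trash.
        bridge.moveToTrash(refs, transaction=None)
        with bridge.emptyTrash() as (trashed, artifacts_to_keep):
            for ref, info in trashed:
                ...  # delete the artifacts for ``ref`` here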

    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
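
        Examples
        --------
        A sketch of typical use once artifacts have been written (``bridge``
        and ``refs`` are assumed to exist)::

            # Record the refs for this datastore; use `ensure` instead when
            # some of them may already be recorded.
            bridge.insert(refs)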

        """
        raise NotImplementedError()

    @abstractmethod
    def ensure(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets, skipping any
        refs that are already registered.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Remove dataset location information without any attempt to put it
        in the trash while waiting for external deletes.

        This should be used only to implement `Datastore.forget`, or in cases
        where deleting the actual datastore artifacts cannot fail.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef], transaction: DatastoreTransaction | None) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        transaction : `DatastoreTransaction` or `None`
            Transaction object.  Can be `None` in some bridges or if no
            rollback is required.
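
        Examples
        --------
        A hedged sketch; ``bridge`` and ``refs`` are assumed to exist, and
        `None` is passed because no rollback is needed in this case::

            # Mark the refs as trashed; their artifacts are removed later
            # via `emptyTrash`.
            bridge.moveToTrash(refs, transaction=None)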

        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `~collections.abc.Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.
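
        Examples
        --------
        A sketch of a presence test (``bridge`` and ``refs`` are assumed to
        exist; the returned iterable may be lazy, so materialize it before
        reusing it)::

            present = set(bridge.check(refs))
            missing = [ref for ref in refs if ref not in present]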

        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(
        self,
        records_table: OpaqueTableStorage | None = None,
        record_class: type[StoredDatastoreItemInfo] | None = None,
        record_column: str | None = None,
    ) -> AbstractContextManager[
        tuple[Iterable[tuple[DatasetIdRef, StoredDatastoreItemInfo | None]], set[str] | None]
    ]:
        """Retrieve all the dataset ref IDs that are in the trash
        associated with this datastore, and then remove them if the context
        exits without an exception being raised.

        Parameters
        ----------
        records_table : `OpaqueTableStorage`, optional
            Table of records to query with the trash records.
        record_class : `type` of `StoredDatastoreItemInfo`, optional
            Class to use when reading records from ``records_table``.
        record_column : `str`, optional
            Name of the column in ``records_table`` that refers to the
            artifact.

        Yields
        ------
        matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo`)
            The IDs of datasets that can be safely removed from this datastore
            and the corresponding information from the records table.
            Can be empty.
        artifacts_to_keep : `set` of `str`, optional
            Any external artifacts that are known to the table but which
            should not be deleted.  If `None`, the caller must determine this
            itself.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                matches, to_keep = trashed
                for ref, info in matches:
                    # Remove the artifacts associated with ref.id,
                    # raising an exception if something goes wrong.
                    ...

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator.  If multiple passes are required, it should be converted to
        a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone; this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be treated as an error if those are to be safe.

        If a records table is provided, the trashed records will be deleted
        when the context manager completes.
        """
        raise NotImplementedError()

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """


class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for the dataset ID column.
    registry_schema_version : `VersionTuple` or `None`, optional
        Version of the registry schema.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instances if they wish to store
      internal records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.
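
    Examples
    --------
    A hedged sketch of how a `Datastore` constructor might use the manager it
    is handed (``bridgeManager`` is the instance passed in; the datastore
    name is illustrative)::

        self.bridge = bridgeManager.register("FileDatastore@<root>")
        self.universe = bridgeManager.universe

    Opaque-table storage, if needed, would be obtained from
    ``bridgeManager.opaque`` in a similar way.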

    """

    def __init__(
        self,
        *,
        opaque: OpaqueTableStorageManager,
        universe: DimensionUniverse,
        datasetIdColumnType: type,
        registry_schema_version: VersionTuple | None = None,
    ):
        super().__init__(registry_schema_version=registry_schema_version)
        self.opaque = opaque
        self.universe = universe
        self.datasetIdColumnType = datasetIdColumnType

    @abstractmethod
    def clone(
        self,
        *,
        db: Database,
        opaque: OpaqueTableStorageManager,
    ) -> DatastoreRegistryBridgeManager:
        """Make an independent copy of this manager instance bound to new
        instances of `Database` and other managers.

        Parameters
        ----------
        db : `Database`
            New `Database` object to use when instantiating the manager.
        opaque : `OpaqueTableStorageManager`
            New `OpaqueTableStorageManager` object to use when instantiating
            the manager.

        Returns
        -------
        instance : `DatastoreRegistryBridgeManager`
            New manager instance with the same configuration as this instance,
            but bound to a new `Database` object.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        opaque: OpaqueTableStorageManager,
        datasets: type[DatasetRecordStorageManager],
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        datastores that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default), return a bridge object that is backed by
            storage that will not last past the end of the current process.
            This should be used whenever the same is true of the datasets'
            artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
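
        Examples
        --------
        A hedged sketch from a datastore constructor (``bridgeManager`` and
        ``self.name`` are assumed to exist; an in-memory datastore would use
        ``ephemeral=True`` instead)::

            self._bridge = bridgeManager.register(self.name, ephemeral=False)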

        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetIdRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `~collections.abc.Iterable` [ `str` ]
            All the matching datastores holding this dataset.  Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
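
        Examples
        --------
        A sketch of looking up where a dataset is stored (``manager`` and
        ``ref`` are assumed to exist)::

            names = list(manager.findDatastores(ref))
            if not names:
                print(f"{ref} is not known to any datastore")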

        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """