Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 85%

71 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-27 09:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef") 

30 

31from abc import ABC, abstractmethod 

32from collections.abc import Iterable 

33from contextlib import AbstractContextManager 

34from typing import TYPE_CHECKING, Any 

35 

36from lsst.utils.classes import immutable 

37 

38from ..._dataset_ref import DatasetId, DatasetRef 

39from ._versioning import VersionedExtension, VersionTuple 

40 

41if TYPE_CHECKING: 

42 from ..._dataset_ref import DatasetDatastoreRecords 

43 from ..._dataset_type import DatasetType 

44 from ...datastore import DatastoreTransaction 

45 from ...datastore.stored_file_info import StoredDatastoreItemInfo 

46 from ...dimensions import DimensionUniverse 

47 from ._database import Database, StaticTablesContext 

48 from ._datasets import DatasetRecordStorageManager 

49 from ._opaque import OpaqueTableStorage, OpaqueTableStorageManager 

50 

51 

@immutable
class FakeDatasetRef:
    """A stand-in for `DatasetRef` used internally by butler when only the
    dataset ID is known.

    Should only be used when registry can not be used to create a full
    `DatasetRef` from the ID.  A particular use case is during dataset
    deletion when solely the ID is available.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
    """

    __slots__ = ("id",)

    id: DatasetId
    """Unique identifier for this dataset.
    """

    def __init__(self, id: DatasetId):
        self.id = id

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __eq__(self, other: Any) -> bool:
        # Equality is defined solely by the dataset ID; defer to the other
        # operand when the comparison cannot be made.
        try:
            return self.id == other.id
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        # Must agree with __eq__: hash on the ID alone.
        return hash(self.id)

    @property
    def datasetType(self) -> DatasetType:
        # A fake ref deliberately carries no dataset type information.
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")

    @property
    def _datastore_records(self) -> DatasetDatastoreRecords | None:
        # Likewise, no datastore records are ever attached to a fake ref.
        raise AttributeError("A FakeDatasetRef can not be associated with datastore records")

98 

99 

# Accepted anywhere only the dataset ID is needed: either a full
# `DatasetRef` or the lightweight `FakeDatasetRef` stand-in above.
DatasetIdRef = DatasetRef | FakeDatasetRef
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.
"""

103 

104 

class DatastoreRegistryBridge(ABC):
    """An abstract base class that defines the interface that a `Datastore`
    uses to communicate with a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def ensure(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets, skipping if
        the ref is already registered.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Remove dataset location information without any attempt to put it
        in the trash while waiting for external deletes.

        This should be used only to implement `Datastore.forget`, or in cases
        where deleting the actual datastore artifacts cannot fail.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef], transaction: DatastoreTransaction | None) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        transaction : `DatastoreTransaction` or `None`
            Transaction object.  Can be `None` in some bridges or if no
            rollback is required.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `~collections.abc.Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(
        self,
        records_table: OpaqueTableStorage | None = None,
        record_class: type[StoredDatastoreItemInfo] | None = None,
        record_column: str | None = None,
    ) -> AbstractContextManager[
        tuple[Iterable[tuple[DatasetIdRef, StoredDatastoreItemInfo | None]], set[str] | None]
    ]:
        """Retrieve all the dataset ref IDs that are in the trash
        associated with this datastore, and then remove them if the context
        exits without an exception being raised.

        Parameters
        ----------
        records_table : `OpaqueTableStorage`, optional
            Table of records to query with the trash records.
        record_class : `type` of `StoredDatastoreItemInfo`, optional
            Class to use when reading records from ``records_table``.
        record_column : `str`, optional
            Name of the column in ``records_table`` that refers to the
            artifact.

        Yields
        ------
        matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo`)
            The IDs of datasets that can be safely removed from this
            datastore and the corresponding information from the records
            table.  Can be empty.
        artifacts_to_keep : `set` of `str`, optional
            Any external artifacts that are known to the table but which
            should not be deleted.  If `None`, the caller should check
            themselves.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                iter, to_keep = trashed
                for ref, info in iter:
                    # Remove artifacts associated with id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator.  If multiple passes are required, it should be converted
        to a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.

        If a table record is provided the trashed records will be deleted
        when the context manager completes.
        """
        raise NotImplementedError()

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """

250 

251 

class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for dataset ID column.
    registry_schema_version : `VersionTuple` or `None`, optional
        Schema version of this extension as defined in registry.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instance if they wish to store internal
      records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.

    """

    def __init__(
        self,
        *,
        opaque: OpaqueTableStorageManager,
        universe: DimensionUniverse,
        datasetIdColumnType: type,
        registry_schema_version: VersionTuple | None = None,
    ):
        super().__init__(registry_schema_version=registry_schema_version)
        self.opaque = opaque
        self.universe = universe
        self.datasetIdColumnType = datasetIdColumnType

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        opaque: OpaqueTableStorageManager,
        datasets: type[DatasetRecordStorageManager],
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside
            the Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset
            tables in this registry; should be used only to create foreign
            keys to those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    # NOTE(review): the signature default for ``ephemeral`` is True, but the
    # original docstring stated that False was the default — confirm the
    # intended default with the maintainers before relying on it.
    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the signature default), return a bridge object that
            is backed by storage that will not last past the end of the
            current process.  This should be used whenever the same is true
            of the dataset's artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use
            to communicate with the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetIdRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `~collections.abc.Iterable` [ `str` ]
            All the matching datastores holding this dataset.  Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """