Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 85%

71 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-16 10:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef") 

30 

31from abc import ABC, abstractmethod 

32from collections.abc import Iterable 

33from contextlib import AbstractContextManager 

34from typing import TYPE_CHECKING, Any 

35 

36from lsst.utils.classes import immutable 

37 

38from ..._dataset_ref import DatasetId, DatasetRef 

39from ._versioning import VersionedExtension, VersionTuple 

40 

41if TYPE_CHECKING: 

42 from ..._dataset_ref import DatasetDatastoreRecords 

43 from ..._dataset_type import DatasetType 

44 from ...datastore import DatastoreTransaction 

45 from ...datastore.stored_file_info import StoredDatastoreItemInfo 

46 from ...dimensions import DimensionUniverse 

47 from ._database import Database, StaticTablesContext 

48 from ._datasets import DatasetRecordStorageManager 

49 from ._opaque import OpaqueTableStorage, OpaqueTableStorageManager 

50 

51 

52@immutable 

53class FakeDatasetRef: 

54 """A fake `DatasetRef` that can be used internally by butler where 

55 only the dataset ID is available. 

56 

57 Should only be used when registry can not be used to create a full 

58 `DatasetRef` from the ID. A particular use case is during dataset 

59 deletion when solely the ID is available. 

60 

61 Parameters 

62 ---------- 

63 id : `DatasetId` 

64 The dataset ID. 

65 """ 

66 

67 __slots__ = ("id",) 

68 

69 def __init__(self, id: DatasetId): 

70 self.id = id 

71 

72 def __str__(self) -> str: 

73 return f"dataset_id={self.id}" 

74 

75 def __repr__(self) -> str: 

76 return f"FakeDatasetRef({self.id})" 

77 

78 def __eq__(self, other: Any) -> bool: 

79 try: 

80 return self.id == other.id 

81 except AttributeError: 

82 return NotImplemented 

83 

84 def __hash__(self) -> int: 

85 return hash(self.id) 

86 

87 id: DatasetId 

88 """Unique identifier for this dataset. 

89 """ 

90 

91 @property 

92 def datasetType(self) -> DatasetType: 

93 raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType") 

94 

95 @property 

96 def _datastore_records(self) -> DatasetDatastoreRecords | None: 

97 raise AttributeError("A FakeDatasetRef can not be associated with datastore records") 

98 

99 

100DatasetIdRef = DatasetRef | FakeDatasetRef 

101"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`. 

102""" 

103 

104 

105class DatastoreRegistryBridge(ABC): 

106 """An abstract base class that defines the interface that a `Datastore` 

107 uses to communicate with a `Registry`. 

108 

109 Parameters 

110 ---------- 

111 datastoreName : `str` 

112 Name of the `Datastore` as it should appear in `Registry` tables 

113 referencing it. 

114 """ 

115 

116 def __init__(self, datastoreName: str): 

117 self.datastoreName = datastoreName 

118 

119 @abstractmethod 

120 def insert(self, refs: Iterable[DatasetIdRef]) -> None: 

121 """Record that a datastore holds the given datasets. 

122 

123 Parameters 

124 ---------- 

125 refs : `~collections.abc.Iterable` of `DatasetIdRef` 

126 References to the datasets. 

127 """ 

128 raise NotImplementedError() 

129 

130 @abstractmethod 

131 def ensure(self, refs: Iterable[DatasetIdRef]) -> None: 

132 """Record that a datastore holds the given datasets, skipping if 

133 the ref is already registered. 

134 

135 Parameters 

136 ---------- 

137 refs : `~collections.abc.Iterable` of `DatasetIdRef` 

138 References to the datasets. 

139 """ 

140 raise NotImplementedError() 

141 

142 @abstractmethod 

143 def forget(self, refs: Iterable[DatasetIdRef]) -> None: 

144 """Remove dataset location information without any attempt to put it 

145 in the trash while waiting for external deletes. 

146 

147 This should be used only to implement `Datastore.forget`, or in cases 

148 where deleting the actual datastore artifacts cannot fail. 

149 

150 Parameters 

151 ---------- 

152 refs : `~collections.abc.Iterable` of `DatasetIdRef` 

153 References to the datasets. 

154 """ 

155 raise NotImplementedError() 

156 

157 @abstractmethod 

158 def moveToTrash(self, refs: Iterable[DatasetIdRef], transaction: DatastoreTransaction | None) -> None: 

159 """Move dataset location information to trash. 

160 

161 Parameters 

162 ---------- 

163 refs : `~collections.abc.Iterable` of `DatasetIdRef` 

164 References to the datasets. 

165 transaction : `DatastoreTransaction` or `None` 

166 Transaction object. Can be `None` in some bridges or if no rollback 

167 is required. 

168 """ 

169 raise NotImplementedError() 

170 

171 @abstractmethod 

172 def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]: 

173 """Check which refs are listed for this datastore. 

174 

175 Parameters 

176 ---------- 

177 refs : `~collections.abc.Iterable` of `DatasetIdRef` 

178 References to the datasets. 

179 

180 Returns 

181 ------- 

182 present : `~collections.abc.Iterable` [ `DatasetIdRef` ] 

183 Datasets from ``refs`` that are recorded as being in this 

184 datastore. 

185 """ 

186 raise NotImplementedError() 

187 

188 @abstractmethod 

189 def emptyTrash( 

190 self, 

191 records_table: OpaqueTableStorage | None = None, 

192 record_class: type[StoredDatastoreItemInfo] | None = None, 

193 record_column: str | None = None, 

194 ) -> AbstractContextManager[ 

195 tuple[Iterable[tuple[DatasetIdRef, StoredDatastoreItemInfo | None]], set[str] | None] 

196 ]: 

197 """Retrieve all the dataset ref IDs that are in the trash 

198 associated with this datastore, and then remove them if the context 

199 exits without an exception being raised. 

200 

201 Parameters 

202 ---------- 

203 records_table : `OpaqueTableStorage`, optional 

204 Table of records to query with the trash records. 

205 record_class : `type` of `StoredDatastoreItemInfo`, optional 

206 Class to use when reading records from ``records_table``. 

207 record_column : `str`, optional 

208 Name of the column in records_table that refers to the artifact. 

209 

210 Yields 

211 ------ 

212 matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo` or `None`) 

213 The IDs of datasets that can be safely removed from this datastore 

214 and the corresponding information from the records table. 

215 Can be empty. 

216 artifacts_to_keep : `set` of `str`, optional 

217 Any external artifacts that are known to the table but which should 

218 not be deleted. If `None`, the caller should check themselves. 

219 

220 Examples 

221 -------- 

222 Typical usage by a Datastore is something like:: 

223 

224 with self.bridge.emptyTrash() as trashed: 

225 iter, to_keep = trashed 

226 for ref, info in iter: 

227 # Remove artifacts associated with id, 

228 # raise an exception if something goes wrong. 

229 

230 Notes 

231 ----- 

232 The object yielded by the context manager may be a single-pass 

233 iterator. If multiple passes are required, it should be converted to 

234 a `list` or other container. 

235 

236 Datastores should never raise (except perhaps in testing) when an 

237 artifact cannot be removed only because it is already gone - this 

238 condition is an unavoidable outcome of concurrent delete operations, 

239 and must not be considered an error for those to be safe. 

240 

241 If ``records_table`` is provided the trashed records will be deleted 

242 when the context manager completes. 

243 """ 

244 raise NotImplementedError() 

245 

246 datastoreName: str 

247 """The name of the `Datastore` as it should appear in `Registry` tables 

248 (`str`). 

249 """ 

250 

251 

252class DatastoreRegistryBridgeManager(VersionedExtension): 

253 """An abstract base class that defines the interface between `Registry` 

254 and `Datastore` when a new `Datastore` is constructed. 

255 

256 Parameters 

257 ---------- 

258 opaque : `OpaqueTableStorageManager` 

259 Manager object for opaque table storage in the `Registry`. 

260 universe : `DimensionUniverse` 

261 All dimensions known to the `Registry`. 

262 datasetIdColumnType : `type` 

263 Type for dataset ID column. 

264 registry_schema_version : `VersionTuple` or `None`, optional 

265 Version of registry schema. 

266 

267 Notes 

268 ----- 

269 Datastores are passed an instance of `DatastoreRegistryBridgeManager` at 

270 construction, and should use it to obtain and keep any of the following: 

271 

272 - a `DatastoreRegistryBridge` instance to record in the `Registry` what is 

273 present in the datastore (needed by all datastores that are not just 

274 forwarders); 

275 

276 - one or more `OpaqueTableStorage` instances if they wish to store internal 

277 records in the `Registry` database; 

278 

279 - the `DimensionUniverse`, if they need it to (e.g.) construct or validate 

280 filename templates. 

281 """ 

282 

283 def __init__( 

284 self, 

285 *, 

286 opaque: OpaqueTableStorageManager, 

287 universe: DimensionUniverse, 

288 datasetIdColumnType: type, 

289 registry_schema_version: VersionTuple | None = None, 

290 ): 

291 super().__init__(registry_schema_version=registry_schema_version) 

292 self.opaque = opaque 

293 self.universe = universe 

294 self.datasetIdColumnType = datasetIdColumnType 

295 

296 @classmethod 

297 @abstractmethod 

298 def initialize( 

299 cls, 

300 db: Database, 

301 context: StaticTablesContext, 

302 *, 

303 opaque: OpaqueTableStorageManager, 

304 datasets: type[DatasetRecordStorageManager], 

305 universe: DimensionUniverse, 

306 registry_schema_version: VersionTuple | None = None, 

307 ) -> DatastoreRegistryBridgeManager: 

308 """Construct an instance of the manager. 

309 

310 Parameters 

311 ---------- 

312 db : `Database` 

313 Interface to the underlying database engine and namespace. 

314 context : `StaticTablesContext` 

315 Context object obtained from `Database.declareStaticTables`; used 

316 to declare any tables that should always be present in a layer 

317 implemented with this manager. 

318 opaque : `OpaqueTableStorageManager` 

319 Registry manager object for opaque (to Registry) tables, provided 

320 to allow Datastores to store their internal information inside the 

321 Registry database. 

322 datasets : subclass of `DatasetRecordStorageManager` 

323 Concrete class that will be used to manage the core dataset tables 

324 in this registry; should be used only to create foreign keys to 

325 those tables. 

326 universe : `DimensionUniverse` 

327 All dimensions known to the registry. 

328 registry_schema_version : `VersionTuple` or `None` 

329 Schema version of this extension as defined in registry. 

330 

331 Returns 

332 ------- 

333 manager : `DatastoreRegistryBridgeManager` 

334 An instance of a concrete `DatastoreRegistryBridgeManager` 

335 subclass. 

336 """ 

337 raise NotImplementedError() 

338 

339 @abstractmethod 

340 def refresh(self) -> None: 

341 """Ensure all other operations on this manager are aware of any 

342 collections that may have been registered by other clients since it 

343 was initialized or last refreshed. 

344 """ 

345 raise NotImplementedError() 

346 

347 @abstractmethod 

348 def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge: 

349 """Register a new `Datastore` associated with this `Registry`. 

350 

351 This method should be called by all `Datastore` classes aside from 

352 those that only forward storage to other datastores. 

353 

354 Parameters 

355 ---------- 

356 name : `str` 

357 Name of the datastore, as it should appear in `Registry` tables. 

358 ephemeral : `bool`, optional 

359 If `True` (the default), return a bridge object that is 

360 backed by storage that will not last past the end of the current 

361 process. This should be used whenever the same is true of the 

362 dataset's artifacts. 

363 

364 Returns 

365 ------- 

366 bridge : `DatastoreRegistryBridge` 

367 Object that provides the interface this `Datastore` should use to 

368 communicate with the `Registry`. 

369 """ 

370 raise NotImplementedError() 

371 

372 @abstractmethod 

373 def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]: 

374 """Retrieve datastore locations for a given dataset. 

375 

376 Parameters 

377 ---------- 

378 ref : `DatasetIdRef` 

379 A reference to the dataset for which to retrieve storage 

380 information. 

381 

382 Returns 

383 ------- 

384 datastores : `~collections.abc.Iterable` [ `str` ] 

385 All the matching datastores holding this dataset. Empty if the 

386 dataset does not exist anywhere. 

387 

388 Raises 

389 ------ 

390 AmbiguousDatasetError 

391 Raised if ``ref.id`` is `None`. 

392 """ 

393 raise NotImplementedError() 

394 

395 opaque: OpaqueTableStorageManager 

396 """Registry manager object for opaque (to Registry) tables, provided 

397 to allow Datastores to store their internal information inside the 

398 Registry database. 

399 """ 

400 

401 universe: DimensionUniverse 

402 """All dimensions known to the `Registry`. 

403 """