Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 85%

68 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-12 09:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef") 

30 

31from abc import ABC, abstractmethod 

32from collections.abc import Iterable 

33from contextlib import AbstractContextManager 

34from typing import TYPE_CHECKING, Any 

35 

36from lsst.utils.classes import immutable 

37 

38from ..._dataset_ref import DatasetId, DatasetRef 

39from ._versioning import VersionedExtension, VersionTuple 

40 

41if TYPE_CHECKING: 

42 from ..._dataset_type import DatasetType 

43 from ...datastore import DatastoreTransaction 

44 from ...datastore.stored_file_info import StoredDatastoreItemInfo 

45 from ...dimensions import DimensionUniverse 

46 from ._database import Database, StaticTablesContext 

47 from ._datasets import DatasetRecordStorageManager 

48 from ._opaque import OpaqueTableStorage, OpaqueTableStorageManager 

49 

50 

51@immutable 

52class FakeDatasetRef: 

53 """A fake `DatasetRef` that can be used internally by butler where 

54 only the dataset ID is available. 

55 

56 Should only be used when registry can not be used to create a full 

57 `DatasetRef` from the ID. A particular use case is during dataset 

58 deletion when solely the ID is available. 

59 

60 Parameters 

61 ---------- 

62 id : `DatasetId` 

63 The dataset ID. 

64 """ 

65 

66 __slots__ = ("id",) 

67 

68 def __init__(self, id: DatasetId): 

69 self.id = id 

70 

71 def __str__(self) -> str: 

72 return f"dataset_id={self.id}" 

73 

74 def __repr__(self) -> str: 

75 return f"FakeDatasetRef({self.id})" 

76 

77 def __eq__(self, other: Any) -> bool: 

78 try: 

79 return self.id == other.id 

80 except AttributeError: 

81 return NotImplemented 

82 

83 def __hash__(self) -> int: 

84 return hash(self.id) 

85 

86 id: DatasetId 

87 """Unique identifier for this dataset. 

88 """ 

89 

90 @property 

91 def datasetType(self) -> DatasetType: 

92 raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType") 

93 

94 

95DatasetIdRef = DatasetRef | FakeDatasetRef 

96"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`. 

97""" 

98 

99 

100class DatastoreRegistryBridge(ABC): 

101 """An abstract base class that defines the interface that a `Datastore` 

102 uses to communicate with a `Registry`. 

103 

104 Parameters 

105 ---------- 

106 datastoreName : `str` 

107 Name of the `Datastore` as it should appear in `Registry` tables 

108 referencing it. 

109 """ 

110 

111 def __init__(self, datastoreName: str): 

112 self.datastoreName = datastoreName 

113 

114 @abstractmethod 

115 def insert(self, refs: Iterable[DatasetIdRef]) -> None: 

116 """Record that a datastore holds the given datasets. 

117 

118 Parameters 

119 ---------- 

120 refs : `~collections.abc.Iterable` of `DatasetIdRef` 

121 References to the datasets. 

122 """ 

123 raise NotImplementedError() 

124 

125 @abstractmethod 

126 def ensure(self, refs: Iterable[DatasetIdRef]) -> None: 

127 """Record that a datastore holds the given datasets, skipping if 

128 the ref is already registered. 

129 

130 Parameters 

131 ---------- 

132 refs : `~collections.abc.Iterable` of `DatasetIdRef` 

133 References to the datasets. 

134 """ 

135 raise NotImplementedError() 

136 

137 @abstractmethod 

138 def forget(self, refs: Iterable[DatasetIdRef]) -> None: 

139 """Remove dataset location information without any attempt to put it 

140 in the trash while waiting for external deletes. 

141 

142 This should be used only to implement `Datastore.forget`, or in cases 

143 where deleting the actual datastore artifacts cannot fail. 

144 

145 Parameters 

146 ---------- 

147 refs : `~collections.abc.Iterable` of `DatasetIdRef` 

148 References to the datasets. 

149 """ 

150 raise NotImplementedError() 

151 

152 @abstractmethod 

153 def moveToTrash(self, refs: Iterable[DatasetIdRef], transaction: DatastoreTransaction | None) -> None: 

154 """Move dataset location information to trash. 

155 

156 Parameters 

157 ---------- 

158 refs : `~collections.abc.Iterable` of `DatasetIdRef` 

159 References to the datasets. 

160 transaction : `DatastoreTransaction` or `None` 

161 Transaction object. Can be `None` in some bridges or if no rollback 

162 is required. 

163 """ 

164 raise NotImplementedError() 

165 

166 @abstractmethod 

167 def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]: 

168 """Check which refs are listed for this datastore. 

169 

170 Parameters 

171 ---------- 

172 refs : `~collections.abc.Iterable` of `DatasetIdRef` 

173 References to the datasets. 

174 

175 Returns 

176 ------- 

177 present : `~collections.abc.Iterable` [ `DatasetIdRef` ] 

178 Datasets from ``refs`` that are recorded as being in this 

179 datastore. 

180 """ 

181 raise NotImplementedError() 

182 

183 @abstractmethod 

184 def emptyTrash( 

185 self, 

186 records_table: OpaqueTableStorage | None = None, 

187 record_class: type[StoredDatastoreItemInfo] | None = None, 

188 record_column: str | None = None, 

189 ) -> AbstractContextManager[ 

190 tuple[Iterable[tuple[DatasetIdRef, StoredDatastoreItemInfo | None]], set[str] | None] 

191 ]: 

192 """Retrieve all the dataset ref IDs that are in the trash 

193 associated with this datastore, and then remove them if the context 

194 exits without an exception being raised. 

195 

196 Parameters 

197 ---------- 

198 records_table : `OpaqueTableStorage`, optional 

199 Table of records to query with the trash records. 

200 record_class : `type` of `StoredDatastoreItemInfo`, optional 

201 Class to use when reading records from ``records_table``. 

202 record_column : `str`, optional 

203 Name of the column in records_table that refers to the artifact. 

204 

205 Yields 

206 ------ 

207 matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo`) 

208 The IDs of datasets that can be safely removed from this datastore 

209 and the corresponding information from the records table. 

210 Can be empty. 

211 artifacts_to_keep : `set` of `str`, optional 

212 Any external artifacts that are known to the table but which should 

213 not be deleted. If `None`, the caller should check themselves. 

214 

215 Examples 

216 -------- 

217 Typical usage by a Datastore is something like:: 

218 

219 with self.bridge.emptyTrash() as trashed: 

220 iter, to_keep = trashed 

221 for ref, info in iter: 

222 # Remove artifacts associated with id, 

223 # raise an exception if something goes wrong. 

224 

225 Notes 

226 ----- 

227 The object yielded by the context manager may be a single-pass 

228 iterator. If multiple passes are required, it should be converted to 

229 a `list` or other container. 

230 

231 Datastores should never raise (except perhaps in testing) when an 

232 artifact cannot be removed only because it is already gone - this 

233 condition is an unavoidable outcome of concurrent delete operations, 

234 and must not be considered an error for those to be safe. 

235 

236 If a records table is provided, the trashed records will be deleted 

237 when the context manager completes. 

238 """ 

239 raise NotImplementedError() 

240 

241 datastoreName: str 

242 """The name of the `Datastore` as it should appear in `Registry` tables 

243 (`str`). 

244 """ 

245 

246 

247class DatastoreRegistryBridgeManager(VersionedExtension): 

248 """An abstract base class that defines the interface between `Registry` 

249 and `Datastore` when a new `Datastore` is constructed. 

250 

251 Parameters 

252 ---------- 

253 opaque : `OpaqueTableStorageManager` 

254 Manager object for opaque table storage in the `Registry`. 

255 universe : `DimensionUniverse` 

256 All dimensions known to the `Registry`. 

257 datasetIdColumnType : `type` 

258 Type for dataset ID column. 

259 

260 Notes 

261 ----- 

262 Datastores are passed an instance of `DatastoreRegistryBridgeManager` at 

263 construction, and should use it to obtain and keep any of the following: 

264 

265 - a `DatastoreRegistryBridge` instance to record in the `Registry` what is 

266 present in the datastore (needed by all datastores that are not just 

267 forwarders); 

268 

269 - one or more `OpaqueTableStorage` instances if they wish to store internal 

270 records in the `Registry` database; 

271 

272 - the `DimensionUniverse`, if they need it to (e.g.) construct or validate 

273 filename templates. 

274 

275 """ 

276 

277 def __init__( 

278 self, 

279 *, 

280 opaque: OpaqueTableStorageManager, 

281 universe: DimensionUniverse, 

282 datasetIdColumnType: type, 

283 registry_schema_version: VersionTuple | None = None, 

284 ): 

285 super().__init__(registry_schema_version=registry_schema_version) 

286 self.opaque = opaque 

287 self.universe = universe 

288 self.datasetIdColumnType = datasetIdColumnType 

289 

290 @classmethod 

291 @abstractmethod 

292 def initialize( 

293 cls, 

294 db: Database, 

295 context: StaticTablesContext, 

296 *, 

297 opaque: OpaqueTableStorageManager, 

298 datasets: type[DatasetRecordStorageManager], 

299 universe: DimensionUniverse, 

300 registry_schema_version: VersionTuple | None = None, 

301 ) -> DatastoreRegistryBridgeManager: 

302 """Construct an instance of the manager. 

303 

304 Parameters 

305 ---------- 

306 db : `Database` 

307 Interface to the underlying database engine and namespace. 

308 context : `StaticTablesContext` 

309 Context object obtained from `Database.declareStaticTables`; used 

310 to declare any tables that should always be present in a layer 

311 implemented with this manager. 

312 opaque : `OpaqueTableStorageManager` 

313 Registry manager object for opaque (to Registry) tables, provided 

314 to allow Datastores to store their internal information inside the 

315 Registry database. 

316 datasets : subclass of `DatasetRecordStorageManager` 

317 Concrete class that will be used to manage the core dataset tables 

318 in this registry; should be used only to create foreign keys to 

319 those tables. 

320 universe : `DimensionUniverse` 

321 All dimensions known to the registry. 

322 registry_schema_version : `VersionTuple` or `None` 

323 Schema version of this extension as defined in registry. 

324 

325 Returns 

326 ------- 

327 manager : `DatastoreRegistryBridgeManager` 

328 An instance of a concrete `DatastoreRegistryBridgeManager` 

329 subclass. 

330 """ 

331 raise NotImplementedError() 

332 

333 @abstractmethod 

334 def refresh(self) -> None: 

335 """Ensure all other operations on this manager are aware of any 

336 collections that may have been registered by other clients since it 

337 was initialized or last refreshed. 

338 """ 

339 raise NotImplementedError() 

340 

341 @abstractmethod 

342 def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge: 

343 """Register a new `Datastore` associated with this `Registry`. 

344 

345 This method should be called by all `Datastore` classes aside from 

346 those that only forward storage to other datastores. 

347 

348 Parameters 

349 ---------- 

350 name : `str` 

351 Name of the datastore, as it should appear in `Registry` tables. 

352 ephemeral : `bool`, optional 

353 If `True` (the default), return a bridge object that is 

354 backed by storage that will not last past the end of the current 

355 process. This should be used whenever the same is true of the 

356 dataset's artifacts. 

357 

358 Returns 

359 ------- 

360 bridge : `DatastoreRegistryBridge` 

361 Object that provides the interface this `Datastore` should use to 

362 communicate with the `Registry`. 

363 """ 

364 raise NotImplementedError() 

365 

366 @abstractmethod 

367 def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]: 

368 """Retrieve datastore locations for a given dataset. 

369 

370 Parameters 

371 ---------- 

372 ref : `DatasetIdRef` 

373 A reference to the dataset for which to retrieve storage 

374 information. 

375 

376 Returns 

377 ------- 

378 datastores : `~collections.abc.Iterable` [ `str` ] 

379 All the matching datastores holding this dataset. Empty if the 

380 dataset does not exist anywhere. 

381 

382 Raises 

383 ------ 

384 AmbiguousDatasetError 

385 Raised if ``ref.id`` is `None`. 

386 """ 

387 raise NotImplementedError() 

388 

389 opaque: OpaqueTableStorageManager 

390 """Registry manager object for opaque (to Registry) tables, provided 

391 to allow Datastores to store their internal information inside the 

392 Registry database. 

393 """ 

394 

395 universe: DimensionUniverse 

396 """All dimensions known to the `Registry`. 

397 """