Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 85%

68 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-10-02 08:00 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef") 

30 

31from abc import ABC, abstractmethod 

32from collections.abc import Iterable 

33from contextlib import AbstractContextManager 

34from typing import TYPE_CHECKING, Any 

35 

36from lsst.utils.classes import immutable 

37 

38from ...core import DatasetId, DatasetRef 

39from ._versioning import VersionedExtension, VersionTuple 

40 

41if TYPE_CHECKING: 

42 from ...core import DatasetType, DimensionUniverse, StoredDatastoreItemInfo 

43 from ...core.datastore import DatastoreTransaction 

44 from ._database import Database, StaticTablesContext 

45 from ._datasets import DatasetRecordStorageManager 

46 from ._opaque import OpaqueTableStorage, OpaqueTableStorageManager 

47 

48 

@immutable
class FakeDatasetRef:
    """Stand-in for `DatasetRef` carrying nothing but the dataset ID.

    Intended for internal butler use in situations where a registry is not
    available to reconstruct a complete `DatasetRef` from the ID.  A typical
    use case is dataset deletion, when only the ID survives.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
    """

    __slots__ = ("id",)

    id: DatasetId
    """Unique identifier for this dataset.
    """

    def __init__(self, id: DatasetId):
        self.id = id

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __hash__(self) -> int:
        # Hash on the ID alone, mirroring equality below.
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        # EAFP: anything exposing an ``id`` attribute (e.g. a real
        # `DatasetRef`) is comparable; everything else defers to the other
        # operand via `NotImplemented`.
        try:
            other_id = other.id
        except AttributeError:
            return NotImplemented
        return self.id == other_id

    @property
    def datasetType(self) -> DatasetType:
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")

91 

92 

# Built with ``|`` (a runtime `types.UnionType`), so it works both as a type
# annotation and as the second argument to `isinstance`.
DatasetIdRef = DatasetRef | FakeDatasetRef
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.
"""

96 

97 

class DatastoreRegistryBridge(ABC):
    """An abstract base class that defines the interface that a `Datastore`
    uses to communicate with a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def ensure(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets, skipping if
        the ref is already registered.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Remove dataset location information without any attempt to put it
        in the trash while waiting for external deletes.

        This should be used only to implement `Datastore.forget`, or in cases
        where deleting the actual datastore artifacts cannot fail.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef], transaction: DatastoreTransaction | None) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        transaction : `DatastoreTransaction` or `None`
            Transaction object. Can be `None` in some bridges or if no rollback
            is required.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `~collections.abc.Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(
        self,
        records_table: OpaqueTableStorage | None = None,
        record_class: type[StoredDatastoreItemInfo] | None = None,
        record_column: str | None = None,
    ) -> AbstractContextManager[
        tuple[Iterable[tuple[DatasetIdRef, StoredDatastoreItemInfo | None]], set[str] | None]
    ]:
        """Retrieve all the dataset ref IDs that are in the trash
        associated with this datastore, and then remove them if the context
        exits without an exception being raised.

        Parameters
        ----------
        records_table : `OpaqueTableStorage`, optional
            Table of records to query with the trash records.
        record_class : `type` of `StoredDatastoreItemInfo`, optional
            Class to use when reading records from ``records_table``.
        record_column : `str`, optional
            Name of the column in ``records_table`` that refers to the
            artifact.

        Yields
        ------
        matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo`)
            The IDs of datasets that can be safely removed from this datastore
            and the corresponding information from the records table.
            Can be empty.
        artifacts_to_keep : `set` of `str`, optional
            Any external artifacts that are known to the table but which should
            not be deleted. If `None`, the caller should check themselves.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                iter, to_keep = trashed
                for ref, info in iter:
                    # Remove artifacts associated with id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator. If multiple passes are required, it should be converted to
        a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.

        If a table record is provided the trashed records will be deleted
        when the context manager completes.
        """
        raise NotImplementedError()

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """

243 

244 

class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for dataset ID column.
    registry_schema_version : `VersionTuple` or `None`, optional
        Schema version of this extension as defined in registry.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instance if they wish to store internal
      records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.

    """

    def __init__(
        self,
        *,
        opaque: OpaqueTableStorageManager,
        universe: DimensionUniverse,
        datasetIdColumnType: type,
        registry_schema_version: VersionTuple | None = None,
    ):
        super().__init__(registry_schema_version=registry_schema_version)
        self.opaque = opaque
        self.universe = universe
        self.datasetIdColumnType = datasetIdColumnType

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        opaque: OpaqueTableStorageManager,
        datasets: type[DatasetRecordStorageManager],
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        # NOTE(review): the docstring previously stated that ``False`` was the
        # default for ``ephemeral``, contradicting the ``ephemeral: bool =
        # True`` signature; the text below now matches the code — confirm that
        # ``True`` is the intended default.
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default), return a bridge object that is
            backed by storage that will not last past the end of the current
            process.  This should be used whenever the same is true of the
            dataset's artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetIdRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `~collections.abc.Iterable` [ `str` ]
            All the matching datastores holding this dataset. Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """

    # Type used for dataset ID columns in registry tables (set in __init__).
    datasetIdColumnType: type