Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 65%

76 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-01 19:55 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef") 

24 

25from abc import ABC, abstractmethod 

26from typing import ( 

27 Any, 

28 ContextManager, 

29 Iterable, 

30 Optional, 

31 Set, 

32 Tuple, 

33 Type, 

34 TYPE_CHECKING, 

35 Union, 

36) 

37 

38from ...core.utils import immutable 

39from ...core import DatasetId, DatasetRef 

40from ._versioning import VersionedExtension 

41 

42if TYPE_CHECKING: 42 ↛ 43line 42 didn't jump to line 43, because the condition on line 42 was never true

43 from ...core import DatasetType, DimensionUniverse, StoredDatastoreItemInfo 

44 from ._database import Database, StaticTablesContext 

45 from ._datasets import DatasetRecordStorageManager 

46 from ._opaque import OpaqueTableStorageManager, OpaqueTableStorage 

47 

48 

@immutable
class FakeDatasetRef:
    """A stand-in for `DatasetRef` carrying nothing but the dataset ID.

    Intended for internal butler use when registry cannot be consulted to
    build a full `DatasetRef` from the ID — a typical case is dataset
    deletion, where only the ID is available.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
    """
    __slots__ = ("id",)

    # Unique identifier for this dataset.
    id: DatasetId

    def __init__(self, id: DatasetId):
        self.id = id

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __eq__(self, other: Any) -> bool:
        # EAFP: anything exposing an ``id`` attribute compares by ID;
        # objects without one defer to the other operand.
        try:
            other_id = other.id
        except AttributeError:
            return NotImplemented
        return self.id == other_id

    def __hash__(self) -> int:
        return hash(self.id)

    def getCheckedId(self) -> DatasetId:
        """Return ``self.id``.

        This trivial method exists only for interface compatibility with
        `DatasetRef`, where the equivalent accessor performs real checking.

        Returns
        -------
        id : `DatasetId`
            ``self.id``.
        """
        return self.id

    @property
    def datasetType(self) -> DatasetType:
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")

103 

104 

DatasetIdRef = Union[DatasetRef, FakeDatasetRef]
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.

Used for APIs that only need a dataset ID, so callers holding nothing more
than an ID can pass a `FakeDatasetRef` in place of a full `DatasetRef`.
"""

108 

109 

class DatastoreRegistryBridge(ABC):
    """An abstract base class that defines the interface that a `Datastore`
    uses to communicate with a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """
    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Remove dataset location information without any attempt to put it
        in the trash while waiting for external deletes.

        This should be used only to implement `Datastore.forget`, or in cases
        where deleting the actual datastore artifacts cannot fail.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef]) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(self, records_table: Optional[OpaqueTableStorage] = None,
                   record_class: Optional[Type[StoredDatastoreItemInfo]] = None,
                   record_column: Optional[str] = None,
                   ) -> ContextManager[Tuple[Iterable[Tuple[DatasetIdRef,
                                                            Optional[StoredDatastoreItemInfo]]],
                                             Optional[Set[str]]]]:
        """Retrieve all the dataset ref IDs that are in the trash
        associated with this datastore, and then remove them if the context
        exits without an exception being raised.

        Parameters
        ----------
        records_table : `OpaqueTableStorage`, optional
            Table of records to query with the trash records.
        record_class : `type` of `StoredDatastoreItemInfo`, optional
            Class to use when reading records from ``records_table``.
        record_column : `str`, optional
            Name of the column in ``records_table`` that refers to the
            artifact.

        Yields
        ------
        matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo`)
            The IDs of datasets that can be safely removed from this datastore
            and the corresponding information from the records table.
            Can be empty.
        artifacts_to_keep : `set` of `str`, optional
            Any external artifacts that are known to the table but which should
            not be deleted.  If `None`, the caller should check themselves.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                iter, to_keep = trashed
                for ref, info in iter:
                    # Remove artifacts associated with id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator.  If multiple passes are required, it should be converted to
        a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.

        If a table record is provided the trashed records will be deleted
        when the context manager completes.
        """
        raise NotImplementedError()

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """

257 

258 

class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for dataset ID column.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instance if they wish to store internal
      records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.

    """
    def __init__(self, *, opaque: OpaqueTableStorageManager, universe: DimensionUniverse,
                 datasetIdColumnType: type):
        self.opaque = opaque
        self.universe = universe
        self.datasetIdColumnType = datasetIdColumnType

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   opaque: OpaqueTableStorageManager,
                   datasets: Type[DatasetRecordStorageManager],
                   universe: DimensionUniverse,
                   ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default in the signature), return a bridge object
            that is backed by storage that will not last past the end of the
            current process.  This should be used whenever the same is true
            of the dataset's artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset.  Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """