Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 68%

79 statements  

« prev     ^ index     » next       coverage.py v6.4.1, created at 2022-06-17 02:08 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef")

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, ContextManager, Iterable, Optional, Set, Tuple, Type, Union

from lsst.utils.classes import immutable

from ...core import DatasetId, DatasetRef
from ._versioning import VersionedExtension

if TYPE_CHECKING:
    # These names are needed only in annotations; the TYPE_CHECKING guard
    # keeps them from being imported at runtime.
    from ...core import DatasetType, DimensionUniverse, StoredDatastoreItemInfo
    from ._database import Database, StaticTablesContext
    from ._datasets import DatasetRecordStorageManager
    from ._opaque import OpaqueTableStorage, OpaqueTableStorageManager

38 

39 

@immutable
class FakeDatasetRef:
    """A stand-in for `DatasetRef` used internally by butler when only the
    dataset ID is known.

    This should be used only when the registry cannot be consulted to build
    a complete `DatasetRef` from the ID.  A typical situation is dataset
    deletion, where nothing but the ID remains available.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
    """

    __slots__ = ("id",)

    id: DatasetId
    """Unique identifier for this dataset.
    """

    def __init__(self, id: DatasetId):
        self.id = id

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __eq__(self, other: Any) -> bool:
        # Equal to anything that exposes a matching ``id`` attribute
        # (e.g. a real DatasetRef); defer to the other operand otherwise.
        try:
            other_id = other.id
        except AttributeError:
            return NotImplemented
        return self.id == other_id

    def __hash__(self) -> int:
        return hash(self.id)

    def getCheckedId(self) -> DatasetId:
        """Return ``self.id``.

        Provided for API compatibility with `DatasetRef`, where the
        equivalent method performs actual checking.

        Returns
        -------
        id : `DatasetId`
            ``self.id``.
        """
        return self.id

    @property
    def datasetType(self) -> DatasetType:
        # A fake ref carries no dataset type; accessing one is always an error.
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")

96 

DatasetIdRef = Union[DatasetRef, FakeDatasetRef]
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.
"""

100 

101 

class DatastoreRegistryBridge(ABC):
    """An abstract base class that defines the interface that a `Datastore`
    uses to communicate with a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Remove dataset location information without any attempt to put it
        in the trash while waiting for external deletes.

        This should be used only to implement `Datastore.forget`, or in cases
        where deleting the actual datastore artifacts cannot fail.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef]) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(
        self,
        records_table: Optional[OpaqueTableStorage] = None,
        record_class: Optional[Type[StoredDatastoreItemInfo]] = None,
        record_column: Optional[str] = None,
    ) -> ContextManager[
        Tuple[Iterable[Tuple[DatasetIdRef, Optional[StoredDatastoreItemInfo]]], Optional[Set[str]]]
    ]:
        """Retrieve all the dataset ref IDs that are in the trash
        associated with this datastore, and then remove them if the context
        exits without an exception being raised.

        Parameters
        ----------
        records_table : `OpaqueTableStorage`, optional
            Table of records to query with the trash records.
        record_class : `type` of `StoredDatastoreItemInfo`, optional
            Class to use when reading records from ``records_table``.
        record_column : `str`, optional
            Name of the column in records_table that refers to the artifact.

        Yields
        ------
        matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo`)
            The IDs of datasets that can be safely removed from this datastore
            and the corresponding information from the records table.
            Can be empty.
        artifacts_to_keep : `set` of `str`, optional
            Any external artifacts that are known to the table but which should
            not be deleted. If `None`, the caller should check themselves.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                iter, to_keep = trashed
                for ref, info in iter:
                    # Remove artifacts associated with id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator. If multiple passes are required, it should be converted to
        a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.

        If a records table is provided, the trashed records will be deleted
        when the context manager completes.
        """
        raise NotImplementedError()

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """

252 

253 

class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for dataset ID column.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instance if they wish to store internal
      records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.

    """

    def __init__(
        self, *, opaque: OpaqueTableStorageManager, universe: DimensionUniverse, datasetIdColumnType: type
    ):
        self.opaque = opaque
        self.universe = universe
        self.datasetIdColumnType = datasetIdColumnType

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        opaque: OpaqueTableStorageManager,
        datasets: Type[DatasetRecordStorageManager],
        universe: DimensionUniverse,
    ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default), return a bridge object that is
            backed by storage that will not last past the end of the current
            process. This should be used whenever the same is true of the
            dataset's artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetIdRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset. Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """