Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 65%

80 statements  

coverage.py v6.5.0, created at 2023-03-11 02:06 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef") 

24 

25from abc import ABC, abstractmethod 

26from typing import TYPE_CHECKING, Any, ContextManager, Iterable, Optional, Set, Tuple, Type, Union 

27 

28from lsst.utils.classes import immutable 

29 

30from ...core import DatasetId, DatasetRef 

31from ._versioning import VersionedExtension 

32 

33if TYPE_CHECKING:

34 from ...core import DatasetType, DimensionUniverse, StoredDatastoreItemInfo 

35 from ...core.datastore import DatastoreTransaction 

36 from ._database import Database, StaticTablesContext 

37 from ._datasets import DatasetRecordStorageManager 

38 from ._opaque import OpaqueTableStorage, OpaqueTableStorageManager 

39 

40 

41@immutable 

42class FakeDatasetRef: 

43 """A fake `DatasetRef` that can be used internally by butler where 

44 only the dataset ID is available. 

45 

46 Should only be used when the registry cannot be used to create a full

47 `DatasetRef` from the ID. A particular use case is during dataset

48 deletion, when only the ID is available.

49 

50 Parameters 

51 ---------- 

52 id : `DatasetId` 

53 The dataset ID. 

54 """ 

55 

56 __slots__ = ("id",) 

57 

58 def __init__(self, id: DatasetId): 

59 self.id = id 

60 

61 def __str__(self) -> str: 

62 return f"dataset_id={self.id}" 

63 

64 def __repr__(self) -> str: 

65 return f"FakeDatasetRef({self.id})" 

66 

67 def __eq__(self, other: Any) -> bool: 

68 try: 

69 return self.id == other.id 

70 except AttributeError: 

71 return NotImplemented 

72 

73 def __hash__(self) -> int: 

74 return hash(self.id) 

75 

76 id: DatasetId 

77 """Unique identifier for this dataset. 

78 """ 

79 

80 def getCheckedId(self) -> DatasetId: 

81 """Return ``self.id``. 

82 

83 This trivial method exists for compatibility with `DatasetRef`, for 

84 which checking is actually done. 

85 

86 Returns 

87 ------- 

88 id : `DatasetId` 

89 ``self.id``. 

90 """ 

91 return self.id 

92 

93 @property 

94 def datasetType(self) -> DatasetType: 

95 raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType") 

96 

97 

98DatasetIdRef = Union[DatasetRef, FakeDatasetRef] 

99"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`. 

100""" 

101 
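To make the semantics above concrete, here is a minimal sketch exercising `FakeDatasetRef`. It assumes UUID-based dataset IDs and that these names are re-exported from `lsst.daf.butler.registry.interfaces`; only behaviour shown in the class above is relied upon::

    import uuid

    from lsst.daf.butler.registry.interfaces import FakeDatasetRef

    # A FakeDatasetRef wraps nothing but the dataset ID (a UUID here).
    dataset_id = uuid.uuid4()
    ref = FakeDatasetRef(dataset_id)

    print(ref)  # dataset_id=<uuid>
    assert ref.getCheckedId() == dataset_id

    # Equality and hashing are by ID only, so a FakeDatasetRef can stand in
    # for a resolved DatasetRef in sets and dicts keyed by dataset ID.
    assert ref == FakeDatasetRef(dataset_id)
    assert len({ref, FakeDatasetRef(dataset_id)}) == 1

    # Anything that needs the dataset type must use a real DatasetRef.
    try:
        _ = ref.datasetType
    except AttributeError:
        pass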

102 

103class DatastoreRegistryBridge(ABC): 

104 """An abstract base class that defines the interface that a `Datastore` 

105 uses to communicate with a `Registry`. 

106 

107 Parameters 

108 ---------- 

109 datastoreName : `str` 

110 Name of the `Datastore` as it should appear in `Registry` tables 

111 referencing it. 

112 """ 

113 

114 def __init__(self, datastoreName: str): 

115 self.datastoreName = datastoreName 

116 

117 @abstractmethod 

118 def insert(self, refs: Iterable[DatasetIdRef]) -> None: 

119 """Record that a datastore holds the given datasets. 

120 

121 Parameters 

122 ---------- 

123 refs : `Iterable` of `DatasetIdRef` 

124 References to the datasets. 

125 

126 Raises 

127 ------ 

128 AmbiguousDatasetError 

129 Raised if ``any(ref.id is None for ref in refs)``. 

130 """ 

131 raise NotImplementedError() 

132 
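As a hedged illustration of when `insert` is called: a concrete datastore would typically invoke it right after successfully writing artifacts. The wrapper function and its name below are hypothetical; only `bridge.insert` comes from this interface::

    from typing import Iterable

    from lsst.daf.butler import DatasetRef
    from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge


    def record_writes(bridge: DatastoreRegistryBridge, refs: Iterable[DatasetRef]) -> None:
        # Tell the Registry that this datastore now holds ``refs``.
        # insert() raises AmbiguousDatasetError if any ref lacks an ID.
        bridge.insert(refs)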

133 @abstractmethod 

134 def forget(self, refs: Iterable[DatasetIdRef]) -> None: 

135 """Remove dataset location information without any attempt to put it 

136 in the trash while waiting for external deletes. 

137 

138 This should be used only to implement `Datastore.forget`, or in cases 

139 where deleting the actual datastore artifacts cannot fail. 

140 

141 Parameters 

142 ---------- 

143 refs : `Iterable` of `DatasetIdRef` 

144 References to the datasets. 

145 

146 Raises 

147 ------ 

148 AmbiguousDatasetError 

149 Raised if ``any(ref.id is None for ref in refs)``. 

150 """ 

151 raise NotImplementedError() 

152 

153 @abstractmethod 

154 def moveToTrash(self, refs: Iterable[DatasetIdRef], transaction: Optional[DatastoreTransaction]) -> None: 

155 """Move dataset location information to trash. 

156 

157 Parameters 

158 ---------- 

159 refs : `Iterable` of `DatasetIdRef` 

160 References to the datasets. 

161 transaction : `DatastoreTransaction` or `None` 

162 Transaction object. Can be `None` in some bridges or if no rollback 

163 is required. 

164 

165 Raises 

166 ------ 

167 AmbiguousDatasetError 

168 Raised if ``any(ref.id is None for ref in refs)``. 

169 """ 

170 raise NotImplementedError() 

171 
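The split between `forget` and `moveToTrash` is easy to get wrong, so here is a hedged sketch of how a concrete datastore might choose between them; the function and its flag are illustrative, and only the two bridge calls come from this interface::

    from typing import Iterable, Optional

    from lsst.daf.butler.core.datastore import DatastoreTransaction
    from lsst.daf.butler.registry.interfaces import DatasetIdRef, DatastoreRegistryBridge


    def remove_location_records(
        bridge: DatastoreRegistryBridge,
        refs: Iterable[DatasetIdRef],
        *,
        artifacts_already_gone: bool,
        transaction: Optional[DatastoreTransaction] = None,
    ) -> None:
        if artifacts_already_gone:
            # Nothing left to delete on disk, so skip the trash entirely.
            bridge.forget(refs)
        else:
            # Defer artifact deletion; a later emptyTrash() pass will
            # report these refs so the artifacts can be removed.
            bridge.moveToTrash(refs, transaction)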

172 @abstractmethod 

173 def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]: 

174 """Check which refs are listed for this datastore. 

175 

176 Parameters 

177 ---------- 

178 refs : `~collections.abc.Iterable` of `DatasetIdRef` 

179 References to the datasets. 

180 

181 Returns 

182 ------- 

183 present : `Iterable` [ `DatasetIdRef` ] 

184 Datasets from ``refs`` that are recorded as being in this 

185 datastore. 

186 

187 Raises 

188 ------ 

189 AmbiguousDatasetError 

190 Raised if ``any(ref.id is None for ref in refs)``. 

191 """ 

192 raise NotImplementedError() 

193 
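`check` is the read-only counterpart of the calls above: it filters the given refs down to those recorded for this datastore. A small sketch of a hypothetical single-ref existence test built on it::

    from lsst.daf.butler.registry.interfaces import DatasetIdRef, DatastoreRegistryBridge


    def is_recorded(bridge: DatastoreRegistryBridge, ref: DatasetIdRef) -> bool:
        # check() returns the subset of the given refs known to this
        # datastore, so any result at all means ``ref`` is recorded.
        for _ in bridge.check([ref]):
            return True
        return False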

194 @abstractmethod 

195 def emptyTrash( 

196 self, 

197 records_table: Optional[OpaqueTableStorage] = None, 

198 record_class: Optional[Type[StoredDatastoreItemInfo]] = None, 

199 record_column: Optional[str] = None, 

200 ) -> ContextManager[ 

201 Tuple[Iterable[Tuple[DatasetIdRef, Optional[StoredDatastoreItemInfo]]], Optional[Set[str]]] 

202 ]: 

203 """Retrieve all the dataset ref IDs that are in the trash

204 associated with this datastore, and then remove them if the context

205 exits without an exception being raised.

206 

207 Parameters 

208 ---------- 

209 records_table : `OpaqueTableStorage`, optional 

210 Table of records to query with the trash records. 

211 record_class : `type` of `StoredDatastoreItemInfo`, optional 

212 Class to use when reading records from ``records_table``. 

213 record_column : `str`, optional 

214 Name of the column in ``records_table`` that refers to the artifact.

215 

216 Yields 

217 ------ 

218 matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo`) 

219 The IDs of datasets that can be safely removed from this datastore 

220 and the corresponding information from the records table. 

221 Can be empty. 

222 artifacts_to_keep : `set` of `str`, optional 

223 Any external artifacts that are known to the table but which should 

224 not be deleted. If `None`, the caller should check for themselves.

225 

226 Examples 

227 -------- 

228 Typical usage by a Datastore is something like:: 

229 

230 with self.bridge.emptyTrash() as trashed: 

231 iter, to_keep = trashed 

232 for ref, info in iter: 

233 # Remove artifacts associated with id, 

234 # raise an exception if something goes wrong. 

235 

236 Notes 

237 ----- 

238 The object yielded by the context manager may be a single-pass 

239 iterator. If multiple passes are required, it should be converted to 

240 a `list` or other container. 

241 

242 Datastores should never raise (except perhaps in testing) when an

243 artifact cannot be removed only because it is already gone; this

244 condition is an unavoidable outcome of concurrent delete operations,

245 and must not be considered an error if those are to be safe.

246 

247 If a records table is provided, the trashed records will be deleted

248 when the context manager completes.

249 """ 

250 raise NotImplementedError() 

251 
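The usage snippet in the docstring above elides the loop body; a slightly fuller sketch of the same pattern follows. The class and `_delete_artifact` helper are hypothetical stand-ins for a real datastore's internals::

    from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge


    class _SketchDatastore:
        """Illustrative only; not a real Datastore implementation."""

        def __init__(self, bridge: DatastoreRegistryBridge):
            self.bridge = bridge

        def _delete_artifact(self, ref, info) -> None:
            # Stand-in for real artifact removal.  Tolerate artifacts that
            # are already gone: concurrent deletes make that unavoidable.
            pass

        def empty_trash(self) -> None:
            with self.bridge.emptyTrash() as trashed:
                to_remove, _to_keep = trashed
                for ref, info in to_remove:
                    self._delete_artifact(ref, info)
            # Exiting the context without an exception removes the trash
            # records (and any rows from a records table that was passed).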

252 datastoreName: str 

253 """The name of the `Datastore` as it should appear in `Registry` tables 

254 (`str`). 

255 """ 

256 

257 

258class DatastoreRegistryBridgeManager(VersionedExtension): 

259 """An abstract base class that defines the interface between `Registry` 

260 and `Datastore` when a new `Datastore` is constructed. 

261 

262 Parameters 

263 ---------- 

264 opaque : `OpaqueTableStorageManager` 

265 Manager object for opaque table storage in the `Registry`. 

266 universe : `DimensionUniverse` 

267 All dimensions known to the `Registry`.

268 datasetIdColumnType : `type` 

269 Type for dataset ID column. 

270 

271 Notes 

272 ----- 

273 Datastores are passed an instance of `DatastoreRegistryBridgeManager` at 

274 construction, and should use it to obtain and keep any of the following: 

275 

276 - a `DatastoreRegistryBridge` instance to record in the `Registry` what is 

277 present in the datastore (needed by all datastores that are not just 

278 forwarders); 

279 

280 - one or more `OpaqueTableStorage` instances if they wish to store internal

281 records in the `Registry` database; 

282 

283 - the `DimensionUniverse`, if they need it to (e.g.) construct or validate 

284 filename templates. 

285 

286 """ 

287 

288 def __init__( 

289 self, *, opaque: OpaqueTableStorageManager, universe: DimensionUniverse, datasetIdColumnType: type 

290 ): 

291 self.opaque = opaque 

292 self.universe = universe 

293 self.datasetIdColumnType = datasetIdColumnType 

294 
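To make the Notes above concrete, here is a hedged sketch of how a non-forwarding datastore's constructor might use the manager it is handed: it registers itself to obtain a bridge and keeps the pieces it needs later. The class and attribute names are illustrative, not part of the real `Datastore` hierarchy::

    from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager


    class _SketchFileDatastore:
        """Illustrative only; real datastores also take configuration, etc."""

        def __init__(self, name: str, bridgeManager: DatastoreRegistryBridgeManager):
            self.name = name
            # Persistent artifacts, so record our holdings in the Registry.
            self._bridge = bridgeManager.register(name, ephemeral=False)
            # Keep the universe, e.g. for validating file templates.
            self._universe = bridgeManager.universe
            # Opaque-table manager, available for internal record tables.
            self._opaque = bridgeManager.opaque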

295 @classmethod 

296 @abstractmethod 

297 def initialize( 

298 cls, 

299 db: Database, 

300 context: StaticTablesContext, 

301 *, 

302 opaque: OpaqueTableStorageManager, 

303 datasets: Type[DatasetRecordStorageManager], 

304 universe: DimensionUniverse, 

305 ) -> DatastoreRegistryBridgeManager: 

306 """Construct an instance of the manager. 

307 

308 Parameters 

309 ---------- 

310 db : `Database` 

311 Interface to the underlying database engine and namespace. 

312 context : `StaticTablesContext` 

313 Context object obtained from `Database.declareStaticTables`; used 

314 to declare any tables that should always be present in a layer 

315 implemented with this manager. 

316 opaque : `OpaqueTableStorageManager` 

317 Registry manager object for opaque (to Registry) tables, provided 

318 to allow Datastores to store their internal information inside the 

319 Registry database. 

320 datasets : subclass of `DatasetRecordStorageManager` 

321 Concrete class that will be used to manage the core dataset tables 

322 in this registry; should be used only to create foreign keys to 

323 those tables. 

324 universe : `DimensionUniverse` 

325 All dimensions known to the registry. 

326 

327 Returns 

328 ------- 

329 manager : `DatastoreRegistryBridgeManager` 

330 An instance of a concrete `DatastoreRegistryBridgeManager` 

331 subclass. 

332 """ 

333 raise NotImplementedError() 

334 

335 @abstractmethod 

336 def refresh(self) -> None: 

337 """Ensure all other operations on this manager are aware of any 

338 datastores that may have been registered by other clients since it

339 was initialized or last refreshed. 

340 """ 

341 raise NotImplementedError() 

342 

343 @abstractmethod 

344 def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge: 

345 """Register a new `Datastore` associated with this `Registry`. 

346 

347 This method should be called by all `Datastore` classes aside from 

348 those that only forward storage to other datastores. 

349 

350 Parameters 

351 ---------- 

352 name : `str` 

353 Name of the datastore, as it should appear in `Registry` tables. 

354 ephemeral : `bool`, optional 

355 If `True` (the default), return a bridge object that is

356 backed by storage that will not last past the end of the current

357 process. This should be used whenever the same is true of the

358 datasets' artifacts.

359 

360 Returns 

361 ------- 

362 bridge : `DatastoreRegistryBridge` 

363 Object that provides the interface this `Datastore` should use to 

364 communicate with the `Registry`. 

365 """ 

366 raise NotImplementedError() 

367 
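By contrast with the persistent sketch earlier, a datastore whose artifacts live only in memory would ask for an ephemeral bridge so that its Registry records do not outlive the process; a short hedged illustration::

    from lsst.daf.butler.registry.interfaces import (
        DatastoreRegistryBridge,
        DatastoreRegistryBridgeManager,
    )


    def ephemeral_bridge(
        name: str, manager: DatastoreRegistryBridgeManager
    ) -> DatastoreRegistryBridge:
        # Artifacts vanish with the process, so their location records should too.
        return manager.register(name, ephemeral=True)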

368 @abstractmethod 

369 def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]: 

370 """Retrieve datastore locations for a given dataset. 

371 

372 Parameters 

373 ---------- 

374 ref : `DatasetIdRef` 

375 A reference to the dataset for which to retrieve storage 

376 information. 

377 

378 Returns 

379 ------- 

380 datastores : `Iterable` [ `str` ] 

381 All the matching datastores holding this dataset. Empty if the 

382 dataset does not exist anywhere. 

383 

384 Raises 

385 ------ 

386 AmbiguousDatasetError 

387 Raised if ``ref.id`` is `None`. 

388 """ 

389 raise NotImplementedError() 

390 
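A short sketch using `findDatastores` to test whether a dataset is stored anywhere at all, for example before deciding that a Registry-side delete is safe; the wrapper function is hypothetical::

    from lsst.daf.butler.registry.interfaces import (
        DatasetIdRef,
        DatastoreRegistryBridgeManager,
    )


    def stored_anywhere(manager: DatastoreRegistryBridgeManager, ref: DatasetIdRef) -> bool:
        # findDatastores() yields the name of every datastore holding ``ref``;
        # an empty result means no datastore knows about it.
        for _name in manager.findDatastores(ref):
            return True
        return False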

391 opaque: OpaqueTableStorageManager 

392 """Registry manager object for opaque (to Registry) tables, provided 

393 to allow Datastores to store their internal information inside the 

394 Registry database. 

395 """ 

396 

397 universe: DimensionUniverse 

398 """All dimensions known to the `Registry`. 

399 """