Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef") 

24 

25from abc import ABC, abstractmethod 

26from typing import ( 

27 Any, 

28 ContextManager, 

29 Iterable, 

30 Type, 

31 TYPE_CHECKING, 

32 Union, 

33) 

34 

35from ...core.utils import immutable 

36from ...core import DatasetId, DatasetRef 

37from ._versioning import VersionedExtension 

38 

39if TYPE_CHECKING: 39 ↛ 40line 39 didn't jump to line 40, because the condition on line 39 was never true

40 from ...core import DatasetType, DimensionUniverse 

41 from ._database import Database, StaticTablesContext 

42 from ._datasets import DatasetRecordStorageManager 

43 from ._opaque import OpaqueTableStorageManager 

44 

45 

@immutable
class FakeDatasetRef:
    """Minimal stand-in for a `DatasetRef` that carries only a dataset ID.

    Intended for internal butler use in situations where the registry cannot
    be consulted to build a complete `DatasetRef` from an ID. Dataset
    deletion, where nothing but the ID is known, is one such situation.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
    """

    __slots__ = ("id",)

    id: DatasetId
    """Unique identifier for this dataset.
    """

    def __init__(self, id: DatasetId):
        self.id = id

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __hash__(self) -> int:
        # Hash on the ID alone so that hashing stays consistent with __eq__.
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        # Any object exposing an ``id`` attribute is comparable; for anything
        # else, hand the decision back to Python via NotImplemented.
        try:
            same = self.id == other.id
        except AttributeError:
            return NotImplemented
        return same

    def getCheckedId(self) -> DatasetId:
        """Return ``self.id`` without performing any check.

        Provided so this class offers the same method as `DatasetRef`, where
        the equivalent call actually validates the ID.

        Returns
        -------
        id : `DatasetId`
            ``self.id``.
        """
        return self.id

    @property
    def datasetType(self) -> DatasetType:
        """Unavailable on a fake reference; accessing it always raises.

        Raises
        ------
        AttributeError
            Always raised, since only the dataset ID is stored.
        """
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")

100 

101 

# Either a full `DatasetRef` or an ID-only `FakeDatasetRef`; used to annotate
# the `DatastoreRegistryBridge` methods in this module.
DatasetIdRef = Union[DatasetRef, FakeDatasetRef]
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.
"""

105 

106 

class DatastoreRegistryBridge(ABC):
    """Abstract interface through which a `Datastore` talks to a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that this datastore holds the given datasets.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Drop dataset location information immediately, bypassing the trash.

        No attempt is made to park the records in the trash while external
        deletes are pending. Use this only to implement `Datastore.forget`,
        or where deleting the actual datastore artifacts cannot fail.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef]) -> None:
        """Move dataset location information to the trash.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Report which of the given refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(self) -> ContextManager[Iterable[DatasetIdRef]]:
        """Retrieve every dataset ref in the trash for this datastore, and
        remove them from the trash if the context exits without an exception
        being raised.

        Returns
        -------
        refs : `ContextManager` [ `Iterable` [ `DatasetIdRef` ] ]
            Context manager yielding the refs of datasets that can be safely
            removed from this datastore. The iterable can be empty.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                for ref in trashed:
                    # Remove artifacts associated with ref.id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator. If multiple passes are required, it should be converted to
        a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.
        """
        raise NotImplementedError()

232 

233 

class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for dataset ID column.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instances if they wish to store
      internal records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.
    """

    def __init__(self, *, opaque: OpaqueTableStorageManager, universe: DimensionUniverse,
                 datasetIdColumnType: type):
        self.opaque = opaque
        self.universe = universe
        self.datasetIdColumnType = datasetIdColumnType

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   opaque: OpaqueTableStorageManager,
                   datasets: Type[DatasetRecordStorageManager],
                   universe: DimensionUniverse,
                   ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        # NOTE(review): "collections" may be a carry-over from another manager
        # interface; this manager registers datastores - confirm the wording.
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default), return a bridge object that is backed by
            storage that will not last past the end of the current process.
            This should be used whenever the same is true of the dataset's
            artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset. Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """

    datasetIdColumnType: type
    """Type for dataset ID column (`type`).
    """