Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef") 

24 

25from abc import ABC, abstractmethod 

26from typing import ( 

27 Any, 

28 ContextManager, 

29 Iterable, 

30 Type, 

31 TYPE_CHECKING, 

32 Union, 

33) 

34 

35from ...core.utils import immutable 

36from ...core import DatasetRef 

37from ._versioning import VersionedExtension 

38 

39if TYPE_CHECKING: 39 ↛ 40line 39 didn't jump to line 40, because the condition on line 39 was never true

40 from ...core import DatasetType, DimensionUniverse 

41 from ._database import Database, StaticTablesContext 

42 from ._datasets import DatasetRecordStorageManager 

43 from ._opaque import OpaqueTableStorageManager 

44 

45 

@immutable
class FakeDatasetRef:
    """A stand-in for `DatasetRef` that carries nothing but a dataset ID.

    Intended for internal butler use when the registry can not be used to
    build a full `DatasetRef` from the ID.  A particular use case is dataset
    deletion, when solely the ID is available.

    Parameters
    ----------
    id : `int`
        The dataset ID.
    """

    __slots__ = ("id",)

    def __new__(cls, id: int) -> FakeDatasetRef:
        # The single attribute is assigned here, at construction time; the
        # class defines no ``__init__``.
        self = super().__new__(cls)
        self.id = id
        return self

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __eq__(self, other: Any) -> bool:
        """Compare by dataset ID against any object exposing ``id``.

        Returns
        -------
        equal : `bool`
            Result of ``self.id == other.id``.  If ``other`` has no ``id``
            attribute the `NotImplemented` sentinel is returned instead, so
            that Python can try the reflected comparison.
        """
        # ``NotImplemented`` is a value rather than a type, so it does not
        # belong in the return annotation; ``bool`` is the conventional
        # annotation for rich-comparison methods.
        try:
            return self.id == other.id
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        # Hash on the same key used by ``__eq__``.
        return hash(self.id)

    id: int
    """Unique integer that identifies this dataset.
    """

    def getCheckedId(self) -> int:
        """Return ``self.id``.

        This trivial method exists for compatibility with `DatasetRef`, for
        which checking is actually done.

        Returns
        -------
        id : `int`
            ``self.id``.
        """
        return self.id

    @property
    def datasetType(self) -> DatasetType:
        """Unavailable on a fake reference; accessing it always raises.

        Raises
        ------
        AttributeError
            Always raised, because only the dataset ID is known.
        """
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")

102 

103 

DatasetIdRef = Union[DatasetRef, FakeDatasetRef]
"""Type-annotation alias accepting either a `DatasetRef` or a
`FakeDatasetRef`.
"""

107 

108 

class DatastoreRegistryBridge(ABC):
    """Abstract interface through which a `Datastore` talks to a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that this datastore holds the given datasets.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef]) -> None:
        """Move the location information of the given datasets to the trash.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Report which of the given refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError

    @abstractmethod
    def emptyTrash(self) -> ContextManager[Iterable[DatasetIdRef]]:
        """Retrieve all the dataset ref IDs that are in the trash associated
        with this datastore, and then remove them if the context exits
        without an exception being raised.

        Returns
        -------
        ids : `ContextManager` [ `Iterable` [ `DatasetIdRef` ] ]
            Context manager yielding the IDs of datasets that can be safely
            removed from this datastore.  Can be empty.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as iter:
                for ref in iter:
                    # Remove artifacts associated with ref.id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator.  If multiple passes are required, it should be converted
        to a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.
        """
        raise NotImplementedError

214 

215 

class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instances if they wish to store
      internal records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.
    """

    def __init__(self, *, opaque: OpaqueTableStorageManager, universe: DimensionUniverse):
        self.opaque = opaque
        self.universe = universe

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   opaque: OpaqueTableStorageManager,
                   datasets: Type[DatasetRecordStorageManager],
                   universe: DimensionUniverse,
                   ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    # NOTE(review): the wording below refers to "collections"; confirm that
    # this is the intended term for a datastore bridge manager.
    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default), return a bridge object that is backed by
            storage that will not last past the end of the current process.
            This should be used whenever the same is true of the dataset's
            artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset.  Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """