Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef") 

24 

25from abc import ABC, abstractmethod 

26from typing import ( 

27 Any, 

28 ContextManager, 

29 Iterable, 

30 Type, 

31 TYPE_CHECKING, 

32 Union, 

33) 

34 

35from ...core.utils import immutable 

36from ...core import DatasetRef 

37 

38if TYPE_CHECKING: 38 ↛ 39line 38 didn't jump to line 39, because the condition on line 38 was never true

39 from ...core import DatasetType, DimensionUniverse 

40 from ._database import Database, StaticTablesContext 

41 from ._datasets import DatasetRecordStorageManager 

42 from ._opaque import OpaqueTableStorageManager 

43 

44 

@immutable
class FakeDatasetRef:
    """A fake `DatasetRef` that can be used internally by butler where
    only the dataset ID is available.

    Should only be used when registry can not be used to create a full
    `DatasetRef` from the ID.  A particular use case is during dataset
    deletion when solely the ID is available.

    Parameters
    ----------
    id : `int`
        The dataset ID.

    Notes
    -----
    Equality and hashing are based solely on ``id``, and equality is
    duck-typed: any object exposing an ``id`` attribute is compared by that
    attribute.
    """

    __slots__ = ("id",)

    def __new__(cls, id: int) -> FakeDatasetRef:
        # The attribute is populated here rather than in ``__init__``.
        # NOTE(review): presumably this is what the ``immutable`` decorator
        # expects - confirm against its implementation.
        self = super().__new__(cls)
        self.id = id
        return self

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __eq__(self, other: Any) -> bool:
        # ``NotImplemented`` is a singleton value, not a type, so it must not
        # appear in the return annotation; ``bool`` is the conventional
        # annotation for rich comparisons that may defer to the other operand.
        try:
            return self.id == other.id
        except AttributeError:
            # ``other`` has no ``id``: let Python try the reflected
            # comparison instead of claiming inequality ourselves.
            return NotImplemented

    def __hash__(self) -> int:
        # Consistent with ``__eq__``: objects with equal IDs hash equally.
        return hash(self.id)

    id: int
    """Unique integer that identifies this dataset.
    """

    def getCheckedId(self) -> int:
        """Return ``self.id``.

        This trivial method exists for compatibility with `DatasetRef`, for
        which checking is actually done.

        Returns
        -------
        id : `int`
            ``self.id``.
        """
        return self.id

    @property
    def datasetType(self) -> DatasetType:
        """Always raise; a fake reference carries no dataset type.

        Raises
        ------
        AttributeError
            Raised unconditionally on access.
        """
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")

101 

102 

# Union of the full `DatasetRef` and the ID-only `FakeDatasetRef`; used in
# this module to annotate APIs that accept either kind of reference.
DatasetIdRef = Union[DatasetRef, FakeDatasetRef]
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.
"""

106 

107 

class DatastoreRegistryBridge(ABC):
    """Abstract interface through which a `Datastore` communicates with a
    `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef]) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError

    @abstractmethod
    def emptyTrash(self) -> ContextManager[Iterable[DatasetIdRef]]:
        """Retrieve all the dataset ref IDs that are in the trash
        associated with this datastore, and then remove them if the context
        exits without an exception being raised.

        Returns
        -------
        ids : `ContextManager` [ `Iterable` [ `DatasetIdRef` ] ]
            Context manager yielding the references of datasets that can be
            safely removed from this datastore.  The yielded iterable can be
            empty.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as iter:
                for ref in iter:
                    # Remove artifacts associated with ref.id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator.  If multiple passes are required, it should be converted to
        a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.
        """
        raise NotImplementedError

213 

214 

class DatastoreRegistryBridgeManager(ABC):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instances if they wish to store
      internal records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.

    """
    def __init__(self, *, opaque: OpaqueTableStorageManager, universe: DimensionUniverse):
        self.opaque = opaque
        self.universe = universe

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   opaque: OpaqueTableStorageManager,
                   datasets: Type[DatasetRecordStorageManager],
                   universe: DimensionUniverse,
                   ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        # NOTE(review): the summary speaks of "collections", which looks
        # copied from another manager; presumably datastore bridge state is
        # meant - confirm and reword.
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default in this signature), return a bridge object
            that is backed by storage that will not last past the end of the
            current process.  This should be used whenever the same is true
            of the dataset's artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        # NOTE(review): the default declared here is ``True``; a concrete
        # override that declares a different default silently wins for callers
        # that omit ``ephemeral`` - confirm implementations agree.
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset. Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """