Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef") 

24 

25from abc import ABC, abstractmethod 

26from typing import ( 

27 Any, 

28 cast, 

29 ContextManager, 

30 Dict, 

31 Iterable, 

32 Iterator, 

33 Type, 

34 TYPE_CHECKING, 

35 Union, 

36) 

37 

38from ...core.utils import immutable 

39from ...core import DatasetRef 

40 

41if TYPE_CHECKING: 41 ↛ 42line 41 didn't jump to line 42, because the condition on line 41 was never true

42 from ...core import DimensionUniverse 

43 from ._database import Database, StaticTablesContext 

44 from ._datasets import DatasetRecordStorageManager 

45 from ._opaque import OpaqueTableStorageManager 

46 

47 

@immutable
class FakeDatasetRef:
    """A minimal stand-in for `DatasetRef` that carries only a dataset ID.

    Intended for internal butler use when registry cannot be used to build
    a full `DatasetRef` from the ID, for example during dataset deletion
    when only the ID is available.

    Parameters
    ----------
    id : `int`
        The dataset ID.
    """

    __slots__ = ("id",)

    def __new__(cls, id: int) -> FakeDatasetRef:
        # The attribute is assigned here rather than in ``__init__``;
        # presumably this is what ``@immutable`` expects -- confirm against
        # the decorator's definition before changing the construction path.
        self = super().__new__(cls)
        self.id = id
        return self

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __eq__(self, other: Any) -> bool:
        """Compare by ``id`` against any object that exposes an ``id``.

        Returns `NotImplemented` when ``other`` has no ``id`` attribute so
        that Python can fall back to the reflected comparison.
        """
        # ``NotImplemented`` is a singleton value, not a type, so it must not
        # appear in the return annotation; ``bool`` is the conventional
        # annotation for ``__eq__`` even though ``NotImplemented`` may be
        # returned.
        try:
            return self.id == other.id
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        # Hash on the same single field that ``__eq__`` compares.
        return hash(self.id)

    id: int
    """Unique integer that identifies this dataset.
    """

    @property
    def components(self) -> Dict[str, FakeDatasetRef]:
        """Component references of this dataset; always an empty `dict`."""
        return {}

    @staticmethod
    def flatten(refs: Iterable[FakeDatasetRef], *, parents: bool = True) -> Iterator[DatasetRef]:
        """Delegate to `DatasetRef.flatten`.

        Parameters
        ----------
        refs : `Iterable` [ `FakeDatasetRef` ]
            References to flatten.
        parents : `bool`, optional
            Forwarded unchanged to `DatasetRef.flatten`.

        Returns
        -------
        flat : `Iterator`
            Whatever `DatasetRef.flatten` yields for ``refs``.
        """
        # ``cast`` does nothing at runtime; it only tells the static type
        # checker to accept these refs where `DatasetRef` is expected.
        return DatasetRef.flatten(cast(Iterable[DatasetRef], refs), parents=parents)

    def getCheckedId(self) -> int:
        """Return ``self.id``.

        This trivial method exists for compatibility with `DatasetRef`, for
        which checking is actually done.

        Returns
        -------
        id : `int`
            ``self.id``.
        """
        return self.id

108 

109 

DatasetIdRef = Union[DatasetRef, FakeDatasetRef]
"""Type alias for annotations that accept either a `DatasetRef` or a
`FakeDatasetRef`.
"""

113 

114 

class DatastoreRegistryBridge(ABC):
    """Abstract interface through which a `Datastore` talks to a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that this datastore holds the given datasets.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef]) -> None:
        """Move the location information of the given datasets to trash.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Report which of the given refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError

    @abstractmethod
    def emptyTrash(self) -> ContextManager[Iterable[DatasetIdRef]]:
        """Retrieve all the dataset ref IDs that are in the trash associated
        with this datastore, and then remove them if the context exits
        without an exception being raised.

        Returns
        -------
        ids : `ContextManager` [ `Iterable` [ `DatasetIdRef` ] ]
            Context manager yielding the IDs of datasets that can be safely
            removed from this datastore.  The iterable can be empty.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as refs:
                for ref in refs:
                    # Remove artifacts associated with ref.id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator.  If multiple passes are required, it should be converted
        to a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.
        """
        raise NotImplementedError

    datastoreName: str
    """Name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """

220 

221 

class DatastoreRegistryBridgeManager(ABC):
    """Abstract interface between `Registry` and `Datastore` that is used
    when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what
      is present in the datastore (needed by all datastores that are not
      just forwarders);

    - one or more `OpaqueTableStorage` instances if they wish to store
      internal records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or
      validate filename templates.
    """

    def __init__(self, *, opaque: OpaqueTableStorageManager, universe: DimensionUniverse):
        # Targets are assigned left to right: ``opaque`` first, then
        # ``universe``.
        self.opaque, self.universe = opaque, universe

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        opaque: OpaqueTableStorageManager,
        datasets: Type[DatasetRecordStorageManager],
        universe: DimensionUniverse,
    ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`;
            used to declare any tables that should always be present in a
            layer implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables,
            provided to allow Datastores to store their internal
            information inside the Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset
            tables in this registry; should be used only to create foreign
            keys to those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that other clients may have registered since it was
        initialized or last refreshed.
        """
        raise NotImplementedError

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default), return a bridge object that is backed
            by storage that will not last past the end of the current
            process.  This should be used whenever the same is true of the
            dataset's artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use
            to communicate with the `Registry`.
        """
        raise NotImplementedError

    @abstractmethod
    def findDatastores(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset.  Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided to
    allow Datastores to store their internal information inside the Registry
    database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """