Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef") 

24 

25from abc import ABC, abstractmethod 

26from typing import ( 

27 Any, 

28 ContextManager, 

29 Iterable, 

30 Type, 

31 TYPE_CHECKING, 

32 Union, 

33) 

34 

35from ...core.utils import immutable 

36from ...core import DatasetRef 

37from ._versioning import VersionedExtension 

38 

39if TYPE_CHECKING: 39 ↛ 40line 39 didn't jump to line 40, because the condition on line 39 was never true

40 from ...core import DatasetType, DimensionUniverse 

41 from ._database import Database, StaticTablesContext 

42 from ._datasets import DatasetRecordStorageManager 

43 from ._opaque import OpaqueTableStorageManager 

44 

45 

@immutable
class FakeDatasetRef:
    """Lightweight stand-in for `DatasetRef` that carries only a dataset ID.

    Intended for internal butler use in situations where the registry can
    not be used to build a full `DatasetRef` from the ID; one such case is
    dataset deletion, when solely the ID is available.

    Parameters
    ----------
    id : `int`
        The dataset ID.
    """

    __slots__ = ("id",)

    id: int
    """Unique integer that identifies this dataset.
    """

    def __init__(self, id: int):
        self.id = id

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __hash__(self) -> int:
        # Hash on the ID only, consistent with ``__eq__``.
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        # Compare by ID against anything exposing an ``id`` attribute; defer
        # to the other operand when an attribute lookup fails.
        try:
            same = self.id == other.id
        except AttributeError:
            return NotImplemented
        else:
            return same

    @property
    def datasetType(self) -> DatasetType:
        # Accessing the dataset type always fails: only the ID is known.
        message = "A FakeDatasetRef can not be associated with a valid DatasetType"
        raise AttributeError(message)

    def getCheckedId(self) -> int:
        """Return ``self.id``.

        This trivial accessor exists for compatibility with `DatasetRef`,
        for which checking is actually done.

        Returns
        -------
        id : `int`
            ``self.id``.
        """
        return self.id

100 

101 

DatasetIdRef = Union[DatasetRef, FakeDatasetRef]
"""Type-annotation alias accepting either a `DatasetRef` or a
`FakeDatasetRef`.
"""

105 

106 

class DatastoreRegistryBridge(ABC):
    """Abstract interface through which a `Datastore` communicates with a
    `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef]) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(self) -> ContextManager[Iterable[DatasetIdRef]]:
        """Retrieve all the dataset ref IDs that are in the trash
        associated with this datastore, and then remove them if the context
        exits without an exception being raised.

        Returns
        -------
        ids : `ContextManager` [ `Iterable` [ `DatasetIdRef` ] ]
            Context manager yielding the IDs of datasets that can be safely
            removed from this datastore.  The iterable can be empty.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as iter:
                for ref in iter:
                    # Remove artifacts associated with ref.id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator.  If multiple passes are required, it should be converted
        to a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.
        """
        raise NotImplementedError()

212 

213 

class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instances if they wish to store
      internal records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.

    """
    def __init__(self, *, opaque: OpaqueTableStorageManager, universe: DimensionUniverse):
        self.opaque = opaque
        self.universe = universe

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   opaque: OpaqueTableStorageManager,
                   datasets: Type[DatasetRecordStorageManager],
                   universe: DimensionUniverse,
                   ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default in this signature), return a bridge object
            that is backed by storage that will not last past the end of the
            current process.  This should be used whenever the same is true
            of the dataset's artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        # NOTE(review): the docstring used to state that `False` is the
        # default while the signature says `True`; the documentation now
        # follows the signature.  Confirm that concrete implementations and
        # callers agree before changing either side.
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset. Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """

345 """