
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef")

from abc import ABC, abstractmethod
from typing import (
    Any,
    ContextManager,
    Dict,
    Iterable,
    Iterator,
    Type,
    TYPE_CHECKING,
    Union,
)

from ...core.utils import immutable
from ...core import DatasetRef

if TYPE_CHECKING:
    from ...core import DatasetType, DimensionUniverse
    from ._database import Database, StaticTablesContext
    from ._datasets import DatasetRecordStorageManager
    from ._opaque import OpaqueTableStorageManager



@immutable
class FakeDatasetRef:
    """A fake `DatasetRef` that can be used internally by butler where
    only the dataset ID is available.

    Should only be used when the registry cannot be used to create a full
    `DatasetRef` from the ID. A particular use case is during dataset
    deletion, when only the ID is available.

    Parameters
    ----------
    id : `int`
        The dataset ID.
    """
    __slots__ = ("id",)

    def __new__(cls, id: int) -> FakeDatasetRef:
        self = super().__new__(cls)
        self.id = id
        return self

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"


    def __eq__(self, other: Any) -> bool:
        try:
            return self.id == other.id
        except AttributeError:
            return NotImplemented


    def __hash__(self) -> int:
        return hash(self.id)

    id: int
    """Unique integer that identifies this dataset.
    """

    @property
    def components(self) -> Dict[str, FakeDatasetRef]:
        """Component datasets (always an empty `dict` for `FakeDatasetRef`)."""
        return {}

    def allRefs(self, parents: bool = True) -> Iterator[FakeDatasetRef]:
        """Return all the nested component `DatasetRef` and optionally the
        parent.

        Parameters
        ----------
        parents : `bool`, optional
            If `True` (default) include the given datasets in the output
            iterable. If `False`, include only their components. Since
            a `FakeDatasetRef` never has components, setting this to
            `False` will yield no results.

        Yields
        ------
        ref : `FakeDatasetRef`
            Since there are never components, this will either yield
            itself or no results (depending on the value of ``parents``).
        """

        # A FakeDatasetRef has no components, so at most the parent itself
        # can be yielded, and only when requested.
        if parents:
            yield self


    @staticmethod
    def flatten(refs: Iterable[FakeDatasetRef], *,
                parents: bool = True) -> Iterator[FakeDatasetRef]:
        """Recursively transform an iterable over `FakeDatasetRef` to include
        nested component `FakeDatasetRef` instances.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `FakeDatasetRef` ]
            Input iterable to process.
        parents : `bool`, optional
            If `True` (default) include the given datasets in the output
            iterable. If `False`, include only their components, which
            for `FakeDatasetRef` means no results.

        Yields
        ------
        ref : `FakeDatasetRef`
            Either one of the given `FakeDatasetRef` instances (only if
            ``parents`` is `True`) or one of its (recursive) children.

        Notes
        -----
        If ``parents`` is `True`, components are guaranteed to be yielded
        before their parents.
        """
        for ref in refs:
            for subref in ref.allRefs(parents):
                yield subref

    def getCheckedId(self) -> int:
        """Return ``self.id``.

        This trivial method exists for compatibility with `DatasetRef`, for
        which checking is actually done.

        Returns
        -------
        id : `int`
            ``self.id``.
        """
        return self.id

    @property
    def datasetType(self) -> DatasetType:
        raise AttributeError("A FakeDatasetRef cannot be associated with a valid DatasetType")

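# Illustration only (not part of the original module): a minimal, hedged
# sketch of how FakeDatasetRef behaves, wrapped in a helper so that importing
# this module has no side effects. The helper name is hypothetical.
def _demoFakeDatasetRef() -> None:
    a = FakeDatasetRef(42)
    b = FakeDatasetRef(42)
    assert a == b                    # equality is defined by the dataset ID
    assert len({a, b}) == 1          # and so is hashing
    assert a.components == {}        # a fake ref never has components
    assert list(FakeDatasetRef.flatten([a])) == [a]
    assert list(FakeDatasetRef.flatten([a], parents=False)) == []

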

DatasetIdRef = Union[DatasetRef, FakeDatasetRef]
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.
"""
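# Illustration only: a hedged sketch of DatasetIdRef used as an annotation.
# Both `DatasetRef` and `FakeDatasetRef` expose getCheckedId(), so code
# written against this alias can treat the two interchangeably. The helper
# name is hypothetical.
def _checkedIdOf(ref: DatasetIdRef) -> int:
    return ref.getCheckedId()

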

class DatastoreRegistryBridge(ABC):
    """An abstract base class that defines the interface that a `Datastore`
    uses to communicate with a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """
    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef]) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()


    @abstractmethod
    def emptyTrash(self) -> ContextManager[Iterable[DatasetIdRef]]:
        """Retrieve all the dataset ref IDs that are in the trash
        associated with this datastore, and then remove them if the context
        exits without an exception being raised.

        Returns
        -------
        ids : `set` of `DatasetIdRef`
            The IDs of datasets that can be safely removed from this datastore.
            Can be empty.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                for ref in trashed:
                    # Remove artifacts associated with ref.id;
                    # raise an exception if something goes wrong.
                    ...

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator. If multiple passes are required, it should be converted to
        a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone; this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error if those are to be safe.
        """
        raise NotImplementedError()


    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """


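# Illustration only (not part of the original interfaces): a minimal in-memory
# sketch of a concrete bridge, assuming nothing beyond the abstract methods
# declared above. A real implementation would persist this state in Registry
# tables rather than in Python sets; the class and attribute names below are
# hypothetical.
from contextlib import contextmanager


class _InMemoryDatastoreRegistryBridge(DatastoreRegistryBridge):
    """Toy bridge that tracks dataset IDs in process memory."""

    def __init__(self, datastoreName: str):
        super().__init__(datastoreName)
        self._present: set = set()
        self._trash: set = set()

    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        self._present.update(ref.getCheckedId() for ref in refs)

    def moveToTrash(self, refs: Iterable[DatasetIdRef]) -> None:
        ids = {ref.getCheckedId() for ref in refs}
        self._present -= ids
        self._trash |= ids

    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        return [ref for ref in refs if ref.getCheckedId() in self._present]

    @contextmanager
    def emptyTrash(self) -> Iterator[Iterable[DatasetIdRef]]:
        # Yield the trashed IDs as FakeDatasetRef instances; forget them only
        # if the caller exits the context without raising.
        yield {FakeDatasetRef(datasetId) for datasetId in self._trash}
        self._trash.clear()

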

class DatastoreRegistryBridgeManager(ABC):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instances if they wish to store
      internal records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.
    """
    def __init__(self, *, opaque: OpaqueTableStorageManager, universe: DimensionUniverse):
        self.opaque = opaque
        self.universe = universe


    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   opaque: OpaqueTableStorageManager,
                   datasets: Type[DatasetRecordStorageManager],
                   universe: DimensionUniverse,
                   ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()


    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.

        ephemeral : `bool`, optional
            If `True` (default), return a bridge object that is
            backed by storage that will not last past the end of the current
            process. This should be used whenever the same is true of the
            datasets' artifacts.


        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        raise NotImplementedError()


    @abstractmethod
    def findDatastores(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset. Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """
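

# Illustration only: a hedged sketch of how a Datastore is expected to use the
# DatastoreRegistryBridgeManager it receives at construction. Both arguments
# are hypothetical stand-ins supplied by the caller, and the datastore name is
# made up for the example.
def _demoBridgeManagerUsage(manager: DatastoreRegistryBridgeManager,
                            ref: DatasetRef) -> None:
    # Obtain a bridge under this datastore's name and record a dataset in it.
    bridge = manager.register("exampleDatastore")
    bridge.insert([ref])
    # The same dataset should now be reported by check() and findDatastores().
    assert list(bridge.check([ref])) == [ref]
    assert "exampleDatastore" in manager.findDatastores(ref)
    # Trashed datasets are only forgotten if the block exits cleanly.
    bridge.moveToTrash([ref])
    with bridge.emptyTrash() as trashed:
        for trashedRef in trashed:
            pass  # remove the artifacts for trashedRef.id here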