# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("MonolithicDatastoreRegistryBridgeManager", "MonolithicDatastoreRegistryBridge")

from collections import namedtuple
from contextlib import contextmanager
import copy
from typing import cast, Dict, Iterable, Iterator, List, Optional, Type, TYPE_CHECKING

import sqlalchemy

from lsst.daf.butler import DatasetRef, ddl, NamedValueSet
from lsst.daf.butler.registry.interfaces import (
    DatasetIdRef,
    DatastoreRegistryBridge,
    DatastoreRegistryBridgeManager,
    FakeDatasetRef,
    VersionTuple,
)
from lsst.daf.butler.registry.bridge.ephemeral import EphemeralDatastoreRegistryBridge

if TYPE_CHECKING:
    from lsst.daf.butler import DimensionUniverse
    from lsst.daf.butler.registry.interfaces import (
        Database,
        DatasetRecordStorageManager,
        OpaqueTableStorageManager,
        StaticTablesContext,
    )

_TablesTuple = namedtuple(
    "_TablesTuple",
    [
        "dataset_location",
        "dataset_location_trash",
    ]
)

# This has to be updated on every schema change.
_VERSION = VersionTuple(0, 2, 0)


def _makeTableSpecs(datasets: Type[DatasetRecordStorageManager]) -> _TablesTuple:
    """Construct specifications for tables used by the monolithic datastore
    bridge classes.

    Parameters
    ----------
    datasets : subclass of `DatasetRecordStorageManager`
        Manager class for datasets; used only to create foreign key fields.

    Returns
    -------
    specs : `_TablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    # We want the dataset_location and dataset_location_trash tables
    # to have the same definition, aside from the behavior of their link
    # to the dataset table: the trash table has no foreign key constraint.
    dataset_location_spec = ddl.TableSpec(
        doc=(
            "A table that provides information on whether a dataset is stored in "
            "one or more Datastores. The presence or absence of a record in this "
            "table itself indicates whether the dataset is present in that "
            "Datastore."
        ),
        fields=NamedValueSet([
            ddl.FieldSpec(
                name="datastore_name",
                dtype=sqlalchemy.String,
                length=256,
                primaryKey=True,
                nullable=False,
                doc="Name of the Datastore this entry corresponds to.",
            ),
        ]),
    )
    dataset_location = copy.deepcopy(dataset_location_spec)
    datasets.addDatasetForeignKey(dataset_location, primaryKey=True)
    dataset_location_trash = copy.deepcopy(dataset_location_spec)
    datasets.addDatasetForeignKey(dataset_location_trash, primaryKey=True, constraint=False)
    return _TablesTuple(
        dataset_location=dataset_location,
        dataset_location_trash=dataset_location_trash,
    )
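# For reference only: a rough sketch of the tables these specs describe. The
# exact DDL depends on the database backend and on the dataset_id column that
# datasets.addDatasetForeignKey adds, so treat this as an illustration, not a
# contract.
#
#     dataset_location(datastore_name VARCHAR(256) NOT NULL,
#                      dataset_id NOT NULL REFERENCES <dataset table>,
#                      PRIMARY KEY (datastore_name, dataset_id))
#     dataset_location_trash: the same columns and primary key, but with no
#                             foreign key constraint on dataset_id.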

class MonolithicDatastoreRegistryBridge(DatastoreRegistryBridge):
    """An implementation of `DatastoreRegistryBridge` that uses the same two
    tables for all non-ephemeral datastores.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    db : `Database`
        Object providing a database connection and generic abstractions.
    tables : `_TablesTuple`
        Named tuple containing `sqlalchemy.schema.Table` instances.
    """

    def __init__(self, datastoreName: str, *, db: Database, tables: _TablesTuple):
        super().__init__(datastoreName)
        self._db = db
        self._tables = tables

    def _refsToRows(self, refs: Iterable[DatasetIdRef]) -> List[dict]:
        """Transform an iterable of `DatasetRef` or `FakeDatasetRef` objects to
        a list of dictionaries that match the schema of the tables used by this
        class.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` or `FakeDatasetRef` ]
            Datasets to transform.

        Returns
        -------
        rows : `list` [ `dict` ]
            List of dictionaries, with "datastore_name" and "dataset_id" keys.
        """
        return [{"datastore_name": self.datastoreName, "dataset_id": ref.getCheckedId()} for ref in refs]
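    # For illustration only (values hypothetical): two refs with dataset_id
    # 101 and 102 in a bridge named "PosixDatastore@<root>" would map to
    #     [{"datastore_name": "PosixDatastore@<root>", "dataset_id": 101},
    #      {"datastore_name": "PosixDatastore@<root>", "dataset_id": 102}]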

    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        # Docstring inherited from DatastoreRegistryBridge
        self._db.insert(self._tables.dataset_location, *self._refsToRows(refs))

    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        # Docstring inherited from DatastoreRegistryBridge
        rows = self._refsToRows(self.check(refs))
        self._db.delete(self._tables.dataset_location, ["datastore_name", "dataset_id"], *rows)

    def moveToTrash(self, refs: Iterable[DatasetIdRef]) -> None:
        # Docstring inherited from DatastoreRegistryBridge
        # TODO: avoid self.check() call via queries like
        #     INSERT INTO dataset_location_trash
        #         SELECT datastore_name, dataset_id FROM dataset_location
        #         WHERE datastore_name=? AND dataset_id IN (?);
        #     DELETE FROM dataset_location
        #         WHERE datastore_name=? AND dataset_id IN (?);
        # ...but the Database interface doesn't support those kinds of queries
        # right now.
        rows = self._refsToRows(self.check(refs))
        with self._db.transaction():
            self._db.delete(self._tables.dataset_location, ["datastore_name", "dataset_id"], *rows)
            self._db.insert(self._tables.dataset_location_trash, *rows)

    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        # Docstring inherited from DatastoreRegistryBridge
        byId = {ref.getCheckedId(): ref for ref in refs}
        sql = sqlalchemy.sql.select(
            [self._tables.dataset_location.columns.dataset_id]
        ).select_from(
            self._tables.dataset_location
        ).where(
            sqlalchemy.sql.and_(
                self._tables.dataset_location.columns.datastore_name == self.datastoreName,
                self._tables.dataset_location.columns.dataset_id.in_(byId.keys())
            )
        )
        for row in self._db.query(sql).fetchall():
            yield byId[row["dataset_id"]]
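    # For reference only: the SELECT built in check() above renders roughly as
    #     SELECT dataset_id FROM dataset_location
    #     WHERE datastore_name = :datastore_name AND dataset_id IN (...)
    # with the exact parameter style depending on the database backend.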

    @contextmanager
    def emptyTrash(self) -> Iterator[Iterable[DatasetIdRef]]:
        # Docstring inherited from DatastoreRegistryBridge
        sql = sqlalchemy.sql.select(
            [self._tables.dataset_location_trash.columns.dataset_id]
        ).select_from(
            self._tables.dataset_location_trash
        ).where(
            self._tables.dataset_location_trash.columns.datastore_name == self.datastoreName
        )
        # Run the query and transform the results into a list of dicts that we
        # can use later to delete these rows.
        rows = [{"dataset_id": row["dataset_id"], "datastore_name": self.datastoreName}
                for row in self._db.query(sql).fetchall()]
        # Enter the context manager block by yielding a generator expression
        # over the trashed datasets for the caller to iterate over.
        yield (FakeDatasetRef(row["dataset_id"]) for row in rows)
        # No exception was raised in the context manager block, so delete
        # those rows from the trash table.
        self._db.delete(self._tables.dataset_location_trash, ["dataset_id", "datastore_name"], *rows)
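    # Illustrative (hypothetical) datastore-side use of emptyTrash():
    #
    #     with bridge.emptyTrash() as trashed:
    #         for ref in trashed:
    #             remove_artifact(ref)  # hypothetical cleanup of stored files
    #
    # The trash rows are deleted from the database only if the block exits
    # without raising, keeping artifact removal and bookkeeping consistent.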

class MonolithicDatastoreRegistryBridgeManager(DatastoreRegistryBridgeManager):
    """An implementation of `DatastoreRegistryBridgeManager` that uses the same
    two tables for all non-ephemeral datastores.

    Parameters
    ----------
    db : `Database`
        Object providing a database connection and generic abstractions.
    tables : `_TablesTuple`
        Named tuple containing `sqlalchemy.schema.Table` instances.
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    """

    def __init__(self, *, db: Database, tables: _TablesTuple,
                 opaque: OpaqueTableStorageManager, universe: DimensionUniverse):
        super().__init__(opaque=opaque, universe=universe)
        self._db = db
        self._tables = tables
        self._ephemeral: Dict[str, EphemeralDatastoreRegistryBridge] = {}

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   opaque: OpaqueTableStorageManager,
                   datasets: Type[DatasetRecordStorageManager],
                   universe: DimensionUniverse,
                   ) -> DatastoreRegistryBridgeManager:
        # Docstring inherited from DatastoreRegistryBridgeManager
        tables = context.addTableTuple(_makeTableSpecs(datasets))
        return cls(db=db, tables=cast(_TablesTuple, tables), opaque=opaque, universe=universe)

    def refresh(self) -> None:
        # Docstring inherited from DatastoreRegistryBridgeManager
        # This implementation has no in-Python state that depends on which
        # datastores exist, so there's nothing to do.
        pass

    def register(self, name: str, *, ephemeral: bool = False) -> DatastoreRegistryBridge:
        # Docstring inherited from DatastoreRegistryBridgeManager
        if ephemeral:
            return self._ephemeral.setdefault(name, EphemeralDatastoreRegistryBridge(name))
        return MonolithicDatastoreRegistryBridge(name, db=self._db, tables=self._tables)
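    # Sketch of typical use by a datastore during construction (the name shown
    # is hypothetical):
    #
    #     bridge = manager.register("FileDatastore@<butlerRoot>")
    #     bridge.insert(refs)  # record that refs now exist in that datastore
    #
    # With ephemeral=True the returned bridge is an in-memory
    # EphemeralDatastoreRegistryBridge instead, so dataset locations for that
    # datastore are not persisted to the database.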

    def findDatastores(self, ref: DatasetRef) -> Iterable[str]:
        # Docstring inherited from DatastoreRegistryBridgeManager
        sql = sqlalchemy.sql.select(
            [self._tables.dataset_location.columns.datastore_name]
        ).select_from(
            self._tables.dataset_location
        ).where(
            self._tables.dataset_location.columns.dataset_id == ref.getCheckedId()
        )
        for row in self._db.query(sql).fetchall():
            yield row[self._tables.dataset_location.columns.datastore_name]
        for name, bridge in self._ephemeral.items():
            if ref in bridge:
                yield name

    @classmethod
    def currentVersion(cls) -> Optional[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return _VERSION

    def schemaDigest(self) -> Optional[str]:
        # Docstring inherited from VersionedExtension.
        return self._defaultSchemaDigest(self._tables, self._db.dialect)