Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from __future__ import annotations 

2 

3__all__ = ("ByDimensionsDatasetRecordStorageManager",) 

4 

5from typing import ( 

6 Dict, 

7 Iterable, 

8 Iterator, 

9 Optional, 

10 Tuple, 

11 TYPE_CHECKING, 

12) 

13 

14import sqlalchemy 

15 

16from lsst.daf.butler import ( 

17 DatasetRef, 

18 DatasetType, 

19 ddl, 

20 DimensionGraph, 

21 DimensionUniverse, 

22) 

23from lsst.daf.butler.registry import ConflictingDefinitionError 

24from lsst.daf.butler.registry.interfaces import DatasetRecordStorage, DatasetRecordStorageManager 

25 

26from .tables import makeStaticTableSpecs, addDatasetForeignKey, makeDynamicTableName, makeDynamicTableSpec 

27from ._storage import ByDimensionsDatasetRecordStorage 

28 

29if TYPE_CHECKING: 29 ↛ 30line 29 didn't jump to line 30, because the condition on line 29 was never true

30 from lsst.daf.butler.registry.interfaces import ( 

31 CollectionManager, 

32 Database, 

33 StaticTablesContext, 

34 ) 

35 from .tables import StaticDatasetTablesTuple 

36 

37 

class ByDimensionsDatasetRecordStorageManager(DatasetRecordStorageManager):
    """A manager class for datasets that uses one dataset-collection table for
    each group of dataset types that share the same dimensions.

    In addition to the table organization, this class makes a number of
    other design choices that would have been cumbersome (to say the least) to
    try to pack into its name:

    - It uses a private surrogate integer autoincrement field to identify
      dataset types, instead of using the name as the primary and foreign key
      directly.

    - It aggressively loads all DatasetTypes into memory instead of fetching
      them from the database only when needed or attempting more clever forms
      of caching.

    Alternative implementations that make different choices for these while
    keeping the same general table organization might be reasonable as well.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    collections : `CollectionManager`
        Manager object for the collections in this `Registry`.
    static : `StaticDatasetTablesTuple`
        Named tuple of `sqlalchemy.schema.Table` instances for all static
        tables used by this class.
    universe : `DimensionUniverse`, optional
        Universe of all dimensions known to this `Registry`.  Remembered so
        `refresh` can be called with no arguments after construction (see
        `getDatasetRef`); always provided when constructed via `initialize`.
    """
    def __init__(self, *, db: Database, collections: CollectionManager, static: StaticDatasetTablesTuple,
                 universe: Optional[DimensionUniverse] = None):
        self._db = db
        self._collections = collections
        self._static = static
        # Cached universe used as the fallback for refresh(); may be None if
        # this manager was constructed directly rather than via initialize().
        self._universe = universe
        # In-memory caches of per-dataset-type storage objects, keyed by
        # dataset type name and by surrogate integer id, respectively.
        # Populated by refresh() and register().
        self._byName: Dict[str, ByDimensionsDatasetRecordStorage] = {}
        self._byId: Dict[int, ByDimensionsDatasetRecordStorage] = {}

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        # Docstring inherited from DatasetRecordStorageManager.
        specs = makeStaticTableSpecs(type(collections), universe=universe)
        static = context.addTableTuple(specs)
        # Pass the universe along so parameterless refresh() calls work later.
        return cls(db=db, collections=collections, static=static, universe=universe)

    @classmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *, name: str = "dataset",
                             constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs) -> ddl.FieldSpec:
        # Docstring inherited from DatasetRecordStorageManager.
        return addDatasetForeignKey(tableSpec, name=name, onDelete=onDelete, constraint=constraint, **kwargs)

    def refresh(self, *, universe: Optional[DimensionUniverse] = None) -> None:
        # Docstring inherited from DatasetRecordStorageManager.
        #
        # ``universe`` is now optional: when omitted, the universe remembered
        # at construction time is used.  (Previously it was required, which
        # made the no-argument ``self.refresh()`` fallback in getDatasetRef
        # raise TypeError if it was ever reached.)
        if universe is None:
            universe = self._universe
            if universe is None:
                raise RuntimeError(
                    "No DimensionUniverse available; pass one via the 'universe' argument "
                    "or construct this manager with one."
                )
        else:
            # Remember the explicitly-given universe for future parameterless
            # refresh() calls.
            self._universe = universe
        # Build fresh caches and swap them in atomically at the end, so a
        # failure partway through leaves the old (consistent) caches intact.
        byName: Dict[str, ByDimensionsDatasetRecordStorage] = {}
        byId: Dict[int, ByDimensionsDatasetRecordStorage] = {}
        c = self._static.dataset_type.columns
        for row in self._db.query(self._static.dataset_type.select()).fetchall():
            name = row[c.name]
            dimensions = DimensionGraph.decode(row[c.dimensions_encoded], universe=universe)
            datasetType = DatasetType(name, dimensions, row[c.storage_class])
            dynamic = self._db.getExistingTable(makeDynamicTableName(datasetType),
                                               makeDynamicTableSpec(datasetType, type(self._collections)))
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            byName[datasetType.name] = storage
            byId[storage._dataset_type_id] = storage
        self._byName = byName
        self._byId = byId

    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        # Docstring inherited from DatasetRecordStorageManager.
        return self._byName.get(name)

    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        # Docstring inherited from DatasetRecordStorageManager.
        storage = self._byName.get(datasetType.name)
        if storage is None:
            # Not in the cache: insert-or-fetch the dataset_type row,
            # comparing the definition against any existing one.
            row, inserted = self._db.sync(
                self._static.dataset_type,
                keys={"name": datasetType.name},
                compared={
                    "dimensions_encoded": datasetType.dimensions.encode(),
                    "storage_class": datasetType.storageClass.name,
                },
                returning=["id"],
            )
            # Ensure the per-dimensions dataset-collection table exists.
            dynamic = self._db.ensureTableExists(
                makeDynamicTableName(datasetType),
                makeDynamicTableSpec(datasetType, type(self._collections)),
            )
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            self._byName[datasetType.name] = storage
            self._byId[storage._dataset_type_id] = storage
        else:
            # Cached: verify the given definition matches the stored one.
            if datasetType != storage.datasetType:
                raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
                                                 f"with database definition {storage.datasetType}.")
            inserted = False
        # A newly-registered composite also registers all of its components.
        if inserted and datasetType.isComposite:
            for component in datasetType.storageClass.components:
                self.register(datasetType.makeComponentDatasetType(component))
        return storage, inserted

    def __iter__(self) -> Iterator[DatasetType]:
        for storage in self._byName.values():
            yield storage.datasetType

    def getDatasetRef(self, id: int) -> Optional[DatasetRef]:
        # Docstring inherited from DatasetRecordStorageManager.
        sql = sqlalchemy.sql.select(
            [
                self._static.dataset.columns.dataset_type_id,
                self._static.dataset.columns[self._collections.getRunForeignKeyName()],
            ]
        ).select_from(
            self._static.dataset
        ).where(
            self._static.dataset.columns.id == id
        )
        row = self._db.query(sql).fetchone()
        if row is None:
            return None
        recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
        if recordsForType is None:
            # Cache miss: another client may have registered the dataset type
            # since our last refresh; reload from the database and retry.
            self.refresh()
            recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
            assert recordsForType is not None, "Should be guaranteed by foreign key constraints."
        return DatasetRef(
            recordsForType.datasetType,
            dataId=recordsForType.getDataId(id=id),
            id=id,
            run=self._collections[row[self._collections.getRunForeignKeyName()]].name
        )

    def attachComponents(self, composites: Iterable[Tuple[DatasetRef, Dict[str, DatasetRef]]]
                         ) -> Iterator[DatasetRef]:
        # Docstring inherited from DatasetRecordStorageManager.
        rows = []
        results = []
        for parentRef, components in composites:
            rows.extend(
                {
                    "component_name": componentName,
                    "component_dataset_id": componentRef.getCheckedId(),
                    "parent_dataset_id": parentRef.getCheckedId(),
                    # "simple" components share the parent's data ID and run,
                    # letting fetchComponents rebuild them without a lookup.
                    "simple": (parentRef.dataId == componentRef.dataId and parentRef.run == componentRef.run)
                }
                for componentName, componentRef in components.items()
            )
            results.append(parentRef.resolved(parentRef.id, parentRef.run, components=components))
        self._db.insert(self._static.dataset_composition, *rows)
        yield from results

    def fetchComponents(self, ref: DatasetRef) -> DatasetRef:
        # Docstring inherited from DatasetRecordStorageManager.
        sql = sqlalchemy.sql.select(
            [
                self._static.dataset_composition.columns.component_dataset_id.label("id"),
                self._static.dataset_composition.columns.component_name.label("name"),
                self._static.dataset_composition.columns.simple.label("simple"),
            ]
        ).select_from(
            self._static.dataset_composition
        ).where(
            self._static.dataset_composition.columns.parent_dataset_id == ref.getCheckedId()
        )
        components = {}
        for row in self._db.query(sql).fetchall():
            if row["simple"]:
                # Simple component: same data ID and run as the parent, so we
                # can construct the ref directly without another query.
                datasetType = ref.datasetType.makeComponentDatasetType(row["name"])
                components[row["name"]] = DatasetRef(datasetType, ref.dataId, id=row["id"], run=ref.run)
            else:
                components[row["name"]] = self.getDatasetRef(row["id"])
        return ref.resolved(id=ref.id, run=ref.run, components=components)