Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from __future__ import annotations 

2 

# Names exported by this module.
__all__ = ("ByDimensionsDatasetRecordStorageManager",)

4 

5from typing import ( 

6 Any, 

7 Dict, 

8 Iterator, 

9 Optional, 

10 Tuple, 

11 TYPE_CHECKING, 

12) 

13 

14import copy 

15import sqlalchemy 

16 

17from lsst.daf.butler import ( 

18 DatasetRef, 

19 DatasetType, 

20 ddl, 

21 DimensionGraph, 

22 DimensionUniverse, 

23) 

24from lsst.daf.butler.registry import ConflictingDefinitionError 

25from lsst.daf.butler.registry.interfaces import ( 

26 DatasetRecordStorage, 

27 DatasetRecordStorageManager, 

28 VersionTuple 

29) 

30 

31from .tables import makeStaticTableSpecs, addDatasetForeignKey, makeDynamicTableName, makeDynamicTableSpec 

32from ._storage import ByDimensionsDatasetRecordStorage 

33 

34if TYPE_CHECKING: 34 ↛ 35line 34 didn't jump to line 35, because the condition on line 34 was never true

35 from lsst.daf.butler.registry.interfaces import ( 

36 CollectionManager, 

37 Database, 

38 StaticTablesContext, 

39 ) 

40 from .tables import StaticDatasetTablesTuple 

41 

42 

# Schema version reported by currentVersion(); this has to be updated on
# every schema change.
_VERSION = VersionTuple(0, 1, 0)

45 

46 

class ByDimensionsDatasetRecordStorageManager(DatasetRecordStorageManager):
    """A dataset manager that groups dataset types by their dimensions,
    using one dataset-collection table per group.

    Beyond that table organization, two design decisions are baked into
    this implementation that would have been awkward to encode in its
    name:

    - Dataset types are identified by a private surrogate autoincrement
      integer, rather than using the type name directly as the primary
      and foreign key.

    - All `DatasetType` definitions are eagerly loaded into memory up
      front, instead of being fetched from the database on demand or via
      some cleverer caching scheme.

    Alternative implementations that share the same table organization
    but make different choices here would be reasonable as well.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    collections : `CollectionManager`
        Manager object for the collections in this `Registry`.
    static : `StaticDatasetTablesTuple`
        Named tuple of `sqlalchemy.schema.Table` instances for all static
        tables used by this class.
    """
    def __init__(self, *, db: Database, collections: CollectionManager, static: StaticDatasetTablesTuple):
        self._db = db
        self._collections = collections
        self._static = static
        # In-memory caches of per-dataset-type storage objects, keyed two
        # ways; refresh() rebuilds both from the database.
        self._byName: Dict[str, ByDimensionsDatasetRecordStorage] = {}
        self._byId: Dict[int, ByDimensionsDatasetRecordStorage] = {}

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        # Docstring inherited from DatasetRecordStorageManager.
        specs = makeStaticTableSpecs(type(collections), universe=universe)
        static: StaticDatasetTablesTuple = context.addTableTuple(specs)  # type: ignore
        return cls(db=db, collections=collections, static=static)

    @classmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *, name: str = "dataset",
                             constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited from DatasetRecordStorageManager.
        return addDatasetForeignKey(tableSpec, name=name, onDelete=onDelete, constraint=constraint,
                                    **kwargs)

    def refresh(self, *, universe: DimensionUniverse) -> None:
        # Docstring inherited from DatasetRecordStorageManager.
        # Build fresh caches from the dataset_type table, then swap them
        # in only once both are complete.
        newByName = {}
        newById = {}
        columns = self._static.dataset_type.columns
        for record in self._db.query(self._static.dataset_type.select()).fetchall():
            typeName = record[columns.name]
            graph = DimensionGraph.decode(record[columns.dimensions_encoded], universe=universe)
            datasetType = DatasetType(typeName, graph, record[columns.storage_class])
            dynamic = self._db.getExistingTable(
                makeDynamicTableName(datasetType),
                makeDynamicTableSpec(datasetType, type(self._collections)),
            )
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=record["id"],
                                                       collections=self._collections)
            newByName[datasetType.name] = storage
            newById[storage._dataset_type_id] = storage
        self._byName = newByName
        self._byId = newById

    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        # Docstring inherited from DatasetRecordStorageManager.
        parentName, componentName = DatasetType.splitDatasetTypeName(name)
        parentStorage = self._byName.get(parentName)
        if parentStorage is None or componentName is None:
            return parentStorage
        # Components share the parent's records; hand back a shallow copy
        # of the parent's storage with the component's DatasetType
        # attached.
        componentStorage = copy.copy(parentStorage)
        componentStorage.datasetType = parentStorage.datasetType.makeComponentDatasetType(componentName)
        return componentStorage

    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        # Docstring inherited from DatasetRecordStorageManager.
        if datasetType.isComponent():
            raise ValueError("Component dataset types can not be stored in registry."
                             f" Rejecting {datasetType.name}")
        storage = self._byName.get(datasetType.name)
        if storage is not None:
            # Already cached; just verify the two definitions agree.
            if datasetType != storage.datasetType:
                raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
                                                 f"with database definition {storage.datasetType}.")
            return storage, False
        # Not cached: insert-or-compare the dataset_type row, make sure the
        # dynamic dataset-collection table exists, and cache the result.
        row, inserted = self._db.sync(
            self._static.dataset_type,
            keys={"name": datasetType.name},
            compared={
                "dimensions_encoded": datasetType.dimensions.encode(),
                "storage_class": datasetType.storageClass.name,
            },
            returning=["id"],
        )
        assert row is not None
        dynamic = self._db.ensureTableExists(
            makeDynamicTableName(datasetType),
            makeDynamicTableSpec(datasetType, type(self._collections)),
        )
        storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                   static=self._static, dynamic=dynamic,
                                                   dataset_type_id=row["id"],
                                                   collections=self._collections)
        self._byName[datasetType.name] = storage
        self._byId[storage._dataset_type_id] = storage
        return storage, inserted

    def __iter__(self) -> Iterator[DatasetType]:
        yield from (storage.datasetType for storage in self._byName.values())

    def getDatasetRef(self, id: int, *, universe: DimensionUniverse) -> Optional[DatasetRef]:
        # Docstring inherited from DatasetRecordStorageManager.
        datasetColumns = self._static.dataset.columns
        runKey = self._collections.getRunForeignKeyName()
        sql = sqlalchemy.sql.select(
            [
                datasetColumns.dataset_type_id,
                datasetColumns[runKey],
            ]
        ).select_from(
            self._static.dataset
        ).where(
            datasetColumns.id == id
        )
        row = self._db.query(sql).fetchone()
        if row is None:
            return None
        recordsForType = self._byId.get(row[datasetColumns.dataset_type_id])
        if recordsForType is None:
            # The cache may be stale (another client registered a new
            # dataset type); reload it and try once more.
            self.refresh(universe=universe)
            recordsForType = self._byId.get(row[datasetColumns.dataset_type_id])
            assert recordsForType is not None, "Should be guaranteed by foreign key constraints."
        return DatasetRef(
            recordsForType.datasetType,
            dataId=recordsForType.getDataId(id=id),
            id=id,
            run=self._collections[row[runKey]].name
        )

    @classmethod
    def currentVersion(cls) -> Optional[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return _VERSION

    def schemaDigest(self) -> Optional[str]:
        # Docstring inherited from VersionedExtension.
        return self._defaultSchemaDigest(self._static, self._db.dialect)