from __future__ import annotations

__all__ = ("ByDimensionsDatasetRecordStorageManager",)

from typing import (
    Any,
    Dict,
    Iterator,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

import sqlalchemy

from lsst.daf.butler import (
    DatasetRef,
    DatasetType,
    ddl,
    DimensionGraph,
    DimensionUniverse,
)
from lsst.daf.butler.registry import ConflictingDefinitionError
from lsst.daf.butler.registry.interfaces import DatasetRecordStorage, DatasetRecordStorageManager

from .tables import makeStaticTableSpecs, addDatasetForeignKey, makeDynamicTableName, makeDynamicTableSpec
from ._storage import ByDimensionsDatasetRecordStorage

if TYPE_CHECKING:
    from lsst.daf.butler.registry.interfaces import (
        CollectionManager,
        Database,
        StaticTablesContext,
    )
    from .tables import StaticDatasetTablesTuple


class ByDimensionsDatasetRecordStorageManager(DatasetRecordStorageManager):
    """A manager class for datasets that uses one dataset-collection table for
    each group of dataset types that share the same dimensions.

    In addition to the table organization, this class makes a number of
    other design choices that would have been cumbersome (to say the least) to
    try to pack into its name:

    - It uses a private surrogate integer autoincrement field to identify
      dataset types, instead of using the name as the primary and foreign key
      directly.

    - It aggressively loads all DatasetTypes into memory instead of fetching
      them from the database only when needed or attempting more clever forms
      of caching.

    Alternative implementations that make different choices for these while
    keeping the same general table organization might be reasonable as well.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    collections : `CollectionManager`
        Manager object for the collections in this `Registry`.
    static : `StaticDatasetTablesTuple`
        Named tuple of `sqlalchemy.schema.Table` instances for all static
        tables used by this class.
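
    Notes
    -----
    A minimal usage sketch (illustrative only, not part of this module;
    ``db``, ``context``, ``collections``, and ``universe`` stand in for
    already-configured `Database`, `StaticTablesContext`,
    `CollectionManager`, and `DimensionUniverse` instances)::

        manager = ByDimensionsDatasetRecordStorageManager.initialize(
            db, context, collections=collections, universe=universe
        )
        storage, inserted = manager.register(datasetType)
        assert manager.find(datasetType.name) is storage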

    """
    def __init__(self, *, db: Database, collections: CollectionManager, static: StaticDatasetTablesTuple):
        self._db = db
        self._collections = collections
        self._static = static
        self._byName: Dict[str, ByDimensionsDatasetRecordStorage] = {}
        self._byId: Dict[int, ByDimensionsDatasetRecordStorage] = {}

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        # Docstring inherited from DatasetRecordStorageManager.
        specs = makeStaticTableSpecs(type(collections), universe=universe)
        static: StaticDatasetTablesTuple = context.addTableTuple(specs)  # type: ignore
        return cls(db=db, collections=collections, static=static)

    @classmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *, name: str = "dataset",
                             constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited from DatasetRecordStorageManager.
        return addDatasetForeignKey(tableSpec, name=name, onDelete=onDelete, constraint=constraint, **kwargs)

    def refresh(self, *, universe: DimensionUniverse) -> None:
        # Docstring inherited from DatasetRecordStorageManager.
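        # Build fresh caches and swap them in wholesale at the end, so a
        # failure partway through cannot leave the manager half-updated.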
        byName = {}
        byId = {}
        c = self._static.dataset_type.columns
        for row in self._db.query(self._static.dataset_type.select()).fetchall():
            name = row[c.name]
            dimensions = DimensionGraph.decode(row[c.dimensions_encoded], universe=universe)
            datasetType = DatasetType(name, dimensions, row[c.storage_class])
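            # All dataset types with the same dimensions share one dynamic
            # dataset-collection table, which should already exist for any
            # dataset type recorded here.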
            dynamic = self._db.getExistingTable(makeDynamicTableName(datasetType),
                                                makeDynamicTableSpec(datasetType, type(self._collections)))
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            byName[datasetType.name] = storage
            byId[storage._dataset_type_id] = storage
        self._byName = byName
        self._byId = byId

    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        # Docstring inherited from DatasetRecordStorageManager.
        return self._byName.get(name)

    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        # Docstring inherited from DatasetRecordStorageManager.
        storage = self._byName.get(datasetType.name)
        if storage is None:
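            # Use sync() rather than a plain insert so that concurrent
            # attempts to register the same dataset type are safe; the
            # `compared` fields are checked against any existing definition.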
            row, inserted = self._db.sync(
                self._static.dataset_type,
                keys={"name": datasetType.name},
                compared={
                    "dimensions_encoded": datasetType.dimensions.encode(),
                    "storage_class": datasetType.storageClass.name,
                },
                returning=["id"],
            )
            assert row is not None
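            # Dataset types with the same dimensions share one dynamic
            # dataset-collection table; create it only if this is the first
            # dataset type to need it.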
            dynamic = self._db.ensureTableExists(
                makeDynamicTableName(datasetType),
                makeDynamicTableSpec(datasetType, type(self._collections)),
            )
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            self._byName[datasetType.name] = storage
            self._byId[storage._dataset_type_id] = storage
        else:
            if datasetType != storage.datasetType:
                raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
                                                 f"with database definition {storage.datasetType}.")
            inserted = False
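        # Registering a composite dataset type also registers its component
        # dataset types, but only when the composite itself was just inserted.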
        if inserted and datasetType.isComposite:
            for component in datasetType.storageClass.components:
                self.register(datasetType.makeComponentDatasetType(component))
        return storage, inserted

    def __iter__(self) -> Iterator[DatasetType]:
        for storage in self._byName.values():
            yield storage.datasetType

    def getDatasetRef(self, id: int, *, universe: DimensionUniverse) -> Optional[DatasetRef]:
        # Docstring inherited from DatasetRecordStorageManager.
        sql = sqlalchemy.sql.select(
            [
                self._static.dataset.columns.dataset_type_id,
                self._static.dataset.columns[self._collections.getRunForeignKeyName()],
            ]
        ).select_from(
            self._static.dataset
        ).where(
            self._static.dataset.columns.id == id
        )
        row = self._db.query(sql).fetchone()
        if row is None:
            return None
        recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
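        # A cache miss here usually means another client registered a new
        # dataset type since our last refresh; reload and look again.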
        if recordsForType is None:
            self.refresh(universe=universe)
            recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
            assert recordsForType is not None, "Should be guaranteed by foreign key constraints."
        return DatasetRef(
            recordsForType.datasetType,
            dataId=recordsForType.getDataId(id=id),
            id=id,
            run=self._collections[row[self._collections.getRunForeignKeyName()]].name
        )