Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from __future__ import annotations 

2 

3__all__ = ("ByDimensionsDatasetRecordStorageManager",) 

4 

5from typing import ( 

6 Iterator, 

7 Optional, 

8 Tuple, 

9 TYPE_CHECKING, 

10) 

11 

12import sqlalchemy 

13 

14from lsst.daf.butler import ( 

15 DatasetRef, 

16 DatasetType, 

17 ddl, 

18 DimensionGraph, 

19 DimensionUniverse, 

20) 

21from lsst.daf.butler.registry import ConflictingDefinitionError 

22from lsst.daf.butler.registry.interfaces import DatasetRecordStorage, DatasetRecordStorageManager 

23 

24from .tables import makeStaticTableSpecs, addDatasetForeignKey, makeDynamicTableName, makeDynamicTableSpec 

25from ._storage import ByDimensionsDatasetRecordStorage 

26 

27if TYPE_CHECKING: 27 ↛ 28line 27 didn't jump to line 28, because the condition on line 27 was never true

28 from lsst.daf.butler.registry.interfaces import ( 

29 CollectionManager, 

30 Database, 

31 StaticTablesContext, 

32 ) 

33 from .tables import StaticDatasetTablesTuple 

34 

35 

class ByDimensionsDatasetRecordStorageManager(DatasetRecordStorageManager):
    """A manager class for datasets that uses one dataset-collection table for
    each group of dataset types that share the same dimensions.

    In addition to the table organization, this class makes a number of
    other design choices that would have been cumbersome (to say the least) to
    try to pack into its name:

    - It uses a private surrogate integer autoincrement field to identify
      dataset types, instead of using the name as the primary and foreign key
      directly.

    - It aggressively loads all DatasetTypes into memory instead of fetching
      them from the database only when needed or attempting more clever forms
      of caching.

    Alternative implementations that make different choices for these while
    keeping the same general table organization might be reasonable as well.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    collections : `CollectionManager`
        Manager object for the collections in this `Registry`.
    static : `StaticDatasetTablesTuple`
        Named tuple of `sqlalchemy.schema.Table` instances for all static
        tables used by this class.
    universe : `DimensionUniverse`, optional
        Universe of all dimensions known to the registry.  When provided it
        is remembered, allowing later calls to `refresh` — including the
        internal cache-miss refresh in `getDatasetRef` — to omit the
        ``universe`` argument.
    """
    def __init__(self, *, db: Database, collections: CollectionManager, static: StaticDatasetTablesTuple,
                 universe: Optional[DimensionUniverse] = None):
        self._db = db
        self._collections = collections
        self._static = static
        # Last universe seen (at construction or via refresh); needed so that
        # refresh() can be invoked without arguments from getDatasetRef.
        self._universe = universe
        # In-memory caches, rebuilt wholesale by refresh():
        #   name of dataset type -> ByDimensionsDatasetRecordStorage
        self._byName = {}
        #   surrogate dataset_type id -> ByDimensionsDatasetRecordStorage
        self._byId = {}

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        # Docstring inherited from DatasetRecordStorageManager.
        specs = makeStaticTableSpecs(type(collections), universe=universe)
        static = context.addTableTuple(specs)
        # Pass the universe along so argument-less refresh() calls work later.
        return cls(db=db, collections=collections, static=static, universe=universe)

    @classmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *, name: str = "dataset",
                             constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs) -> ddl.FieldSpec:
        # Docstring inherited from DatasetRecordStorageManager.
        return addDatasetForeignKey(tableSpec, name=name, onDelete=onDelete, constraint=constraint, **kwargs)

    def refresh(self, *, universe: Optional[DimensionUniverse] = None):
        # Docstring inherited from DatasetRecordStorageManager.
        #
        # ``universe`` is now optional: when omitted, the universe remembered
        # from construction or an earlier refresh is used.  This fixes the
        # previous TypeError raised by getDatasetRef's ``self.refresh()``
        # call, which passed no arguments while ``universe`` was required.
        if universe is None:
            universe = self._universe
            if universe is None:
                raise RuntimeError(
                    "Cannot refresh dataset types: no DimensionUniverse was provided "
                    "to this call, at construction, or in any previous refresh."
                )
        else:
            self._universe = universe
        byName = {}
        byId = {}
        c = self._static.dataset_type.columns
        for row in self._db.query(self._static.dataset_type.select()).fetchall():
            name = row[c.name]
            dimensions = DimensionGraph.decode(row[c.dimensions_encoded], universe=universe)
            datasetType = DatasetType(name, dimensions, row[c.storage_class])
            # The per-dimensions dataset-collection table must already exist
            # for any dataset type present in the static table.
            dynamic = self._db.getExistingTable(makeDynamicTableName(datasetType),
                                                makeDynamicTableSpec(datasetType, type(self._collections)))
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            byName[datasetType.name] = storage
            byId[storage._dataset_type_id] = storage
        # Swap in fully-built caches only at the end, so a failure mid-loop
        # leaves the previous consistent state intact.
        self._byName = byName
        self._byId = byId

    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        # Docstring inherited from DatasetRecordStorageManager.
        return self._byName.get(name)

    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        # Docstring inherited from DatasetRecordStorageManager.
        storage = self._byName.get(datasetType.name)
        if storage is None:
            # Not cached: sync a row into the static dataset_type table
            # (inserting it if absent, verifying consistency if present).
            row, inserted = self._db.sync(
                self._static.dataset_type,
                keys={"name": datasetType.name},
                compared={
                    "dimensions_encoded": datasetType.dimensions.encode(),
                    "storage_class": datasetType.storageClass.name,
                },
                returning=["id"],
            )
            dynamic = self._db.ensureTableExists(
                makeDynamicTableName(datasetType),
                makeDynamicTableSpec(datasetType, type(self._collections)),
            )
            storage = ByDimensionsDatasetRecordStorage(db=self._db, datasetType=datasetType,
                                                       static=self._static, dynamic=dynamic,
                                                       dataset_type_id=row["id"],
                                                       collections=self._collections)
            self._byName[datasetType.name] = storage
            self._byId[storage._dataset_type_id] = storage
        else:
            # Cached: the given definition must match the database one.
            if datasetType != storage.datasetType:
                raise ConflictingDefinitionError(f"Given dataset type {datasetType} is inconsistent "
                                                 f"with database definition {storage.datasetType}.")
            inserted = False
        if inserted and datasetType.isComposite:
            # Recursively register component dataset types for new composites.
            for component in datasetType.storageClass.components:
                self.register(datasetType.makeComponentDatasetType(component))
        return storage, inserted

    def __iter__(self) -> Iterator[DatasetType]:
        """Iterate over all dataset types known to this manager."""
        for storage in self._byName.values():
            yield storage.datasetType

    def getDatasetRef(self, id: int) -> Optional[DatasetRef]:
        # Docstring inherited from DatasetRecordStorageManager.
        sql = sqlalchemy.sql.select(
            [
                self._static.dataset.columns.dataset_type_id,
                self._static.dataset.columns[self._collections.getRunForeignKeyName()],
            ]
        ).select_from(
            self._static.dataset
        ).where(
            self._static.dataset.columns.id == id
        )
        row = self._db.query(sql).fetchone()
        if row is None:
            return None
        recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
        if recordsForType is None:
            # Cache miss: another client may have registered the dataset type
            # since our last refresh.  Uses the remembered universe.
            self.refresh()
            recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
            assert recordsForType is not None, "Should be guaranteed by foreign key constraints."
        return DatasetRef(
            recordsForType.datasetType,
            dataId=recordsForType.getDataId(id=id),
            id=id,
            run=self._collections[row[self._collections.getRunForeignKeyName()]].name
        )