from __future__ import annotations

__all__ = ("ByDimensionsDatasetRecordStorage",)

from typing import (
    Any,
    Dict,
    Iterable,
    Iterator,
    Optional,
    TYPE_CHECKING,
)

import sqlalchemy

from lsst.daf.butler import (
    CollectionType,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    ExpandedDataCoordinate,
    Quantum,
)
from lsst.daf.butler.registry.interfaces import DatasetRecordStorage
from lsst.daf.butler.registry.simpleQuery import SimpleQuery, Select

if TYPE_CHECKING:
    from ...interfaces import CollectionManager, CollectionRecord, Database, RunRecord
    from .tables import StaticDatasetTablesTuple


class ByDimensionsDatasetRecordStorage(DatasetRecordStorage):
    """Dataset record storage implementation paired with
    `ByDimensionsDatasetRecordStorageManager`; see that class for more
    information.

    Instances of this class should never be constructed directly; use
    `DatasetRecordStorageManager.register` instead.
    """

    def __init__(self, *, datasetType: DatasetType,
                 db: Database,
                 dataset_type_id: int,
                 collections: CollectionManager,
                 static: StaticDatasetTablesTuple,
                 dynamic: sqlalchemy.sql.Table):
        super().__init__(datasetType=datasetType)
        self._dataset_type_id = dataset_type_id
        self._db = db
        self._collections = collections
        self._static = static
        self._dynamic = dynamic
        self._runKeyColumn = collections.getRunForeignKeyName()

    def insert(self, run: RunRecord, dataIds: Iterable[ExpandedDataCoordinate], *,
               quantum: Optional[Quantum] = None) -> Iterator[DatasetRef]:
        # Docstring inherited from DatasetRecordStorage.
        staticRow = {
            "dataset_type_id": self._dataset_type_id,
            self._runKeyColumn: run.key,
            "quantum_id": quantum.id if quantum is not None else None,
        }
        dataIds = list(dataIds)
        # Insert into the static dataset table, generating autoincrement
        # dataset_id values.
        with self._db.transaction():
            datasetIds = self._db.insert(self._static.dataset, *([staticRow]*len(dataIds)),
                                         returnIds=True)
            assert datasetIds is not None
            # Combine the generated dataset_id values and data ID fields to
            # form rows to be inserted into the dynamic table.
            protoDynamicRow = {
                "dataset_type_id": self._dataset_type_id,
                self._collections.getCollectionForeignKeyName(): run.key,
            }
            dynamicRows = [
                dict(protoDynamicRow, dataset_id=dataset_id, **dataId.byName())
                for dataId, dataset_id in zip(dataIds, datasetIds)
            ]
            # Insert those rows into the dynamic table. This is where we'll
            # get any unique constraint violations.
            self._db.insert(self._dynamic, *dynamicRows)
        for dataId, datasetId in zip(dataIds, datasetIds):
            yield DatasetRef(
                datasetType=self.datasetType,
                dataId=dataId,
                id=datasetId,
                run=run.name,
            )
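    # Caller-side sketch (hypothetical `run` and `dataIds`): `insert` is a
    # generator, so the database writes only happen once it is iterated.
    #
    #     refs = list(storage.insert(run, dataIds))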

    def find(self, collection: CollectionRecord, dataId: DataCoordinate) -> Optional[DatasetRef]:
        # Docstring inherited from DatasetRecordStorage.
        assert dataId.graph == self.datasetType.dimensions
        sql = self.select(collection=collection, dataId=dataId, id=Select, run=Select).combine()
        row = self._db.query(sql).fetchone()
        if row is None:
            return None
        return DatasetRef(
            datasetType=self.datasetType,
            dataId=dataId,
            id=row["id"],
            run=self._collections[row[self._runKeyColumn]].name
        )
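    # Note: `find` returns None (rather than raising) when the data ID has no
    # dataset in the given collection, so callers should check the result:
    #
    #     ref = storage.find(collection, dataId)
    #     if ref is None:
    #         ...  # not found in this collection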

    def delete(self, datasets: Iterable[DatasetRef]) -> None:
        # Docstring inherited from DatasetRecordStorage.
        # Only delete from the common dataset table; ON DELETE foreign key
        # clauses will handle the rest.
        self._db.delete(
            self._static.dataset,
            ["id"],
            *[{"id": dataset.getCheckedId()} for dataset in datasets],
        )

    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        # Docstring inherited from DatasetRecordStorage.
        if collection.type is not CollectionType.TAGGED:
            raise TypeError(f"Cannot associate into collection '{collection}' "
                            f"of type {collection.type.name}; must be TAGGED.")
        protoRow = {
            self._collections.getCollectionForeignKeyName(): collection.key,
            "dataset_type_id": self._dataset_type_id,
        }
        rows = []
        for dataset in datasets:
            row = dict(protoRow, dataset_id=dataset.getCheckedId())
            for dimension, value in dataset.dataId.items():
                row[dimension.name] = value
            rows.append(row)
        self._db.replace(self._dynamic, *rows)

    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        # Docstring inherited from DatasetRecordStorage.
        if collection.type is not CollectionType.TAGGED:
            raise TypeError(f"Cannot disassociate from collection '{collection}' "
                            f"of type {collection.type.name}; must be TAGGED.")
        rows = [
            {
                "dataset_id": dataset.getCheckedId(),
                self._collections.getCollectionForeignKeyName(): collection.key
            }
            for dataset in datasets
        ]
        self._db.delete(self._dynamic, ["dataset_id", self._collections.getCollectionForeignKeyName()],
                        *rows)
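    # Tagging sketch (hypothetical `tagged` record of CollectionType.TAGGED):
    # `associate` goes through `Database.replace`, so repeating an association
    # overwrites the existing row rather than raising a unique constraint
    # violation.
    #
    #     storage.associate(tagged, refs)
    #     storage.disassociate(tagged, refs)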

    def select(self, collection: CollectionRecord,
               dataId: Select.Or[DataCoordinate] = Select,
               id: Select.Or[Optional[int]] = Select,
               run: Select.Or[None] = Select,
               ) -> SimpleQuery:
        # Docstring inherited from DatasetRecordStorage.
        assert collection.type is not CollectionType.CHAINED
        query = SimpleQuery()
        # We always include the _static.dataset table, and we can always get
        # the id and run fields from that; passing them as kwargs here tells
        # SimpleQuery to handle them whether they're constraints or results.
        # We always constrain the dataset_type_id here as well.
        query.join(
            self._static.dataset,
            id=id,
            dataset_type_id=self._dataset_type_id,
            **{self._runKeyColumn: run}
        )
        # If and only if the collection is a RUN, we constrain it in the
        # static table (and also the dynamic table below).
        if collection.type is CollectionType.RUN:
            query.where.append(self._static.dataset.columns[self._runKeyColumn]
                               == collection.key)
        # We get or constrain the data ID from the dynamic table, but that's
        # multiple columns, not one, so we need to transform the one Select.Or
        # argument into a dictionary of them.
        kwargs: Dict[str, Any]
        if dataId is Select:
            kwargs = {dim.name: Select for dim in self.datasetType.dimensions.required}
        else:
            kwargs = dict(dataId.byName())
        # We always constrain (never retrieve) the collection from the dynamic
        # table.
        kwargs[self._collections.getCollectionForeignKeyName()] = collection.key
        # And now we finally join in the dynamic table.
        query.join(
            self._dynamic,
            onclause=(self._static.dataset.columns.id == self._dynamic.columns.dataset_id),
            **kwargs
        )
        return query
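    # Select.Or sketch, mirroring the call in `find` above: passing `Select`
    # requests a result column, while a concrete value becomes a constraint.
    #
    #     sql = storage.select(collection, dataId=dataId, id=Select, run=Select).combine()
    #     row = db.query(sql).fetchone()  # hypothetical `db`; see find()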

    def getDataId(self, id: int) -> DataCoordinate:
        # Docstring inherited from DatasetRecordStorage.
        # This query could return multiple rows (one for each tagged
        # collection the dataset is in, plus one for its run collection), and
        # we don't care which of those we get.
        sql = self._dynamic.select().where(
            sqlalchemy.sql.and_(
                self._dynamic.columns.dataset_id == id,
                self._dynamic.columns.dataset_type_id == self._dataset_type_id
            )
        ).limit(1)
        row = self._db.query(sql).fetchone()
        assert row is not None, "Should be guaranteed by caller and foreign key constraints."
        return DataCoordinate.standardize(
            {dimension.name: row[dimension.name] for dimension in self.datasetType.dimensions.required},
            graph=self.datasetType.dimensions
        )