Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

# Names exported by a star-import of this module.
__all__ = ["TableDimensionRecordStorage"]

24 

25from typing import Dict, Iterable, Optional 

26 

27import sqlalchemy 

28 

29from ...core import ( 

30 DatabaseTimespanRepresentation, 

31 DataCoordinateIterable, 

32 DimensionElement, 

33 DimensionRecord, 

34 NamedKeyDict, 

35 SimpleQuery, 

36) 

37from ..interfaces import Database, DimensionRecordStorage, StaticTablesContext 

38from ..queries import QueryBuilder 

39 

40 

# NOTE(review): nothing in the visible part of this module references this
# constant -- confirm whether it is still used elsewhere before relying on it.
MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""

49 

50 

class TableDimensionRecordStorage(DimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    For spatial dimension elements, use `SpatialDimensionRecordStorage`
    instead.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    """
    def __init__(self, db: Database, element: DimensionElement, *, table: sqlalchemy.schema.Table):
        self._db = db
        self._table = table
        self._element = element
        # Map each of the element's dimension names to the table column that
        # holds it.  The record class's dimension field names are paired
        # positionally with the element's dimensions.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(self._element.dimensions,
                                       self._element.RecordClass.fields.dimensions.names)
        }

    @classmethod
    def initialize(cls, db: Database, element: DimensionElement, *,
                   context: Optional[StaticTablesContext] = None) -> DimensionRecordStorage:
        # Docstring inherited from DimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(tsRepr=db.getTimespanRepresentation())
        if context is not None:
            # A static-tables context was supplied: declare the table there.
            table = context.addTable(element.name, spec)
        else:
            # No context: go through the database object directly.
            table = db.ensureTableExists(element.name, spec)
        return cls(db, element, table=table)

    @property
    def element(self) -> DimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        # This implementation keeps no caches, so there is nothing to clear.
        pass

    def join(
        self,
        builder: QueryBuilder, *,
        regions: Optional[NamedKeyDict[DimensionElement, sqlalchemy.sql.ColumnElement]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, DatabaseTimespanRepresentation]] = None,
    ) -> sqlalchemy.schema.Table:
        # Docstring inherited from DimensionRecordStorage.
        #
        # Returns the table that was joined into the query.  The return
        # annotation reflects that; it was previously declared as `None` even
        # though the table has always been returned.
        assert regions is None, "This implementation does not handle spatial joins."
        joinOn = builder.startJoin(self._table, self.element.dimensions,
                                   self.element.RecordClass.fields.dimensions.names)
        if timespans is not None:
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            # Constrain against every timespan already in the mapping *before*
            # adding this table's own timespan, so it is never compared with
            # itself.
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            # The caller-provided mapping is updated in place.
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        if self.element.temporal is not None:
            # ``tsRepr`` is only bound for temporal elements; it is used again
            # below under the same condition.
            tsRepr = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in tsRepr.getFieldNames())
        query.join(self._table)
        # Let the data ID iterable add its own constraints, resolving
        # dimension names to this table's columns.
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        for row in self._db.query(query.combine()):
            values = dict(row)
            if self.element.temporal is not None:
                # Store the value extracted from the timespan columns under
                # the single key defined by the representation's NAME.
                values[DatabaseTimespanRepresentation.NAME] = tsRepr.extract(values)
            yield RecordClass(**values)

    def insert(self, *records: DimensionRecord) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            tsRepr = self._db.getTimespanRepresentation()
            for row in elementRows:
                # Replace the single timespan entry with whatever fields the
                # database representation writes into the row.
                timespan = row.pop(DatabaseTimespanRepresentation.NAME)
                tsRepr.update(timespan, result=row)
        with self._db.transaction():
            self._db.insert(self._table, *elementRows)

    def sync(self, record: DimensionRecord) -> bool:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Split the record: required fields identify the row (``keys``), and
        # everything left in ``compared`` is passed as the values to compare.
        keys = {name: compared.pop(name) for name in record.fields.required.names}
        if self.element.temporal is not None:
            tsRepr = self._db.getTimespanRepresentation()
            timespan = compared.pop(DatabaseTimespanRepresentation.NAME)
            tsRepr.update(timespan, result=compared)
        # Only the "inserted" flag of the database sync result is reported.
        _, inserted = self._db.sync(
            self._table,
            keys=keys,
            compared=compared,
        )
        return inserted

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        return [self._table]