Coverage for python/lsst/daf/butler/registry/dimensions/query.py: 80%

66 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-01 19:54 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["QueryDimensionRecordStorage"] 

24 

25from typing import Any, Iterable, Mapping, Optional 

26 

27import sqlalchemy 

28 

29from ...core import ( 

30 DatabaseDimension, 

31 DatabaseDimensionElement, 

32 DataCoordinateIterable, 

33 DimensionElement, 

34 DimensionRecord, 

35 GovernorDimension, 

36 NamedKeyDict, 

37 NamedKeyMapping, 

38 SpatialRegionDatabaseRepresentation, 

39 TimespanDatabaseRepresentation, 

40) 

41from ..interfaces import ( 

42 Database, 

43 DatabaseDimensionRecordStorage, 

44 GovernorDimensionRecordStorage, 

45 StaticTablesContext, 

46) 

47from ..queries import QueryBuilder 

48 

49 

class QueryDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A read-only record storage implementation backed by SELECT query.

    At present, the only query this class supports is a SELECT DISTINCT over
    the table for some other dimension that has this dimension as an implied
    dependency.  For example, we can use this class to provide access to the
    set of ``band`` names referenced by any ``physical_filter``.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.  Must actually be
        a `DatabaseDimension` (checked with an assertion).
    viewOf : `str`
        Name of the element whose table is queried to obtain records for
        ``element``; it is looked up in ``element.universe``.

    Raises
    ------
    NotImplementedError
        Raised if ``element`` is not one of the dimensions of the ``viewOf``
        element, or if it has metadata fields, implied dependencies,
        alternate keys, or a spatial or temporal relationship.
    """

    def __init__(self, db: Database, element: DatabaseDimensionElement, viewOf: str):
        assert isinstance(element, DatabaseDimension), \
            "An element cannot be a dependency unless it is a dimension."
        self._db = db
        self._element = element
        self._target = element.universe[viewOf]
        self._targetSpec = self._target.RecordClass.fields.makeTableSpec(
            RegionReprClass=self._db.getSpatialRegionRepresentation(),
            TimespanReprClass=self._db.getTimespanRepresentation(),
        )
        self._viewOf = viewOf
        self._query = None  # Constructed on first use.
        # The checks below restrict this class to elements whose records
        # consist of nothing but key values; _ensureQuery(), join() and
        # fetch() all rely on that.
        if element not in self._target.graph.dimensions:
            raise NotImplementedError("Query-backed dimension must be a dependency of its target.")
        if element.metadata:
            raise NotImplementedError("Cannot use query to back dimension with metadata.")
        if element.implied:
            raise NotImplementedError("Cannot use query to back dimension with implied dependencies.")
        if element.alternateKeys:
            raise NotImplementedError("Cannot use query to back dimension with alternate unique keys.")
        if element.spatial is not None:
            raise NotImplementedError("Cannot use query to back spatial dimension.")
        if element.temporal is not None:
            raise NotImplementedError("Cannot use query to back temporal dimension.")

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        # Only the "view_of" configuration entry is used; ``context`` and
        # ``governors`` are accepted for interface compatibility and ignored.
        viewOf = config["view_of"]
        return cls(db, element, viewOf)

    @property
    def element(self) -> DatabaseDimension:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        # Nothing to clear: the cached query object built by _ensureQuery()
        # is deliberately left in place.
        pass

    def _ensureQuery(self) -> None:
        """Build and cache the subquery over the target table, if it has not
        been built already.

        After this method returns, ``self._query`` is a SELECT DISTINCT over
        the target element's table, aliased to this element's name.
        """
        if self._query is not None:
            return
        targetTable = self._db.getExistingTable(self._target.name, self._targetSpec)
        assert targetTable is not None
        # The only columns for this dimension are ones for its required
        # dependencies and its own primary key (guaranteed by the checks in
        # the ctor).  In the target table every one of them is named after
        # the dimension; in our own logical table our own key column is named
        # after the primary key field instead, so relabel it.
        columns = [
            targetTable.columns[dimension.name].label(
                dimension.primaryKey.name if dimension == self.element else dimension.name
            )
            for dimension in self.element.required
        ]
        # The subquery is a SELECT DISTINCT, so each combination of key
        # values appears once no matter how many rows of the target table
        # reference it.
        self._query = (
            sqlalchemy.sql.select(*columns)
            .distinct()
            .select_from(targetTable)
            .alias(self.element.name)
        )

    def join(
        self,
        builder: QueryBuilder, *,
        regions: Optional[NamedKeyDict[DimensionElement, SpatialRegionDatabaseRepresentation]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> None:
        # Docstring inherited from DimensionRecordStorage.
        assert regions is None, "Should be guaranteed by constructor checks."
        assert timespans is None, "Should be guaranteed by constructor checks."
        if self._target in builder.summary.mustHaveKeysJoined:
            # Do nothing; the target dimension is already being included, so
            # joining against a subquery referencing it would just produce a
            # more complicated query that's guaranteed to return the same
            # results.
            return
        self._ensureQuery()
        joinOn = builder.startJoin(self._query, self.element.required,
                                   self.element.RecordClass.fields.required.names)
        builder.finishJoin(self._query, joinOn)
        # No return value: the method is declared ``-> None`` and the early
        # exit above already returns nothing, so callers cannot rely on one.

    def insert(self, *records: DimensionRecord, replace: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        # This storage is read-only; records come from the target's table.
        raise TypeError(f"Cannot insert {self.element.name} records, "
                        f"define as part of {self._viewOf} instead.")

    def sync(self, record: DimensionRecord, update: bool = False) -> bool:
        # Docstring inherited from DimensionRecordStorage.sync.
        # This storage is read-only; records come from the target's table.
        raise TypeError(f"Cannot sync {self.element.name} records, "
                        f"define as part of {self._viewOf} instead.")

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        for dataId in dataIds:
            # Given the restrictions imposed at construction, we know there's
            # nothing to actually fetch: everything we need is in the data ID.
            # Note that no database query is made, so the data IDs are not
            # checked against the target table here.
            yield RecordClass(**dataId.byName())

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        # This storage owns no tables of its own.
        return []