Coverage for python/lsst/daf/butler/registry/dimensions/query.py: 81%

67 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-10-07 02:46 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["QueryDimensionRecordStorage"] 

24 

25from collections.abc import Iterable, Mapping 

26from typing import Any 

27 

28import sqlalchemy 

29 

30from ...core import ( 

31 DatabaseDimension, 

32 DatabaseDimensionElement, 

33 DataCoordinateIterable, 

34 DimensionElement, 

35 DimensionRecord, 

36 GovernorDimension, 

37 NamedKeyDict, 

38 NamedKeyMapping, 

39 TimespanDatabaseRepresentation, 

40) 

41from ..interfaces import ( 

42 Database, 

43 DatabaseDimensionRecordStorage, 

44 GovernorDimensionRecordStorage, 

45 StaticTablesContext, 

46) 

47from ..queries import QueryBuilder 

48 

49 

class QueryDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A read-only record storage implementation backed by SELECT query.

    At present, the only query this class supports is a SELECT DISTINCT over
    the table for some other dimension that has this dimension as an implied
    dependency.  For example, we can use this class to provide access to the
    set of ``band`` names referenced by any ``physical_filter``.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    viewOf : `str`
        Name of the element, looked up in ``element.universe``, whose table
        is queried to obtain the records for ``element``.

    Raises
    ------
    NotImplementedError
        Raised if ``element`` is not one of the target's dimensions, or if it
        has metadata, implied dependencies, alternate keys, or a spatial or
        temporal relationship; none of those can be backed by this query.
    """

    def __init__(self, db: Database, element: DatabaseDimensionElement, viewOf: str):
        assert isinstance(
            element, DatabaseDimension
        ), "An element cannot be a dependency unless it is a dimension."
        self._db = db
        self._element = element
        self._target = element.universe[viewOf]
        # Table specification for the target element; used later to look up
        # the already-existing target table.
        self._targetSpec = self._target.RecordClass.fields.makeTableSpec(
            TimespanReprClass=self._db.getTimespanRepresentation()
        )
        self._viewOf = viewOf
        self._query = None  # Constructed on first use.
        # Everything below rejects elements that would need columns other
        # than the required-dimension keys; `_ensureQuery` and `fetch` rely
        # on these restrictions.
        if element not in self._target.graph.dimensions:
            raise NotImplementedError("Query-backed dimension must be a dependency of its target.")
        if element.metadata:
            raise NotImplementedError("Cannot use query to back dimension with metadata.")
        if element.implied:
            raise NotImplementedError("Cannot use query to back dimension with implied dependencies.")
        if element.alternateKeys:
            raise NotImplementedError("Cannot use query to back dimension with alternate unique keys.")
        if element.spatial is not None:
            raise NotImplementedError("Cannot use query to back spatial dimension.")
        if element.temporal is not None:
            raise NotImplementedError("Cannot use query to back temporal dimension.")

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: StaticTablesContext | None = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        # Only the "view_of" configuration entry is used here; ``context``
        # and ``governors`` are accepted for interface compatibility.
        viewOf = config["view_of"]
        return cls(db, element, viewOf)

    @property
    def element(self) -> DatabaseDimension:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        pass

    def _ensureQuery(self) -> None:
        """Build the subquery stored in ``self._query`` if it has not been
        built yet.

        The subquery selects this element's required-dimension columns from
        the target element's existing table and is aliased to this element's
        name.
        """
        if self._query is None:
            targetTable = self._db.getExistingTable(self._target.name, self._targetSpec)
            assert targetTable is not None
            columns = []
            # The only columns for this dimension are ones for its required
            # dependencies and its own primary key (guaranteed by the checks in
            # the ctor).
            for dimension in self.element.required:
                if dimension == self.element:
                    # The element's own column in the target table is named
                    # after the dimension; relabel it with its primary key
                    # field name.
                    columns.append(targetTable.columns[dimension.name].label(dimension.primaryKey.name))
                else:
                    columns.append(targetTable.columns[dimension.name].label(dimension.name))
            # The subquery is a SELECT DISTINCT over the target table, so
            # each combination of key values appears once no matter how many
            # target rows reference it.
            self._query = (
                sqlalchemy.sql.select(*columns).distinct().select_from(targetTable).alias(self.element.name)
            )

    def join(
        self,
        builder: QueryBuilder,
        *,
        regions: NamedKeyDict[DimensionElement, sqlalchemy.sql.ColumnElement] | None = None,
        timespans: NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation] | None = None,
    ) -> None:
        # Docstring inherited from DimensionRecordStorage.
        assert regions is None, "Should be guaranteed by constructor checks."
        assert timespans is None, "Should be guaranteed by constructor checks."
        if self._target in builder.summary.mustHaveKeysJoined:
            # Do nothing; the target dimension is already being included, so
            # joining against a subquery referencing it would just produce a
            # more complicated query that's guaranteed to return the same
            # results.
            return
        self._ensureQuery()
        joinOn = builder.startJoin(
            self._query, self.element.required, self.element.RecordClass.fields.required.names
        )
        builder.finishJoin(self._query, joinOn)
        # NOTE(review): the signature is annotated ``-> None`` but this path
        # returns the subquery (the early exit above returns `None`).  The
        # behavior is kept as-is for compatibility; confirm against the base
        # class which of the two is intended.
        return self._query

    def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        # This storage is read-only; records come from the target's table.
        raise TypeError(
            f"Cannot insert {self.element.name} records, define as part of {self._viewOf} instead."
        )

    def sync(self, record: DimensionRecord, update: bool = False) -> bool:
        # Docstring inherited from DimensionRecordStorage.sync.
        # This storage is read-only; records come from the target's table.
        raise TypeError(f"Cannot sync {self.element.name} records, define as part of {self._viewOf} instead.")

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        for dataId in dataIds:
            # Given the restrictions imposed at construction, we know there's
            # nothing to actually fetch: everything we need is in the data ID.
            yield RecordClass(**dataId.byName())

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        # No tables are reported for this storage.
        return []

179 return []