Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("Query",) 

24 

25import itertools 

26from typing import Optional, Dict, Any, Tuple, Callable 

27 

28from sqlalchemy.sql import FromClause 

29from sqlalchemy.engine import RowProxy, ResultProxy, Connection 

30 

31from lsst.sphgeom import Region 

32 

33from ...core import ( 

34 DataCoordinate, 

35 DatasetRef, 

36 DatasetType, 

37 DimensionGraph, 

38 ExpandedDataCoordinate, 

39) 

40from ._structs import QuerySummary, QueryColumns, QueryParameters 

41 

42 

class Query:
    """A wrapper for a SQLAlchemy query that knows how to re-bind parameters
    and transform result rows into data IDs and dataset references.

    A `Query` should almost always be constructed directly by a call to
    `QueryBuilder.finish`; direct construction will make it difficult to be
    able to maintain invariants between arguments (see the documentation for
    `QueryColumns` and `QueryParameters` for more information).

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection used to execute the query.
    sql : `sqlalchemy.sql.FromClause`
        A complete SELECT query, including at least SELECT, FROM, and WHERE
        clauses.
    summary : `QuerySummary`
        Struct that organizes the dimensions involved in the query.
    columns : `QueryColumns`
        Columns that are referenced in the query in any clause.
    parameters : `QueryParameters`
        Bind parameters for the query.

    Notes
    -----
    SQLAlchemy is used in the public interface of `Query` rather than just its
    implementation simply because avoiding this would entail writing wrappers
    for the `sqlalchemy.engine.RowProxy` and `sqlalchemy.engine.ResultProxy`
    classes that are themselves generic wrappers for lower-level Python DBAPI
    classes.  Another layer would entail another set of computational
    overheads, but the only reason we would seriously consider not using
    SQLAlchemy here in the future would be to reduce computational overheads.
    """

    def __init__(self, *, connection: Connection, sql: FromClause,
                 summary: QuerySummary, columns: QueryColumns, parameters: QueryParameters):
        # ``summary`` and ``sql`` are public attributes; the rest are
        # implementation details used by the methods below.
        self.summary = summary
        self.sql = sql
        self._columns = columns
        self._parameters = parameters
        self._connection = connection

    def predicate(self, region: Optional[Region] = None) -> Callable[[RowProxy], bool]:
        """Return a callable that can perform extra Python-side filtering of
        query results.

        To get the expected results from a query, the returned predicate *must*
        be used to ignore rows for which it returns `False`; this permits the
        `QueryBuilder` implementation to move logic from the database to Python
        without changing the public interface.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order for the
            predicate to return `True`.  If not provided, this will be the
            region in `QuerySummary.dataId`, if there is one.

        Returns
        -------
        func : `Callable`
            A callable that takes a single `sqlalchemy.engine.RowProxy`
            argument and returns `bool`.
        """
        whereRegion = region if region is not None else self.summary.dataId.region

        def closure(row: RowProxy) -> bool:
            rowRegions = [row[column] for column in self._columns.regions.values()]
            # Reject the row if any of its regions misses the constraint
            # region.  Test against `None` explicitly (as done when selecting
            # ``whereRegion`` above) rather than relying on object truthiness.
            if whereRegion is not None and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
                return False
            # Every pair of regions within the row must overlap as well.
            return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))

        return closure

    def bind(self, dataId: ExpandedDataCoordinate) -> Dict[str, Any]:
        """Return a dictionary that can be passed to a SQLAlchemy execute
        method to provide WHERE clause information at execution time rather
        than construction time.

        Most callers should call `Query.execute` directly instead; when called
        with a data ID, that calls `bind` internally.

        Parameters
        ----------
        dataId : `ExpandedDataCoordinate`
            Data ID to transform into bind parameters.  Its dimensions
            (``dataId.graph``) must be equal to `QuerySummary.given` (this is
            checked with an assertion), and it must have the same primary key
            values for all dimensions also identified by
            `QuerySummary.dataId`.

        Returns
        -------
        parameters : `dict`
            Dictionary that can be passed as the second argument (with
            ``self.sql`` as the first argument) to SQLAlchemy execute methods.

        Notes
        -----
        Calling `bind` does not automatically update the callable returned by
        `predicate` with the given data ID's region (if it has one).  That
        must be done manually by passing the region when calling `predicate`.
        """
        assert dataId.graph == self.summary.given, (
            f"Data ID dimensions {dataId.graph} do not match the dimensions "
            f"given to the query, {self.summary.given}."
        )
        # Primary-key bind parameters, one per dimension key.
        result = {parameter: dataId.full[dimension]
                  for dimension, parameter in self._parameters.keys.items()}
        # Timespan bounds are only bound when the query has timespan
        # parameters.
        if self._parameters.timespan:
            result[self._parameters.timespan.begin] = dataId.timespan.begin
            result[self._parameters.timespan.end] = dataId.timespan.end
        # Each skypix parameter receives the envelope of the data ID's region
        # in that dimension's pixelization.
        for dimension, parameter in self._parameters.skypix.items():
            result[parameter] = dimension.pixelization.envelope(dataId.region)
        return result

    def extractDataId(self, row: RowProxy, *, graph: Optional[DimensionGraph] = None) -> DataCoordinate:
        """Extract a data ID from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        graph : `DimensionGraph`, optional
            The dimensions the returned data ID should identify.  If not
            provided, this will be all dimensions in `QuerySummary.requested`.

        Returns
        -------
        dataId : `DataCoordinate`
            A minimal data ID that identifies the requested dimensions but
            includes no metadata or implied dimensions.
        """
        if graph is None:
            graph = self.summary.requested
        # Only the required dimensions' key columns are read from the row.
        values = tuple(row[self._columns.getKeyColumn(dimension)] for dimension in graph.required)
        return DataCoordinate(graph, values)

    def extractDatasetRef(self, row: RowProxy, datasetType: DatasetType,
                          dataId: Optional[DataCoordinate] = None) -> Tuple[DatasetRef, Optional[int]]:
        """Extract a `DatasetRef` from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        datasetType : `DatasetType`
            Type of the dataset to extract.  Must have been included in the
            `Query` via a call to `QueryBuilder.joinDataset` with
            ``isResult=True``, or otherwise included in
            `QueryColumns.datasets`.
        dataId : `DataCoordinate`, optional
            Data ID to attach to the `DatasetRef`.  A minimal (i.e. base class)
            `DataCoordinate` is constructed from ``row`` if `None`.

        Returns
        -------
        ref : `DatasetRef`
            Reference to the dataset; guaranteed to have `DatasetRef.id` not
            `None`.
        rank : `int` or `None`
            Integer index of the collection in which this dataset was found,
            within the sequence of collections passed when constructing the
            query.  `None` if `QueryBuilder.joinDataset` was called with
            ``addRank=False``.
        """
        if dataId is None:
            dataId = self.extractDataId(row, graph=datasetType.dimensions)
        datasetIdColumn, datasetRankColumn = self._columns.datasets[datasetType]
        # The rank column is absent (`None`) when no rank was requested.
        rank = row[datasetRankColumn] if datasetRankColumn is not None else None
        return DatasetRef(datasetType, dataId, id=row[datasetIdColumn]), rank

    def execute(self, dataId: Optional[ExpandedDataCoordinate] = None) -> ResultProxy:
        """Execute the query.

        This may be called multiple times with different arguments to apply
        different bind parameter values without repeating the work of
        constructing the query.

        Parameters
        ----------
        dataId : `ExpandedDataCoordinate`, optional
            Data ID to transform into bind parameters; see `bind` for the
            requirements it must satisfy.  If not provided, the query is
            executed without any execution-time bind parameters, so
            `QuerySummary.dataId` must identify all dimensions in
            `QuerySummary.given`.

        Returns
        -------
        results : `sqlalchemy.engine.ResultProxy`
            Object representing the query results; see SQLAlchemy documentation
            for more information.
        """
        if dataId is None:
            return self._connection.execute(self.sql)
        return self._connection.execute(self.sql, self.bind(dataId))

237 else: 

238 return self._connection.execute(self.sql)