Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("Query",) 

24 

25import itertools 

26from typing import Optional, Tuple, Callable 

27 

28from sqlalchemy.sql import FromClause 

29from sqlalchemy.engine import RowProxy, ResultProxy, Connection 

30 

31from lsst.sphgeom import Region 

32 

33from ...core import ( 

34 DataCoordinate, 

35 DatasetRef, 

36 DatasetType, 

37 DimensionGraph, 

38) 

39from ._structs import QuerySummary, QueryColumns 

40 

41 

class Query:
    """A wrapper for a SQLAlchemy query that knows how to transform result rows
    into data IDs and dataset references.

    A `Query` should almost always be constructed directly by a call to
    `QueryBuilder.finish`; direct construction will make it difficult to be
    able to maintain invariants between arguments (see the documentation for
    `QueryColumns` for more information).

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection used to execute the query.
    sql : `sqlalchemy.sql.FromClause`
        A complete SELECT query, including at least SELECT, FROM, and WHERE
        clauses.
    summary : `QuerySummary`
        Struct that organizes the dimensions involved in the query.
    columns : `QueryColumns`
        Columns that are referenced in the query in any clause.

    Notes
    -----
    SQLAlchemy is used in the public interface of `Query` rather than just its
    implementation simply because avoiding this would entail writing wrappers
    for the `sqlalchemy.engine.RowProxy` and `sqlalchemy.engine.ResultProxy`
    classes that are themselves generic wrappers for lower-level Python DBAPI
    classes.  Another layer would entail another set of computational
    overheads, but the only reason we would seriously consider not using
    SQLAlchemy here in the future would be to reduce computational overheads.
    """

    def __init__(self, *, connection: Connection, sql: FromClause,
                 summary: QuerySummary, columns: QueryColumns):
        # ``summary`` and ``sql`` are public attributes; the column struct and
        # the connection are implementation details.
        self.summary = summary
        self.sql = sql
        self._columns = columns
        self._connection = connection

    def predicate(self, region: Optional[Region] = None) -> Callable[[RowProxy], bool]:
        """Return a callable that can perform extra Python-side filtering of
        query results.

        To get the expected results from a query, the returned predicate *must*
        be used to ignore rows for which it returns `False`; this permits the
        `QueryBuilder` implementation to move logic from the database to Python
        without changing the public interface.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order for the
            predicate to return `True`.  If not provided, this will be the
            region in `QuerySummary.dataId`, if there is one.

        Returns
        -------
        func : `Callable`
            A callable that takes a single `sqlalchemy.engine.RowProxy`
            argument and returns `bool`.
        """
        whereRegion = region if region is not None else self.summary.dataId.region

        def closure(row: RowProxy) -> bool:
            rowRegions = [row[column] for column in self._columns.regions.values()]
            # Compare against `None` explicitly: relying on the truthiness of
            # a region object would silently drop the constraint for any
            # region type that happens to evaluate as false.
            if whereRegion is not None and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
                return False
            # Every pair of regions within the row must also overlap.
            return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))

        return closure

    def extractDataId(self, row: RowProxy, *, graph: Optional[DimensionGraph] = None) -> DataCoordinate:
        """Extract a data ID from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        graph : `DimensionGraph`, optional
            The dimensions the returned data ID should identify.  If not
            provided, this will be all dimensions in `QuerySummary.requested`.

        Returns
        -------
        dataId : `DataCoordinate`
            A minimal data ID that identifies the requested dimensions but
            includes no metadata or implied dimensions.
        """
        if graph is None:
            graph = self.summary.requested
        # Only the required dimensions' key columns are read from the row.
        values = tuple(row[self._columns.getKeyColumn(dimension)] for dimension in graph.required)
        return DataCoordinate(graph, values)

    def extractDatasetRef(self, row: RowProxy, datasetType: DatasetType,
                          dataId: Optional[DataCoordinate] = None) -> Tuple[DatasetRef, Optional[int]]:
        """Extract a `DatasetRef` from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        datasetType : `DatasetType`
            Type of the dataset to extract.  Must have been included in the
            `Query` via a call to `QueryBuilder.joinDataset` with
            ``isResult=True``, or otherwise included in
            `QueryColumns.datasets`.
        dataId : `DataCoordinate`, optional
            Data ID to attach to the `DatasetRef`.  A minimal (i.e. base class)
            `DataCoordinate` is constructed from ``row`` if `None`.

        Returns
        -------
        ref : `DatasetRef`
            Reference to the dataset; guaranteed to have `DatasetRef.id` not
            `None`.
        rank : `int` or `None`
            Integer index of the collection in which this dataset was found,
            within the sequence of collections passed when constructing the
            query.  `None` if `QueryBuilder.joinDataset` was called with
            ``addRank=False``.
        """
        if dataId is None:
            dataId = self.extractDataId(row, graph=datasetType.dimensions)
        datasetIdColumn, datasetRankColumn = self._columns.datasets[datasetType]
        # The rank column is optional; report `None` when it was not selected.
        return (DatasetRef(datasetType, dataId, id=row[datasetIdColumn]),
                row[datasetRankColumn] if datasetRankColumn is not None else None)

    def execute(self) -> ResultProxy:
        """Execute the query.

        Returns
        -------
        results : `sqlalchemy.engine.ResultProxy`
            Object representing the query results; see SQLAlchemy documentation
            for more information.
        """
        return self._connection.execute(self.sql)