Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("Query",) 

24 

25import itertools 

26from typing import Iterable, Optional, Tuple, Callable 

27 

28from sqlalchemy.sql import FromClause 

29from sqlalchemy.engine import RowProxy 

30 

31from lsst.sphgeom import Region 

32 

33from ...core import ( 

34 DataCoordinate, 

35 DatasetRef, 

36 DatasetType, 

37 Dimension, 

38 DimensionGraph, 

39) 

40from ..interfaces import CollectionManager 

41from ._structs import QuerySummary, QueryColumns 

42 

43 

class Query:
    """A wrapper for a SQLAlchemy query that knows how to transform result rows
    into data IDs and dataset references.

    A `Query` should almost always be constructed directly by a call to
    `QueryBuilder.finish`; direct construction will make it difficult to be
    able to maintain invariants between arguments (see the documentation for
    `QueryColumns` for more information).

    Parameters
    ----------
    sql : `sqlalchemy.sql.FromClause`
        A complete SELECT query, including at least SELECT, FROM, and WHERE
        clauses.
    summary : `QuerySummary`
        Struct that organizes the dimensions involved in the query.
    columns : `QueryColumns`
        Columns that are referenced in the query in any clause.
    collections : `CollectionManager`
        Manager object for collection tables.

    Notes
    -----
    SQLAlchemy is used in the public interface of `Query` rather than just its
    implementation simply because avoiding this would entail writing wrappers
    for the `sqlalchemy.engine.RowProxy` and `sqlalchemy.engine.ResultProxy`
    classes that are themselves generic wrappers for lower-level Python DBAPI
    classes.  Another layer would entail another set of computational
    overheads, but the only reason we would seriously consider not using
    SQLAlchemy here in the future would be to reduce computational overheads.
    """

    def __init__(self, *, sql: FromClause,
                 summary: QuerySummary,
                 columns: QueryColumns,
                 collections: CollectionManager):
        self.summary = summary
        self.sql = sql
        self._columns = columns
        self._collections = collections

    def predicate(self, region: Optional[Region] = None) -> Callable[[RowProxy], bool]:
        """Return a callable that can perform extra Python-side filtering of
        query results.

        To get the expected results from a query, the returned predicate *must*
        be used to ignore rows for which it returns `False`; this permits the
        `QueryBuilder` implementation to move logic from the database to Python
        without changing the public interface.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order for the
            predicate to return `True`.  If not provided, this will be the
            region in `QuerySummary.dataId`, if there is one.

        Returns
        -------
        func : `Callable`
            A callable that takes a single `sqlalchemy.engine.RowProxy`
            argument and returns `bool`.
        """
        whereRegion = region if region is not None else self.summary.dataId.region

        def closure(row: RowProxy) -> bool:
            rowRegions = [row[column] for column in self._columns.regions.values()]
            # "No constraint" is signalled by None, so test identity rather
            # than truthiness; a falsy-but-valid region must still filter.
            if whereRegion is not None and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
                return False
            # Every pair of regions within the row must also overlap.
            return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))

        return closure

    def extractDimensionsTuple(self, row: RowProxy, dimensions: Iterable[Dimension]) -> tuple:
        """Extract a tuple of data ID values from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        dimensions : `Iterable` [ `Dimension` ]
            The dimensions to include in the returned tuple, in order.

        Returns
        -------
        values : `tuple`
            A tuple of dimension primary key values.
        """
        return tuple(row[self._columns.getKeyColumn(dimension)] for dimension in dimensions)

    def extractDataId(self, row: RowProxy, *, graph: Optional[DimensionGraph] = None
                      ) -> DataCoordinate:
        """Extract a data ID from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        graph : `DimensionGraph`, optional
            The dimensions the returned data ID should identify.  If not
            provided, this will be all dimensions in `QuerySummary.requested`.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID that identifies all required and implied dimensions.
        """
        if graph is None:
            graph = self.summary.requested
        # Values are passed in required-then-implied order.
        return DataCoordinate.fromFullValues(
            graph,
            self.extractDimensionsTuple(row, itertools.chain(graph.required, graph.implied))
        )

    def extractDatasetRef(self, row: RowProxy, datasetType: DatasetType,
                          dataId: Optional[DataCoordinate] = None) -> Tuple[DatasetRef, Optional[int]]:
        """Extract a `DatasetRef` from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        datasetType : `DatasetType`
            Type of the dataset to extract.  Must have been included in the
            `Query` via a call to `QueryBuilder.joinDataset` with
            ``isResult=True``, or otherwise included in
            `QueryColumns.datasets`.
        dataId : `DataCoordinate`, optional
            Data ID to attach to the `DatasetRef`.  A minimal (i.e. base class)
            `DataCoordinate` is constructed from ``row`` if `None`.

        Returns
        -------
        ref : `DatasetRef`
            Reference to the dataset; guaranteed to have `DatasetRef.id` not
            `None`.
        rank : `int` or `None`
            Integer index of the collection in which this dataset was found,
            within the sequence of collections passed when constructing the
            query.  `None` if `QueryBuilder.joinDataset` was called with
            ``addRank=False``.
        """
        if dataId is None:
            dataId = self.extractDataId(row, graph=datasetType.dimensions)
        datasetColumns = self._columns.datasets[datasetType]
        # The row holds the run's key; the collection manager resolves it to
        # a record whose name is attached to the reference.
        runRecord = self._collections[row[datasetColumns.runKey]]
        return (DatasetRef(datasetType, dataId, id=row[datasetColumns.id], run=runRecord.name),
                row[datasetColumns.rank] if datasetColumns.rank is not None else None)

191 row[datasetColumns.rank] if datasetColumns.rank is not None else None)