Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21from __future__ import annotations 

22 

23__all__ = ("Query",) 

24 

25import itertools 

26from typing import Optional, Tuple, Callable 

27 

28from sqlalchemy.sql import FromClause 

29from sqlalchemy.engine import RowProxy 

30 

31from lsst.sphgeom import Region 

32 

33from ...core import ( 

34 DataCoordinate, 

35 DatasetRef, 

36 DatasetType, 

37 DimensionGraph, 

38) 

39from ..interfaces import CollectionManager 

40from ._structs import QuerySummary, QueryColumns 

41 

42 

class Query:
    """A wrapper for a SQLAlchemy query that knows how to transform result rows
    into data IDs and dataset references.

    A `Query` should almost always be constructed directly by a call to
    `QueryBuilder.finish`; direct construction will make it difficult to be
    able to maintain invariants between arguments (see the documentation for
    `QueryColumns` for more information).

    Parameters
    ----------
    sql : `sqlalchemy.sql.FromClause`
        A complete SELECT query, including at least SELECT, FROM, and WHERE
        clauses.
    summary : `QuerySummary`
        Struct that organizes the dimensions involved in the query.
    columns : `QueryColumns`
        Columns that are referenced in the query in any clause.
    collections : `CollectionManager`
        Manager object for collection tables.

    Notes
    -----
    SQLAlchemy is used in the public interface of `Query` rather than just its
    implementation simply because avoiding this would entail writing wrappers
    for the `sqlalchemy.engine.RowProxy` and `sqlalchemy.engine.ResultProxy`
    classes that are themselves generic wrappers for lower-level Python DBAPI
    classes.  Another layer would entail another set of computational
    overheads, but the only reason we would seriously consider not using
    SQLAlchemy here in the future would be to reduce computational overheads.
    """

    def __init__(self, *, sql: FromClause,
                 summary: QuerySummary,
                 columns: QueryColumns,
                 collections: CollectionManager):
        # ``summary`` and ``sql`` are public attributes; the column and
        # collection lookups are implementation details of row extraction.
        self.summary = summary
        self.sql = sql
        self._columns = columns
        self._collections = collections

    def predicate(self, region: Optional[Region] = None) -> Callable[[RowProxy], bool]:
        """Return a callable that can perform extra Python-side filtering of
        query results.

        To get the expected results from a query, the returned predicate *must*
        be used to ignore rows for which it returns `False`; this permits the
        `QueryBuilder` implementation to move logic from the database to Python
        without changing the public interface.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order for the
            predicate to return `True`.  If not provided, this will be the
            region in `QuerySummary.dataId`, if there is one.

        Returns
        -------
        func : `Callable`
            A callable that takes a single `sqlalchemy.engine.RowProxy`
            argument and returns `bool`.
        """
        whereRegion = region if region is not None else self.summary.dataId.region

        def closure(row: RowProxy) -> bool:
            rowRegions = [row[column] for column in self._columns.regions.values()]
            # Test against `None` explicitly rather than relying on the
            # truthiness of the region object: a region type that happens to
            # evaluate as falsy must still constrain the results.
            if whereRegion is not None and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
                return False
            # Every pair of regions within the row must also overlap.
            return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))

        return closure

    def extractDataId(self, row: RowProxy, *, graph: Optional[DimensionGraph] = None) -> DataCoordinate:
        """Extract a data ID from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        graph : `DimensionGraph`, optional
            The dimensions the returned data ID should identify.  If not
            provided, this will be all dimensions in `QuerySummary.requested`.

        Returns
        -------
        dataId : `DataCoordinate`
            A minimal data ID that identifies the requested dimensions but
            includes no metadata or implied dimensions.
        """
        if graph is None:
            graph = self.summary.requested
        # Only the required dimensions' key columns are read from the row.
        values = tuple(row[self._columns.getKeyColumn(dimension)] for dimension in graph.required)
        return DataCoordinate(graph, values)

    def extractDatasetRef(self, row: RowProxy, datasetType: DatasetType,
                          dataId: Optional[DataCoordinate] = None) -> Tuple[DatasetRef, Optional[int]]:
        """Extract a `DatasetRef` from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        datasetType : `DatasetType`
            Type of the dataset to extract.  Must have been included in the
            `Query` via a call to `QueryBuilder.joinDataset` with
            ``isResult=True``, or otherwise included in
            `QueryColumns.datasets`.
        dataId : `DataCoordinate`, optional
            Data ID to attach to the `DatasetRef`.  A minimal (i.e. base class)
            `DataCoordinate` is constructed from ``row`` if `None`.

        Returns
        -------
        ref : `DatasetRef`
            Reference to the dataset; guaranteed to have `DatasetRef.id` not
            `None`.
        rank : `int` or `None`
            Integer index of the collection in which this dataset was found,
            within the sequence of collections passed when constructing the
            query.  `None` if `QueryBuilder.joinDataset` was called with
            ``addRank=False``.
        """
        if dataId is None:
            dataId = self.extractDataId(row, graph=datasetType.dimensions)
        datasetColumns = self._columns.datasets[datasetType]
        # The row holds only the run's key; the collection manager is indexed
        # with it to obtain the record whose name goes into the reference.
        runRecord = self._collections[row[datasetColumns.runKey]]
        return (DatasetRef(datasetType, dataId, id=row[datasetColumns.id], run=runRecord.name),
                row[datasetColumns.rank] if datasetColumns.rank is not None else None)