Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["QuerySummary"] # other classes here are local to subpackage 

24 

25from dataclasses import dataclass 

26from typing import Iterator, List, Optional, Set, Union 

27 

28from sqlalchemy.sql import ColumnElement 

29 

30from ...core import ( 

31 DatasetType, 

32 Dimension, 

33 DimensionElement, 

34 DimensionGraph, 

35 DimensionUniverse, 

36 ExpandedDataCoordinate, 

37 NamedKeyDict, 

38 NamedValueSet, 

39 SkyPixDimension, 

40 Timespan, 

41) 

42from .exprParser import Node, ParserYacc 

43 

44 

@dataclass
class QueryWhereExpression:
    """A struct holding the result of parsing a user-provided WHERE
    expression.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    expression : `str`, optional
        The string expression to parse.  If it is `None` or empty, nothing is
        parsed and the struct is populated with no tree, an empty key set and
        an empty metadata mapping.

    Raises
    ------
    RuntimeError
        Raised if the expression cannot be parsed; the exception raised by
        the parser is chained as the cause.
    """
    def __init__(self, universe: DimensionUniverse, expression: Optional[str] = None):
        if not expression:
            # Nothing to parse: every attribute is left empty.
            self.tree = None
            self.keys = NamedValueSet()
            self.metadata = NamedKeyDict()
            return
        # Deferred import: the visitor is only needed when there actually is
        # an expression to inspect.
        from .expressions import InspectionVisitor
        try:
            self.tree = ParserYacc().parse(expression)
        except Exception as exc:
            raise RuntimeError(f"Failed to parse user expression `{expression}'.") from exc
        # Walk the parsed tree once to collect everything it refers to.
        inspector = InspectionVisitor(universe)
        self.tree.visit(inspector)
        self.keys = inspector.keys
        self.metadata = inspector.metadata

    tree: Optional[Node]
    """Root of the parsed user expression, or `None` when no expression was
    given (`Node` or `None`).
    """

    keys: NamedValueSet[Dimension]
    """Dimensions whose keys the expression refers to
    (`NamedValueSet` of `Dimension`).
    """

    metadata: NamedKeyDict[DimensionElement, Set[str]]
    """Metadata fields of dimension elements that the expression refers to
    (`NamedKeyDict` mapping `DimensionElement` to a `set` of field names).
    """

86 

87 

@dataclass
class QuerySummary:
    """A struct that collects and classifies the dimensions taking part in a
    query.

    A `QuerySummary` instance is necessary to construct a `QueryBuilder`, and
    it needs to include all of the dimensions that will be included in the
    query (including any needed for querying datasets).

    Parameters
    ----------
    requested : `DimensionGraph`
        The dimensions whose primary keys should be included in the result
        rows of the query.
    dataId : `ExpandedDataCoordinate`, optional
        A fully-expanded data ID identifying dimensions known in advance.  If
        not provided, an empty data ID is used instead.
    expression : `str` or `QueryWhereExpression`, optional
        A user-provided WHERE expression, either as a string still to be
        parsed or as an already-parsed `QueryWhereExpression`.
    """
    def __init__(self, requested: DimensionGraph, *,
                 dataId: Optional[ExpandedDataCoordinate] = None,
                 expression: Optional[Union[str, QueryWhereExpression]] = None):
        self.requested = requested
        if dataId is None:
            # No data ID given: substitute an empty one from the same universe.
            dataId = ExpandedDataCoordinate(requested.universe.empty, ())
        self.dataId = dataId
        if not isinstance(expression, QueryWhereExpression):
            # A raw string (or `None`) still has to be parsed.
            expression = QueryWhereExpression(requested.universe, expression)
        self.expression = expression

    requested: DimensionGraph
    """Dimensions whose primary keys should appear in the result rows of the
    query (`DimensionGraph`).
    """

    dataId: ExpandedDataCoordinate
    """Data ID identifying the dimensions known before the query is
    constructed (`ExpandedDataCoordinate`).
    """

    expression: QueryWhereExpression
    """Information about the parsed user WHERE expression, if any
    (`QueryWhereExpression`).
    """

    @property
    def universe(self) -> DimensionUniverse:
        """All known dimensions (`DimensionUniverse`).
        """
        return self.requested.universe

    @property
    def spatial(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose regions and skypix IDs should be included
        in the query (`NamedValueSet` of `DimensionElement`).
        """
        # Candidates are the spatial elements of the dimensions whose keys
        # are joined (`self.mustHaveKeysJoined.spatial`), minus anything
        # already given by the data ID at query construction time.
        candidates = self.mustHaveKeysJoined.spatial - self.dataId.graph.elements
        count = len(candidates)
        if count > 1:
            # A spatial join: the common SkyPix system has to be part of the
            # query so the participating elements can be connected through it.
            candidates.add(self.universe.commonSkyPix)
        elif count == 1:
            # No spatial join, but a WHERE filter on a given region is still
            # possible.
            if not self.dataId.graph.spatial:
                # Neither a join nor a filter; any spatial information this
                # element carries is irrelevant for this query.
                return NamedValueSet()
            # Region filters only work against SkyPix dimensions, so a
            # non-SkyPix element gets joined to the common SkyPix dimension.
            (only,) = candidates
            if not isinstance(only, SkyPixDimension):
                candidates.add(self.universe.commonSkyPix)
        return candidates

    @property
    def temporal(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose timespans should be included in the
        query (`NamedValueSet` of `DimensionElement`).
        """
        # Candidates are the temporal elements of the dimensions whose keys
        # are joined (`self.mustHaveKeysJoined.temporal`), minus anything
        # already given by the data ID at query construction time.
        candidates = self.mustHaveKeysJoined.temporal - self.dataId.graph.elements
        if len(candidates) != 1 or self.dataId.graph.temporal:
            return candidates
        # A lone element with nothing temporal in the data ID means there is
        # no temporal join or filter, so its timespan is not needed.
        return NamedValueSet()

    @property
    def mustHaveKeysJoined(self) -> DimensionGraph:
        """Dimensions whose primary keys must be used in the JOIN ON clauses
        of the query, even if their tables do not appear (`DimensionGraph`).

        A `Dimension` primary key can appear in a join clause without its
        table via a foreign key column in the table of a dependent dimension
        element or dataset.
        """
        # Requested dimensions plus those referenced by the WHERE expression.
        requestedNames = self.requested.names
        expressionNames = self.expression.keys.names
        return DimensionGraph(self.universe, names=set(requestedNames | expressionNames))

    @property
    def mustHaveTableJoined(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose associated tables must appear in the
        query's FROM clause (`NamedValueSet` of `DimensionElement`).
        """
        # Start with everything that needs a region, a timespan, or a
        # metadata field referenced by the WHERE expression.
        tables = self.spatial | self.temporal | self.expression.metadata.keys()
        # Add every joined dimension whose ``implied`` attribute is truthy.
        for dimension in (d for d in self.mustHaveKeysJoined if d.implied):
            tables.add(dimension)
        # Add every element, including those from the data ID, that is
        # flagged ``alwaysJoin``.
        allElements = self.mustHaveKeysJoined.union(self.dataId.graph).elements
        for element in (e for e in allElements if e.alwaysJoin):
            tables.add(element)
        return tables

209 

210 

@dataclass
class DatasetQueryColumns:
    """A struct bundling the columns needed to rebuild `DatasetRef`
    instances from query results.

    Iterating over an instance yields ``id``, then ``runKey``, then ``rank``
    if it is not `None`.
    """

    id: ColumnElement
    """Column holding the unique integer ID of the dataset.
    """

    runKey: ColumnElement
    """Foreign key column pointing at the `~CollectionType.RUN` collection
    that holds the dataset.
    """

    rank: Optional[ColumnElement] = None
    """Column holding the index, within the ordered sequence of given
    collections, of the collection in which the dataset was found.
    """

    def __iter__(self) -> Iterator[ColumnElement]:
        # The two mandatory columns always come first, in declaration order.
        yield from (self.id, self.runKey)
        # ``rank`` is optional and is skipped entirely when unset.
        if self.rank is not None:
            yield self.rank

236 

237 

@dataclass
class QueryColumns:
    """A struct organizing the columns of a query that is being built or is
    currently executing.

    The constructor takes no arguments; every container attribute starts out
    empty and is expected to be filled in incrementally.
    """
    def __init__(self):
        # One empty mapping per category of column.
        self.keys = NamedKeyDict()
        self.timespans = NamedKeyDict()
        self.regions = NamedKeyDict()
        self.datasets = NamedKeyDict()

    keys: NamedKeyDict[Dimension, List[ColumnElement]]
    """Columns holding the primary key values of dimensions
    (`NamedKeyDict` mapping `Dimension` to a `list` of `ColumnElement`).

    Each list gathers the columns, possibly from several tables, that refer
    to the same dimension; the query should constrain all of them to have the
    same value.

    In a `Query`, the keys of this dictionary must include at least the
    dimensions in `QuerySummary.requested` and `QuerySummary.dataId.graph`.
    """

    timespans: NamedKeyDict[DimensionElement, Timespan[ColumnElement]]
    """Columns holding the timespans of elements that take part in a temporal
    join or filter in the query (`NamedKeyDict` mapping `DimensionElement` to
    `Timespan` of `ColumnElement`).

    In a `Query`, the keys of this dictionary must be exactly the elements
    in `QuerySummary.temporal`.
    """

    regions: NamedKeyDict[DimensionElement, ColumnElement]
    """Columns holding the regions of elements that take part in a spatial
    join or filter in the query (`NamedKeyDict` mapping `DimensionElement` to
    `ColumnElement`).

    In a `Query`, the keys of this dictionary must be exactly the elements
    in `QuerySummary.spatial`.
    """

    datasets: NamedKeyDict[DatasetType, DatasetQueryColumns]
    """Columns from which `DatasetRef` instances can be constructed out of
    query results, for each `DatasetType` included in the query
    (`NamedKeyDict` [ `DatasetType`, `DatasetQueryColumns` ] ).
    """

    def getKeyColumn(self, dimension: Dimension) -> ColumnElement:
        """Return one of the columns in ``self.keys`` for the given dimension.

        Which column is selected is an implementation detail, but the choice
        is guaranteed to be deterministic and consistent across multiple
        calls.

        Parameters
        ----------
        dimension : `Dimension`
            Element for which to obtain a key column.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy column object.
        """
        candidates = self.keys[dimension]
        # The last entry is picked purely for the benefit of people reading
        # the query (e.g. developers debugging it): it makes it more likely
        # that the key comes from the dimension's own table or, failing that,
        # from a closely related dimension, which is less surprising than
        # e.g. a dataset subquery.  For the database the choice is arbitrary,
        # because the query guarantees all of these columns are equal.
        return candidates[-1]