Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["QuerySummary"] # other classes here are local to subpackage 

24 

25import enum 

26from dataclasses import dataclass 

27from typing import Optional, Tuple, List, Set, Union 

28 

29from sqlalchemy.sql import ColumnElement, bindparam 

30 

31from ...core import ( 

32 DatasetType, 

33 Dimension, 

34 DimensionElement, 

35 DimensionGraph, 

36 DimensionUniverse, 

37 ExpandedDataCoordinate, 

38 SkyPixDimension, 

39 Timespan, 

40) 

41from ...core.utils import NamedValueSet, NamedKeyDict 

42from .exprParser import Node, ParserYacc 

43 

44 

class GivenTime(enum.Enum):
    """Enumeration describing at what stage (if any) a data ID value is
    supplied as a constraint on a query.
    """

    NOT_GIVEN = 0
    """The value is never supplied as a constraint on the query.
    """

    AT_CONSTRUCTION = 1
    """The value is supplied when the query is constructed, and can hence be
    obtained from `QuerySummary.dataId`.
    """

    AT_EXECUTION = 2
    """The value is supplied only when the query is executed, and must be
    part of the data ID passed to `Query.execute` or `Query.bind`.
    """

63 

64 

@dataclass
class QueryWhereExpression:
    """A struct holding the result of parsing a user-provided WHERE
    expression.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    expression : `str`, optional
        The string expression to parse.  `None` and the empty string both
        mean "no expression".

    Raises
    ------
    RuntimeError
        Raised (chained to the underlying exception) if the parser cannot be
        constructed or the expression cannot be parsed.
    """
    def __init__(self, universe: DimensionUniverse, expression: Optional[str] = None):
        if not expression:
            # Nothing to parse: record an absent tree and empty containers.
            self.tree = None
            self.keys = NamedValueSet()
            self.metadata = NamedKeyDict()
            return
        # Imported at call time rather than at module scope.
        from .expressions import InspectionVisitor
        try:
            parsed = ParserYacc().parse(expression)
        except Exception as exc:
            raise RuntimeError(f"Failed to parse user expression `{expression}'.") from exc
        self.tree = parsed
        # Walk the parsed tree once to collect everything it references.
        inspector = InspectionVisitor(universe)
        self.tree.visit(inspector)
        self.keys = inspector.keys
        self.metadata = inspector.metadata

    tree: Optional[Node]
    """The parsed user expression tree, or `None` if no expression was given
    (`Node` or `None`).
    """

    keys: NamedValueSet[Dimension]
    """All dimensions whose keys are referenced by the expression
    (`NamedValueSet` of `Dimension`).
    """

    metadata: NamedKeyDict[DimensionElement, Set[str]]
    """All dimension element metadata fields referenced by the expression
    (`NamedKeyDict` mapping `DimensionElement` to a `set` of field names).
    """

106 

107 

@dataclass
class QuerySummary:
    """A struct that holds and categorizes the dimensions involved in a query.

    A `QuerySummary` instance is necessary to construct a `QueryBuilder`, and
    it needs to include all of the dimensions that will be included in the
    query (including any needed for querying datasets).

    Parameters
    ----------
    requested : `DimensionGraph`
        The dimensions whose primary keys should be included in the result rows
        of the query.
    dataId : `ExpandedDataCoordinate`, optional
        A fully-expanded data ID identifying dimensions known in advance.  If
        not provided, will be set to an empty data ID.
    expression : `str` or `QueryWhereExpression`, optional
        A user-provided string WHERE expression.
    given : `DimensionGraph`, optional
        Dimensions that will be fully identified before the query is executed,
        if not necessarily provided (in ``dataId``) now.  If provided, must be
        a superset of ``dataId.graph``; if not provided, will be set to
        ``dataId.graph``.
    entire : `NamedValueSet` of `DimensionElement`, optional
        Dimension elements that should be fully included in any spatial or
        temporal join, including child elements that would not otherwise be
        included in that join.  For example, passing "visit" here in a query
        constrained to a single tract would include all visit+detector
        combinations in any visit that overlaps that tract, not just the
        visit+detector combinations that directly overlap the tract.

    Raises
    ------
    AssertionError
        Raised (when assertions are enabled) if ``given`` is not a superset
        of ``dataId.graph``.
    """
    def __init__(self, requested: DimensionGraph, *,
                 dataId: Optional[ExpandedDataCoordinate] = None,
                 expression: Optional[Union[str, QueryWhereExpression]] = None,
                 given: Optional[DimensionGraph] = None,
                 entire: Optional[NamedValueSet[DimensionElement]] = None):
        self.requested = requested
        self.dataId = dataId if dataId is not None else ExpandedDataCoordinate(requested.universe.empty, ())
        self.given = given if given is not None else self.dataId.graph
        # Include a message so that a violated precondition is diagnosable
        # from the traceback alone.
        assert self.given.issuperset(self.dataId.graph), (
            f"'given' dimensions ({self.given}) must be a superset of the "
            f"dimensions identified by 'dataId' ({self.dataId.graph})."
        )
        # Strings (and `None`) are parsed here; already-parsed expressions are
        # used as-is.
        self.expression = (expression if isinstance(expression, QueryWhereExpression)
                           else QueryWhereExpression(requested.universe, expression))
        self.entire = entire if entire is not None else NamedValueSet()

    requested: DimensionGraph
    """Dimensions whose primary keys should be included in the result rows of
    the query (`DimensionGraph`).
    """

    dataId: ExpandedDataCoordinate
    """A data ID identifying dimensions known before query construction
    (`ExpandedDataCoordinate`).
    """

    expression: QueryWhereExpression
    """Information about any parsed user WHERE expression
    (`QueryWhereExpression`).
    """

    given: DimensionGraph
    """All dimensions whose primary keys are fully identified before query
    execution (`DimensionGraph`).
    """

    entire: NamedValueSet[DimensionElement]
    """Dimension elements that should be fully included when they overlap other
    elements spatially or temporally (`NamedValueSet` of `DimensionElement`).

    For example, including the visit dimension here in a query that also
    requests the detector dimension and has a user expression on tract will
    result in all visit+detector combinations being returned for any visits
    that overlap the tract, rather than just the visit+detector combinations
    that directly overlap the tract.
    """

    def whenIsDimensionGiven(self, dimension: Dimension) -> GivenTime:
        """Return an enumeration value indicating when the given dimension
        is identified in the WHERE clause.

        Parameters
        ----------
        dimension : `Dimension`
            The dimension to look up in ``dataId.graph`` and ``given``.

        Returns
        -------
        when : `GivenTime`
            Enumeration indicating when the dimension is identified.
        """
        # ``dataId.graph`` is checked first because ``given`` is a superset
        # of it.
        if dimension in self.dataId.graph:
            return GivenTime.AT_CONSTRUCTION
        if dimension in self.given:
            return GivenTime.AT_EXECUTION
        return GivenTime.NOT_GIVEN

    def whenIsRegionGiven(self) -> GivenTime:
        """Return an enumeration value indicating when a region is provided
        in the WHERE clause.

        Returns
        -------
        when : `GivenTime`
            Enumeration indicating when a region is provided.
        """
        if not self.given.spatial:
            return GivenTime.NOT_GIVEN
        # The region counts as given at construction only if the data ID
        # already has the same spatial content as ``given``.
        if self.given.spatial == self.dataId.graph.spatial:
            return GivenTime.AT_CONSTRUCTION
        return GivenTime.AT_EXECUTION

    def whenIsTimespanGiven(self) -> GivenTime:
        """Return an enumeration value indicating when a timespan is provided
        in the WHERE clause.

        Returns
        -------
        when : `GivenTime`
            Enumeration indicating when a timespan is provided.
        """
        if not self.given.temporal:
            return GivenTime.NOT_GIVEN
        # The timespan counts as given at construction only if the data ID
        # already has the same temporal content as ``given``.
        if self.given.temporal == self.dataId.graph.temporal:
            return GivenTime.AT_CONSTRUCTION
        return GivenTime.AT_EXECUTION

    @property
    def universe(self) -> DimensionUniverse:
        """All known dimensions (`DimensionUniverse`).
        """
        return self.requested.universe

    @property
    def spatial(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose regions and skypix IDs should be included
        in the query (`NamedValueSet` of `DimensionElement`).
        """
        # An element may participate spatially in the query if:
        # - it is returned by `getSpatial` (with ``prefer=self.entire``) on
        #   the dimensions whose keys must be joined, i.e. the requested
        #   dimensions plus those referenced by the user expression;
        # - it isn't also given at query construction or execution time.
        result = self.mustHaveKeysJoined.getSpatial(prefer=self.entire) - self.given.elements
        if len(result) == 1:
            # There's no spatial join, but there might be a WHERE filter based
            # on a given region.
            if not self.given.spatial:
                # There is no spatial join or filter in this query.  Even
                # if this element might be associated with spatial
                # information, we don't need it for this query.
                return NamedValueSet()
            # We can only perform those filters against SkyPix dimensions,
            # so if what we have isn't one, add the common SkyPix dimension
            # to the query; the element we have will be joined to that.
            element, = result
            if not isinstance(element, SkyPixDimension):
                result.add(self.universe.commonSkyPix)
        elif len(result) > 1:
            # There's a spatial join.  Those require the common SkyPix
            # system to be included in the query in order to connect them.
            result.add(self.universe.commonSkyPix)
        return result

    @property
    def temporal(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose timespans should be included in the
        query (`NamedValueSet` of `DimensionElement`).
        """
        # An element may participate temporally in the query if:
        # - it is returned by `getTemporal` (with ``prefer=self.entire``) on
        #   the dimensions whose keys must be joined, i.e. the requested
        #   dimensions plus those referenced by the user expression;
        # - it isn't also given at query construction or execution time.
        result = self.mustHaveKeysJoined.getTemporal(prefer=self.entire) - self.given.elements
        if len(result) == 1 and not self.given.getTemporal():
            # No temporal join or filter.  Even if this element might be
            # associated with temporal information, we don't need it for this
            # query.
            return NamedValueSet()
        return result

    @property
    def mustHaveKeysJoined(self) -> DimensionGraph:
        """Dimensions whose primary keys must be used in the JOIN ON clauses
        of the query, even if their tables do not appear (`DimensionGraph`).

        A `Dimension` primary key can appear in a join clause without its table
        via a foreign key column in table of a dependent dimension element or
        dataset.
        """
        # Union of the requested dimensions and those whose keys are
        # referenced by the user expression.
        names = set(self.requested.names | self.expression.keys.names)
        return DimensionGraph(self.universe, names=names)

    @property
    def mustHaveTableJoined(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose associated tables must appear in the
        query's FROM clause (`NamedValueSet` of `DimensionElement`).
        """
        result = self.spatial | self.temporal | self.expression.metadata.keys()
        # Dimensions with a non-empty ``implied`` attribute are also added.
        for dimension in self.mustHaveKeysJoined:
            if dimension.implied:
                result.add(dimension)
        return result

309 

310 

@dataclass
class QueryColumns:
    """A struct that organizes the columns of a query that is being built or
    is currently being executed.

    The constructor takes no arguments; the container attributes start out
    empty and are expected to be filled in incrementally.
    """
    def __init__(self):
        self.keys = NamedKeyDict()
        self.timespans = NamedKeyDict()
        self.regions = NamedKeyDict()
        self.datasets = NamedKeyDict()

    keys: NamedKeyDict[Dimension, List[ColumnElement]]
    """Columns holding the primary key values of dimensions
    (`NamedKeyDict` mapping `Dimension` to a `list` of `ColumnElement`).

    Each value list holds columns from multiple tables that correspond to the
    same dimension; the query should constrain the values of those columns to
    be the same.

    In a `Query`, the keys of this dictionary must include at least the
    dimensions in `QuerySummary.requested` and `QuerySummary.given`.
    """

    timespans: NamedKeyDict[DimensionElement, Timespan[ColumnElement]]
    """Columns holding the timespans of elements that participate in a
    temporal join or filter in the query (`NamedKeyDict` mapping
    `DimensionElement` to `Timespan` of `ColumnElement`).

    In a `Query`, the keys of this dictionary must be exactly the elements
    in `QuerySummary.temporal`.
    """

    regions: NamedKeyDict[DimensionElement, ColumnElement]
    """Columns holding the regions of elements that participate in a
    spatial join or filter in the query (`NamedKeyDict` mapping
    `DimensionElement` to `ColumnElement`).

    In a `Query`, the keys of this dictionary must be exactly the elements
    in `QuerySummary.spatial`.
    """

    datasets: NamedKeyDict[DatasetType, Tuple[ColumnElement, Optional[ColumnElement]]]
    """Columns holding the ``dataset_id`` and, optionally, the collection
    rank of a dataset in the query (`NamedKeyDict` mapping `DatasetType` to
    `tuple` of `ColumnElement`).

    "Collection rank" here is the index, within the list of collections to
    search, of the collection in which this dataset was found; a lower rank
    corresponds to a collection that appears earlier in the search path.
    """

    def getKeyColumn(self, dimension: Dimension) -> ColumnElement:
        """Return one of the columns in ``self.keys`` for the given dimension.

        Which column is selected is an implementation detail, but the choice
        is guaranteed to be deterministic and consistent across calls.

        Parameters
        ----------
        dimension : `Dimension`
            Element for which to obtain a key column.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy column object.
        """
        # The last column is picked purely for the benefit of people reading
        # the query (e.g. developers debugging it): it makes it more likely
        # that the key comes from the dimension's own table or, failing that,
        # from a closely related dimension, which is less surprising than
        # e.g. some dataset subquery.  To the database the choice is
        # arbitrary, because the query guarantees all candidates are equal.
        candidates = self.keys[dimension]
        return candidates[-1]

389 

390 

@dataclass
class QueryParameters:
    """A struct managing deferred bind parameters in a query.

    Takes no parameters at construction, as expected usage is to add elements
    to its container attributes incrementally.
    """
    def __init__(self):
        self.keys = NamedKeyDict()
        self.timespan = None
        self.skypix = NamedKeyDict()

    keys: NamedKeyDict[Dimension, bindparam]
    """Bind parameters that correspond to dimension primary key values
    (`NamedKeyDict` mapping `Dimension` to `sqlalchemy.sql.bindparam`).

    In a `Query`, the keys of this dictionary are the subset of
    `QuerySummary.given` for which `QuerySummary.whenIsDimensionGiven`
    returns `GivenTime.AT_EXECUTION`.
    """

    timespan: Optional[Timespan[bindparam]]
    """Bind parameters that correspond to timespans (`Timespan` of
    `sqlalchemy.sql.bindparam`).

    In a `Query`, this is not `None` if and only if
    `QuerySummary.whenIsTimespanGiven` returns `GivenTime.AT_EXECUTION`.
    """

    skypix: NamedKeyDict[SkyPixDimension, bindparam]
    """Bind parameters that correspond to skypix IDs (`NamedKeyDict` mapping
    `SkyPixDimension` to `sqlalchemy.sql.bindparam`).

    In a `Query`, this is expected to be non-empty if and only if
    `QuerySummary.whenIsRegionGiven` returns `GivenTime.AT_EXECUTION`.
    Unlike `timespan`, it is never `None`: the constructor always initializes
    it to an empty `NamedKeyDict`.
    """

426 """