Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ( 

24 "CheckVisitor", 

25 "InspectionVisitor", 

26 "InspectionSummary", 

27) 

28 

29import dataclasses 

30from typing import ( 

31 AbstractSet, 

32 List, 

33 Optional, 

34 Sequence, 

35 Set, 

36 Tuple, 

37 TYPE_CHECKING, 

38 Union, 

39) 

40 

41from ....core import ( 

42 DataCoordinate, 

43 DimensionUniverse, 

44 Dimension, 

45 DimensionElement, 

46 DimensionGraph, 

47 GovernorDimension, 

48 NamedKeyDict, 

49 NamedValueSet, 

50) 

51from ...wildcards import EllipsisType, Ellipsis 

52from .parser import Node, TreeVisitor 

53from .normalForm import NormalForm, NormalFormVisitor 

54from .categorize import categorizeElementId, categorizeConstant, ExpressionConstant 

55 

56if TYPE_CHECKING: 56 ↛ 57line 56 didn't jump to line 57, because the condition on line 56 was never true

57 import astropy.time 

58 

59 

@dataclasses.dataclass
class InspectionSummary:
    """Common base for the summary objects that `CheckVisitor` and
    `InspectionVisitor` fill in while examining a parsed expression.
    """

    dimensions: NamedValueSet[Dimension] = dataclasses.field(default_factory=NamedValueSet)
    """Dimensions whose primary keys or dependencies are referenced anywhere
    in this branch (`NamedValueSet` [ `Dimension` ]).
    """

    columns: NamedKeyDict[DimensionElement, Set[str]] = dataclasses.field(default_factory=NamedKeyDict)
    """Dimension element tables with at least one column referenced anywhere
    in this branch, mapped to the names of those columns
    (`NamedKeyDict` [ `DimensionElement`, `set` [ `str` ] ]).
    """

    hasIngestDate: bool = False
    """Whether the expression refers to the special dataset ingest date
    identifier (`bool`).
    """

    def update(self, other: InspectionSummary) -> None:
        """Fold the dimensions, columns and ingest-date flag of ``other``
        into ``self``, in place.

        Parameters
        ----------
        other : `InspectionSummary`
            The summary whose contents are added to this one.  It is not
            modified.
        """
        self.dimensions.update(other.dimensions)
        for element, names in other.columns.items():
            # A fresh set is created for elements not seen before, so the
            # sets owned by ``other`` are never shared with ``self``.
            bucket = self.columns.setdefault(element, set())
            bucket.update(names)
        if not self.hasIngestDate:
            self.hasIngestDate = other.hasIngestDate

93 

94 

@dataclasses.dataclass
class TreeSummary(InspectionSummary):
    """Summary produced by `InspectionVisitor` for one expression (sub)tree.

    Notes
    -----
    On top of what `InspectionSummary` tracks, this class records enough
    state to recognize dimension equivalence terms such as ``tract=4`` when
    they occur in simple contexts (surrounded only by ANDs and ORs).  When
    `InspectionVisitor` runs on its own (``check=False`` in the query code)
    that extra state goes unused, but maintaining it is cheap.  `CheckVisitor`
    relies on it, via its delegation to `InspectionVisitor`, to learn which
    governor dimension values a branch of the normal-form expression sets.
    """

    dataIdKey: Optional[Dimension] = None
    """A `Dimension` that is (when `dataIdValue` is not `None`) or may be
    (when `dataIdValue` is `None`) fully identified by a literal value in
    this branch.
    """

    dataIdValue: Optional[str] = None
    """A literal value that constrains (when `dataIdKey` is not `None`) or
    may constrain (when `dataIdKey` is `None`) a dimension in this branch.

    Always a `str` or `None`; it may have to be coerced to `int` to reflect
    what the user actually meant.
    """

    def isDataIdKeyOnly(self) -> bool:
        """Report whether this branch is nothing but a data ID key
        identifier.
        """
        return self.dataIdValue is None and self.dataIdKey is not None

    def isDataIdValueOnly(self) -> bool:
        """Report whether this branch is nothing but a literal that could
        serve as the value of a data ID key-value pair.
        """
        return self.dataIdValue is not None and self.dataIdKey is None

    def merge(self, other: TreeSummary, isEq: bool = False) -> TreeSummary:
        """Absorb ``other`` so that ``self`` summarizes both expression tree
        branches.

        Parameters
        ----------
        other : `TreeSummary`
            Summary of the other branch.
        isEq : `bool`, optional
            `True` when the two branches are joined by the equality
            operator; `False` (the default) for anything else.

        Returns
        -------
        self : `TreeSummary`
            This object, updated in place.
        """
        self.update(other)
        if isEq:
            # Only "<key> = <literal>" or "<literal> = <key>" yields a
            # key-value pair.
            if self.isDataIdKeyOnly() and other.isDataIdValueOnly():
                self.dataIdValue = other.dataIdValue
                return self
            if self.isDataIdValueOnly() and other.isDataIdKeyOnly():
                self.dataIdKey = other.dataIdKey
                return self
        # Any other combination is not a simple data ID term.
        self.dataIdKey = None
        self.dataIdValue = None
        return self

162 

163 

class InspectionVisitor(TreeVisitor[TreeSummary]):
    """Implements TreeVisitor to identify dimension elements that need
    to be included in a query, prior to actually constructing a SQLAlchemy
    WHERE clause from it.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    bindKeys : `collections.abc.Set` [ `str` ]
        Identifiers that represent bound parameter values, and hence need not
        represent in-database entities.
    """
    def __init__(self, universe: DimensionUniverse, bindKeys: AbstractSet[str]):
        self.universe = universe
        self.bindKeys = bindKeys

    def visitNumericLiteral(self, value: str, node: Node) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitNumericLiteral
        # A bare literal is a candidate value for a data ID key-value pair.
        return TreeSummary(dataIdValue=value)

    def visitStringLiteral(self, value: str, node: Node) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitStringLiteral
        # A bare literal is a candidate value for a data ID key-value pair.
        return TreeSummary(dataIdValue=value)

    def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitTimeLiteral
        return TreeSummary()

    def visitIdentifier(self, name: str, node: Node) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitIdentifier
        if name in self.bindKeys:
            # Bound parameters contribute no dimensions or columns.
            return TreeSummary()
        constant = categorizeConstant(name)
        if constant is ExpressionConstant.INGEST_DATE:
            return TreeSummary(hasIngestDate=True)
        elif constant is ExpressionConstant.NULL:
            return TreeSummary()
        assert constant is None, "Enum variant conditionals should be exhaustive."
        element, column = categorizeElementId(self.universe, name)
        if column is None:
            # No column means the identifier names a dimension key itself,
            # which makes it a candidate key for a data ID key-value pair.
            assert isinstance(element, Dimension)
            return TreeSummary(
                dimensions=NamedValueSet(element.graph.dimensions),
                dataIdKey=element,
            )
        else:
            return TreeSummary(
                dimensions=NamedValueSet(element.graph.dimensions),
                columns=NamedKeyDict({element: {column}})
            )

    def visitUnaryOp(self, operator: str, operand: TreeSummary, node: Node
                     ) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitUnaryOp
        # Applying a unary operator changes what the operand means: a negated
        # '<dimension>=<value>' term does not constrain the dimension to that
        # value, and a signed literal is no longer the recorded literal text.
        # The dimensions and columns referenced are still needed by the query,
        # but the result must not be reported as a bare key, bare value, or
        # key-value pair, otherwise `CheckVisitor` would record a governor
        # value the expression does not actually impose.
        operand.dataIdKey = None
        operand.dataIdValue = None
        return operand

    def visitBinaryOp(self, operator: str, lhs: TreeSummary, rhs: TreeSummary,
                      node: Node) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitBinaryOp
        # Only the equality operator can turn a key and a literal into a
        # key-value pair; `TreeSummary.merge` clears both for anything else.
        return lhs.merge(rhs, isEq=(operator == "="))

    def visitIsIn(self, lhs: TreeSummary, values: List[TreeSummary], not_in: bool,
                  node: Node) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitIsIn
        # Merging without isEq clears any data ID key/value, so an IN term
        # never counts as a simple '<dimension>=<value>' constraint.
        for v in values:
            lhs.merge(v)
        return lhs

    def visitParens(self, expression: TreeSummary, node: Node) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitParens
        return expression

    def visitTupleNode(self, items: Tuple[TreeSummary, ...], node: Node) -> TreeSummary:
        # Docstring inherited from base class
        result = TreeSummary()
        for i in items:
            result.merge(i)
        return result

    def visitRangeLiteral(self, start: int, stop: int, stride: Optional[int], node: Node
                          ) -> TreeSummary:
        # Docstring inherited from TreeVisitor.visitRangeLiteral
        return TreeSummary()

    def visitPointNode(self, ra: TreeSummary, dec: TreeSummary, node: Node) -> TreeSummary:
        # Docstring inherited from base class
        # NOTE(review): the summaries of ``ra`` and ``dec`` are discarded, so
        # anything they reference is not reported - confirm this is intended.
        return TreeSummary()

252 

253 

@dataclasses.dataclass
class InnerSummary(InspectionSummary):
    """Summary built by `CheckVisitor` for one inner group of expression
    branches that are AND'd together.

    It collects the dimensions and tables those branches reference so they
    can be checked for consistency and completeness.
    """

    governors: NamedKeyDict[GovernorDimension, str] = dataclasses.field(
        default_factory=NamedKeyDict,
    )
    """Values of every governor dimension that this expression branch equates
    with a literal, keyed by governor dimension.
    """

265 

266 

@dataclasses.dataclass
class OuterSummary(InspectionSummary):
    """Summary built by `CheckVisitor` for the expression as a whole:
    referenced dimensions, referenced tables, and governor dimension values.
    """

    governors: NamedKeyDict[GovernorDimension, Union[Set[str], EllipsisType]] = dataclasses.field(
        default_factory=NamedKeyDict,
    )
    """All values that appear in the expression for each governor dimension
    relevant to the query.

    A `set` of `str` means only those values are permitted for the governor;
    ``...`` means the expression does not fully constrain that governor.
    """

282 

283 

class CheckVisitor(NormalFormVisitor[TreeSummary, InnerSummary, OuterSummary]):
    """A `NormalFormVisitor` that works out which dimensions and tables a
    query must include, while checking the expression for completeness and
    consistency.

    Parameters
    ----------
    dataId : `DataCoordinate`
        Dimension values that are fully known in advance.
    graph : `DimensionGraph`
        The dimensions the query would include in the absence of this
        expression.
    bindKeys : `collections.abc.Set` [ `str` ]
        Identifiers that represent bound parameter values, and hence need not
        represent in-database entities.
    """

    def __init__(self, dataId: DataCoordinate, graph: DimensionGraph, bindKeys: AbstractSet[str]):
        self.dataId = dataId
        self.graph = graph
        self.bindKeys = bindKeys
        self._branchVisitor = InspectionVisitor(dataId.universe, bindKeys)

    def visitBranch(self, node: Node) -> TreeSummary:
        # Docstring inherited from NormalFormVisitor.
        return node.visit(self._branchVisitor)

    def visitInner(self, branches: Sequence[TreeSummary], form: NormalForm) -> InnerSummary:
        # Docstring inherited from NormalFormVisitor.
        # In disjunctive normal form the inner branches are AND'd together.
        assert form is NormalForm.DISJUNCTIVE
        # Each branch visited below therefore constrains the others, and all
        # of them must agree.  Because the outer branches are OR'd together,
        # anything missing from this group (such as the instrument or skymap
        # governor value needed to interpret a visit or tract number) really
        # is missing: no other inner group can constrain it.
        #
        # The one exception is the data ID given at construction.  It is
        # AND'd to the entire expression later and so applies to every
        # branch; its governor values are seeded into the result up front.
        result = InnerSummary()
        result.governors.update(  # type: ignore
            (known, self.dataId[known]) for known in self.dataId.graph.governors
        )
        for term in branches:
            # Accumulate every dimension and column seen in any context.
            result.update(term)
            # Only a term of the form '<dimension>=<value>' (or an
            # equivalent; categorizeIdentifier recognizes that e.g.
            # 'detector.id=4' means 'detector=4') on a governor dimension
            # needs further attention.
            key = term.dataIdKey
            if not isinstance(key, GovernorDimension) or term.dataIdValue is None:
                continue
            # Remember the constraint, and check that it agrees with any
            # constraint already recorded for the same governor.
            value = result.governors.setdefault(key, term.dataIdValue)
            if value == term.dataIdValue:
                continue
            # The expression says something like "instrument='HSC' AND
            # instrument='DECam'", or the data ID has one value and the
            # expression has another.
            if key in self.dataId:
                raise RuntimeError(
                    f"Conflict between expression containing {key.name}={term.dataIdValue!r} "
                    f"and data ID with {key.name}={value!r}."
                )
            raise RuntimeError(
                f"Conflicting literal values for {key.name} in expression: "
                f"{value!r} != {term.dataIdValue!r}."
            )
        # With the constrained governors known, look for missing ones, e.g.
        # an expression containing "visit=X" that never says which instrument
        # the visit belongs to.  This catches many accidents, at the price of
        # rejecting possibly-legitimate multi-instrument queries such as
        # "visit.seeing < 0.7".  Asking the user to name the instruments
        # explicitly is a reasonable workaround.  An expression like
        #
        #     (instrument='HSC' OR instrument='DECam') AND visit.seeing < 0.7
        #
        # becomes, in disjunctive normal form,
        #
        #     (instrument='HSC' AND visit.seeing < 0.7)
        #     OR (instrument='DECam' AND visit.seeing < 0.7)
        #
        # so each instrument gets its own outer branch and the check below
        # still works (this is the reason for converting to normal form).
        required: NamedValueSet[GovernorDimension] = NamedValueSet()
        for dimension in result.dimensions:
            required.update(dimension.graph.governors)
        if not required.issubset(result.governors.keys()):
            missing = NamedValueSet(required - result.governors.keys())
            raise RuntimeError(
                f"No value(s) for governor dimensions {missing} in expression that references dependent "
                "dimensions. 'Governor' dimensions must always be specified completely in either the "
                "query expression (via simple 'name=<value>' terms, not 'IN' terms) or in a data ID passed "
                "to the query method."
            )
        return result

    def visitOuter(self, branches: Sequence[InnerSummary], form: NormalForm) -> OuterSummary:
        # Docstring inherited from NormalFormVisitor.
        # In disjunctive normal form the outer branches are OR'd together.
        assert form is NormalForm.DISJUNCTIVE
        # First pass: gather every dimension and column referenced.  This is
        # for the query as a whole, so it does not matter whether things are
        # joined by AND or OR (or + or -, etc).
        result = OuterSummary()
        for inner in branches:
            result.update(inner)
        # If the expression references dimensions outside the original query
        # graph, grow the graph to include them.  This lets a user say
        # "tract=X" on the command line (strictly, "skymap=Y AND tract=X";
        # visitInner enforces that) when running a task like ISR that has
        # nothing to do with skymaps.
        if not result.dimensions.issubset(self.graph.dimensions):
            self.graph = DimensionGraph(
                self.graph.universe,
                dimensions=(result.dimensions | self.graph.dimensions),
            )
        # Start every governor the query involves with an empty set of
        # values.
        result.governors.update((known, set()) for known in self.graph.governors)
        # Second pass: because the branches are OR'd together, a governor
        # left unconstrained by any single branch is unconstrained overall,
        # and its entry is replaced by Ellipsis.  A governor constrained by
        # every branch ends up with the set of all values it may take.
        for inner in branches:
            for governor in result.governors:
                seen = result.governors[governor]
                if seen is Ellipsis:
                    # Already known to be unconstrained.
                    continue
                literal = inner.governors.get(governor)
                if literal is None:
                    # Unconstrained in this branch, so no other branch can
                    # constrain it.
                    result.governors[governor] = Ellipsis
                else:
                    seen.add(literal)
        return result