Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

# This file is part of daf_butler. 

# 

# Developed for the LSST Data Management System. 

# This product includes software developed by the LSST Project 

# (http://www.lsst.org). 

# See the COPYRIGHT file at the top-level directory of this distribution 

# for details of code ownership. 

# 

# This program is free software: you can redistribute it and/or modify 

# it under the terms of the GNU General Public License as published by 

# the Free Software Foundation, either version 3 of the License, or 

# (at your option) any later version. 

# 

# This program is distributed in the hope that it will be useful, 

# but WITHOUT ANY WARRANTY; without even the implied warranty of 

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

# GNU General Public License for more details. 

# 

# You should have received a copy of the GNU General Public License 

# along with this program. If not, see <http://www.gnu.org/licenses/>. 

from __future__ import annotations 

 

__all__ = ["QuerySummary"] # other classes here are local to subpackage 

 

import enum 

from dataclasses import dataclass 

from typing import Optional, Tuple, List, Set, Union 

 

from sqlalchemy.sql import ColumnElement, bindparam 

 

from ...core import ( 

DatasetType, 

Dimension, 

DimensionElement, 

DimensionGraph, 

DimensionUniverse, 

ExpandedDataCoordinate, 

SkyPixDimension, 

Timespan, 

) 

from ...core.utils import NamedValueSet, NamedKeyDict 

from .exprParser import Node, ParserYacc 

 

 

class GivenTime(enum.Enum):
    """Enumeration describing when, if ever, a data ID value is supplied as a
    constraint on a query.
    """

    NOT_GIVEN = 0
    """The value is never supplied as a constraint on the query.
    """

    AT_CONSTRUCTION = 1
    """The value is supplied when the query is constructed, and is therefore
    available from `QuerySummary.dataId`.
    """

    AT_EXECUTION = 2
    """The value is supplied only when the query is executed, and must be part
    of the data ID passed to `Query.execute` or `Query.bind`.
    """

 

 

@dataclass
class QueryWhereExpression:
    """Struct holding the result of parsing a user-supplied WHERE expression.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    expression : `str`, optional
        The string expression to parse.  `None` or an empty string produces
        an instance with no tree and empty ``keys`` and ``metadata``.

    Raises
    ------
    RuntimeError
        Raised (chained to the underlying exception) if creating the parser
        or parsing the expression fails.
    """
    def __init__(self, universe: DimensionUniverse, expression: Optional[str] = None):
        if not expression:
            # Nothing to parse, so nothing is referenced.
            self.tree = None
            self.keys = NamedValueSet()
            self.metadata = NamedKeyDict()
            return
        # NOTE(review): function-scope import, presumably to avoid an import
        # cycle with the ``expressions`` module -- confirm before moving it.
        from .expressions import InspectionVisitor
        try:
            self.tree = ParserYacc().parse(expression)
        except Exception as exc:
            raise RuntimeError(f"Failed to parse user expression `{expression}'.") from exc
        # Walk the parsed tree once to collect what the expression refers to.
        inspector = InspectionVisitor(universe)
        self.tree.visit(inspector)
        self.keys = inspector.keys
        self.metadata = inspector.metadata

    tree: Optional[Node]
    """Parsed tree of the user expression, or `None` when no expression was
    given (`Node` or `None`).
    """

    keys: NamedValueSet[Dimension]
    """Dimensions whose keys the expression refers to (`NamedValueSet` of
    `Dimension`).
    """

    metadata: NamedKeyDict[DimensionElement, Set[str]]
    """Metadata fields of dimension elements that the expression refers to
    (`NamedKeyDict` mapping `DimensionElement` to a `set` of field names).
    """

 

 

@dataclass
class QuerySummary:
    """Struct that records and classifies the dimensions taking part in a
    query.

    A `QueryBuilder` cannot be constructed without a `QuerySummary`, and the
    summary has to cover every dimension that will appear in the query
    (including any that are only needed to query datasets).

    Parameters
    ----------
    requested : `DimensionGraph`
        Dimensions whose primary keys should appear in the result rows of the
        query.
    dataId : `ExpandedDataCoordinate`, optional
        Fully-expanded data ID identifying dimensions known in advance.  An
        empty data ID is used when this is not provided.
    expression : `str` or `QueryWhereExpression`, optional
        User-provided WHERE expression.  Anything that is not already a
        `QueryWhereExpression` is passed to the `QueryWhereExpression`
        constructor.
    given : `DimensionGraph`, optional
        Dimensions that will be fully identified before the query is
        executed, even if they are not provided (in ``dataId``) now.  When
        provided it must be a superset of ``dataId.graph``; when omitted it
        defaults to ``dataId.graph``.
    entire : `NamedValueSet` of `DimensionElement`, optional
        Dimension elements that should be fully included in any spatial or
        temporal join, including child elements that the join would not
        otherwise pick up.  For instance, passing "visit" in a query
        constrained to a single tract yields every visit+detector
        combination of any visit overlapping that tract, rather than only
        the visit+detector combinations that overlap the tract directly.
    """
    def __init__(self, requested: DimensionGraph, *,
                 dataId: Optional[ExpandedDataCoordinate] = None,
                 expression: Optional[Union[str, QueryWhereExpression]] = None,
                 given: Optional[DimensionGraph] = None,
                 entire: Optional[NamedValueSet[DimensionElement]] = None):
        self.requested = requested
        if dataId is None:
            # No data ID supplied: fall back to an empty one.
            dataId = ExpandedDataCoordinate(requested.universe.empty, ())
        self.dataId = dataId
        if given is None:
            given = self.dataId.graph
        self.given = given
        assert self.given.issuperset(self.dataId.graph)
        if not isinstance(expression, QueryWhereExpression):
            # Covers both `str` and `None`.
            expression = QueryWhereExpression(requested.universe, expression)
        self.expression = expression
        if entire is None:
            entire = NamedValueSet()
        self.entire = entire

    requested: DimensionGraph
    """Dimensions whose primary keys should appear in the result rows of the
    query (`DimensionGraph`).
    """

    dataId: ExpandedDataCoordinate
    """Data ID identifying the dimensions known before query construction
    (`ExpandedDataCoordinate`).
    """

    expression: QueryWhereExpression
    """Information about the parsed user WHERE expression, if any
    (`QueryWhereExpression`).
    """

    given: DimensionGraph
    """Dimensions whose primary keys are fully identified before query
    execution (`DimensionGraph`).
    """

    entire: NamedValueSet[DimensionElement]
    """Dimension elements to include in full when they overlap other elements
    spatially or temporally (`NamedValueSet` of `DimensionElement`).

    For instance, putting the visit dimension here in a query that also
    requests the detector dimension and has a user expression on tract
    returns every visit+detector combination for any visit overlapping the
    tract, rather than only the visit+detector combinations that overlap the
    tract directly.
    """

    def whenIsDimensionGiven(self, dimension: Dimension) -> GivenTime:
        """Report when the given dimension is identified in the WHERE clause.

        Parameters
        ----------
        dimension : `Dimension`
            Dimension to look up.

        Returns
        -------
        when : `GivenTime`
            `GivenTime.AT_CONSTRUCTION` if the dimension is in
            ``self.dataId.graph``, otherwise `GivenTime.AT_EXECUTION` if it
            is in ``self.given``, otherwise `GivenTime.NOT_GIVEN`.
        """
        if dimension in self.dataId.graph:
            return GivenTime.AT_CONSTRUCTION
        if dimension in self.given:
            return GivenTime.AT_EXECUTION
        return GivenTime.NOT_GIVEN

    def whenIsRegionGiven(self) -> GivenTime:
        """Report when a region is provided in the WHERE clause.

        Returns
        -------
        when : `GivenTime`
            `GivenTime.NOT_GIVEN` if ``self.given.spatial`` is empty,
            `GivenTime.AT_CONSTRUCTION` if it equals
            ``self.dataId.graph.spatial``, and `GivenTime.AT_EXECUTION`
            otherwise.
        """
        if not self.given.spatial:
            return GivenTime.NOT_GIVEN
        if self.given.spatial == self.dataId.graph.spatial:
            return GivenTime.AT_CONSTRUCTION
        return GivenTime.AT_EXECUTION

    def whenIsTimespanGiven(self) -> GivenTime:
        """Report when a timespan is provided in the WHERE clause.

        Returns
        -------
        when : `GivenTime`
            `GivenTime.NOT_GIVEN` if ``self.given.temporal`` is empty,
            `GivenTime.AT_CONSTRUCTION` if it equals
            ``self.dataId.graph.temporal``, and `GivenTime.AT_EXECUTION`
            otherwise.
        """
        if not self.given.temporal:
            return GivenTime.NOT_GIVEN
        if self.given.temporal == self.dataId.graph.temporal:
            return GivenTime.AT_CONSTRUCTION
        return GivenTime.AT_EXECUTION

    @property
    def universe(self) -> DimensionUniverse:
        """All known dimensions, taken from ``self.requested``
        (`DimensionUniverse`).
        """
        return self.requested.universe

    @property
    def spatial(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose regions and skypix IDs should be included
        in the query (`NamedValueSet` of `DimensionElement`).
        """
        # Candidates are the spatial elements of the dimensions whose keys
        # are joined (with ``self.entire`` passed as the preferred elements),
        # minus anything already given at construction or execution time.
        result = self.mustHaveKeysJoined.getSpatial(prefer=self.entire) - self.given.elements
        if len(result) > 1:
            # Several elements means a spatial join, and the common SkyPix
            # system has to be in the query to connect them.
            result.add(self.universe.commonSkyPix)
        elif len(result) == 1:
            if not self.given.spatial:
                # Neither a spatial join nor a spatial filter: whatever
                # spatial information this element may carry is not needed.
                return NamedValueSet()
            # No join, but there may be a WHERE filter on a given region.
            # Such filters can only be applied to SkyPix dimensions, so a
            # non-SkyPix element gets the common SkyPix dimension added
            # alongside it to be joined against.
            (element,) = result
            if not isinstance(element, SkyPixDimension):
                result.add(self.universe.commonSkyPix)
        return result

    @property
    def temporal(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose timespans should be included in the query
        (`NamedValueSet` of `DimensionElement`).
        """
        # Candidates are the temporal elements of the dimensions whose keys
        # are joined (with ``self.entire`` passed as the preferred elements),
        # minus anything already given at construction or execution time.
        result = self.mustHaveKeysJoined.getTemporal(prefer=self.entire) - self.given.elements
        unused = len(result) == 1 and not self.given.getTemporal()
        if unused:
            # A lone element with nothing temporal given means there is no
            # temporal join or filter, so its timespan is not needed.
            return NamedValueSet()
        return result

    @property
    def mustHaveKeysJoined(self) -> DimensionGraph:
        """Dimensions whose primary keys must be used in the JOIN ON clauses
        of the query, even if their tables do not appear (`DimensionGraph`).

        The primary key of a `Dimension` can take part in a join clause
        without its own table, through a foreign key column in the table of
        a dependent dimension element or dataset.
        """
        # Union of the requested dimensions and those the expression uses.
        combined = self.requested.names | self.expression.keys.names
        names = set(combined)
        return DimensionGraph(self.universe, names=names)

    @property
    def mustHaveTableJoined(self) -> NamedValueSet[DimensionElement]:
        """Dimension elements whose associated tables must appear in the
        query's FROM clause (`NamedValueSet` of `DimensionElement`).
        """
        tables = self.spatial | self.temporal | self.expression.metadata.keys()
        for dimension in self.mustHaveKeysJoined:
            if not dimension.implied:
                continue
            tables.add(dimension)
        return tables

 

 

@dataclass
class QueryColumns:
    """Struct that organizes the columns of a query that is being built or
    executed.

    The constructor takes no arguments; the container attributes start out
    empty and are meant to be filled in incrementally.
    """
    def __init__(self):
        self.keys = NamedKeyDict()
        self.timespans = NamedKeyDict()
        self.regions = NamedKeyDict()
        self.datasets = NamedKeyDict()

    keys: NamedKeyDict[Dimension, List[ColumnElement]]
    """Columns holding the primary key values of dimensions (`NamedKeyDict`
    mapping `Dimension` to a `list` of `ColumnElement`).

    Every value list gathers columns from several tables that all refer to
    the same dimension; the query should constrain those columns to have
    equal values.

    In a `Query`, the keys of this dictionary must cover at least the
    dimensions in `QuerySummary.requested` and `QuerySummary.given`.
    """

    timespans: NamedKeyDict[DimensionElement, Timespan[ColumnElement]]
    """Columns holding the timespans of elements that take part in a temporal
    join or filter in the query (`NamedKeyDict` mapping `DimensionElement`
    to `Timespan` of `ColumnElement`).

    In a `Query`, the keys of this dictionary must be exactly the elements
    in `QuerySummary.temporal`.
    """

    regions: NamedKeyDict[DimensionElement, ColumnElement]
    """Columns holding the regions of elements that take part in a spatial
    join or filter in the query (`NamedKeyDict` mapping `DimensionElement`
    to `ColumnElement`).

    In a `Query`, the keys of this dictionary must be exactly the elements
    in `QuerySummary.spatial`.
    """

    datasets: NamedKeyDict[DatasetType, Tuple[ColumnElement, Optional[ColumnElement]]]
    """Columns holding the ``dataset_id`` and, optionally, the collection
    rank of a dataset in the query (`NamedKeyDict` mapping `DatasetType` to
    `tuple` of `ColumnElement`).

    The "collection rank" is the position, within the list of collections to
    search, of the collection in which the dataset was found; a lower rank
    means a collection that comes earlier in the search path.
    """

    def getKeyColumn(self, dimension: Dimension) -> ColumnElement:
        """Return one of the columns in ``self.keys`` for the given dimension.

        Which column is picked is an implementation detail, but the choice
        is deterministic and consistent across calls.

        Parameters
        ----------
        dimension : `Dimension`
            Element for which to obtain a key column.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy column object.
        """
        # The last entry is picked purely for the benefit of people reading
        # the generated query (e.g. developers debugging it): it makes it
        # more likely that the key comes from the dimension's own table or,
        # failing that, from a closely related dimension, which is less
        # surprising than e.g. a dataset subquery.  To the database the
        # choice is arbitrary, because the query guarantees that all of
        # these columns have equal values.
        candidates = self.keys[dimension]
        return candidates[-1]

 

 

@dataclass
class QueryParameters:
    """Struct that manages the deferred bind parameters of a query.

    The constructor takes no arguments; the container attributes start out
    empty (and ``timespan`` starts as `None`) and are meant to be filled in
    incrementally.
    """
    def __init__(self):
        self.keys = NamedKeyDict()
        self.timespan = None
        self.skypix = NamedKeyDict()

    keys: NamedKeyDict[Dimension, bindparam]
    """Bind parameters for dimension primary key values (`NamedKeyDict`
    mapping `Dimension` to `sqlalchemy.sql.bindparam`).

    In a `Query`, the keys of this dictionary are the subset of
    `QuerySummary.given` for which `QuerySummary.whenIsDimensionGiven`
    returns `GivenTime.AT_EXECUTION`.
    """

    timespan: Optional[Timespan[bindparam]]
    """Bind parameters for timespans (`Timespan` of
    `sqlalchemy.sql.bindparam`), or `None`.

    In a `Query`, this is not `None` if and only if
    `QuerySummary.whenIsTimespanGiven` returns `GivenTime.AT_EXECUTION`.
    """

    skypix: NamedKeyDict[SkyPixDimension, bindparam]
    """Bind parameters for skypix IDs (`NamedKeyDict` mapping
    `SkyPixDimension` to `sqlalchemy.sql.bindparam`).

    This is initialized to an empty mapping, never `None`.  In a `Query` it
    is presumably populated only when `QuerySummary.whenIsRegionGiven`
    returns `GivenTime.AT_EXECUTION` -- confirm against the code that fills
    it in.
    """