Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("Query",) 

24 

25from abc import ABC, abstractmethod 

26from contextlib import contextmanager 

27import enum 

28import itertools 

29from typing import ( 

30 Callable, 

31 Dict, 

32 Iterable, 

33 Iterator, 

34 Mapping, 

35 Optional, 

36 Tuple, 

37 TYPE_CHECKING, 

38) 

39 

40import sqlalchemy 

41 

42from lsst.sphgeom import Region 

43 

44from ...core import ( 

45 addDimensionForeignKey, 

46 DataCoordinate, 

47 DatasetRef, 

48 DatasetType, 

49 ddl, 

50 Dimension, 

51 DimensionElement, 

52 DimensionGraph, 

53 DimensionRecord, 

54 DimensionUniverse, 

55 SpatialRegionDatabaseRepresentation, 

56 SimpleQuery, 

57) 

58from ..interfaces import Database 

59from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary, RegistryManagers 

60 

61if TYPE_CHECKING: 61 ↛ 62line 61 didn't jump to line 62, because the condition on line 61 was never true

62 from ._builder import QueryBuilder 

63 

64 

class Query(ABC):
    """An abstract base class for queries that return some combination of
    `DatasetRef` and `DataCoordinate` objects.

    Parameters
    ----------
    graph : `DimensionGraph`
        Object describing the dimensions included in the query.
    whereRegion : `lsst.sphgeom.Region`, optional
        Region that all region columns in all returned rows must overlap.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.

    Notes
    -----
    The `Query` hierarchy abstracts over the database/SQL representation of a
    particular set of data IDs or datasets.  It is expected to be used as a
    backend for other objects that provide more natural interfaces for one or
    both of these, not as part of a public interface to query results.
    """
    def __init__(self, *,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers,
                 ):
        self.graph = graph
        self.whereRegion = whereRegion
        self.managers = managers

    @abstractmethod
    def isUnique(self) -> bool:
        """Return `True` if this query's rows are guaranteed to be unique, and
        `False` otherwise.

        If this query has dataset results (`datasetType` is not `None`),
        uniqueness applies to the `DatasetRef` instances returned by
        `extractDatasetRef` from the result of `rows`.  If it does not have
        dataset results, uniqueness applies to the `DataCoordinate` instances
        returned by `extractDataId`.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        """Return the query column that contains the primary key value for
        the dimension with the given name.

        Parameters
        ----------
        name : `str`
            Name of the dimension.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`.
            SQLAlchemy object representing a column in the query.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def spatial(self) -> Iterator[DimensionElement]:
        """An iterator over the dimension element columns used in post-query
        filtering of spatial overlaps (`Iterator` [ `DimensionElement` ]).

        Notes
        -----
        This property is intended primarily as a hook for subclasses to
        implement and the ABC to call in order to provide higher-level
        functionality; code that uses `Query` objects (but does not implement
        one) should usually not have to access this property.
        """
        raise NotImplementedError()

    @abstractmethod
    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        """Return a region column for one of the dimension elements iterated
        over by `spatial`.

        Parameters
        ----------
        name : `str`
            Name of the element.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy representing a result column in the query.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    def datasetType(self) -> Optional[DatasetType]:
        """The `DatasetType` of datasets returned by this query, or `None`
        if there are no dataset results (`DatasetType` or `None`).
        """
        cols = self.getDatasetColumns()
        if cols is None:
            return None
        return cols.datasetType

    @abstractmethod
    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        """Return the columns for the datasets returned by this query.

        Returns
        -------
        columns : `DatasetQueryColumns` or `None`
            Struct containing SQLAlchemy representations of the result columns
            for a dataset.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
        """A SQLAlchemy object representing the full query
        (`sqlalchemy.sql.FromClause` or `None`).

        This is `None` in the special case where the query has no columns, and
        only one logical row.
        """
        raise NotImplementedError()

    def predicate(self, region: Optional[Region] = None) -> Callable[[sqlalchemy.engine.RowProxy], bool]:
        """Return a callable that can perform extra Python-side filtering of
        query results.

        To get the expected results from a query, the returned predicate *must*
        be used to ignore rows for which it returns `False`; this permits the
        `QueryBuilder` implementation to move logic from the database to Python
        without changing the public interface.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order for the
            predicate to return `True`.  If not provided, this will be
            ``self.whereRegion``, if that exists.

        Returns
        -------
        func : `Callable`
            A callable that takes a single `sqlalchemy.engine.RowProxy`
            argument and returns `bool`.
        """
        whereRegion = region if region is not None else self.whereRegion

        def closure(row: sqlalchemy.engine.RowProxy) -> bool:
            rowRegions = [row._mapping[self.getRegionColumn(element.name)] for element in self.spatial]
            # Test for presence explicitly rather than relying on the
            # truthiness of a region object.
            if whereRegion is not None and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
                return False
            # Every pair of regions within the row must also overlap.
            return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))

        return closure

    def rows(self, db: Database, *, region: Optional[Region] = None
             ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
        """Execute the query and yield result rows, applying `predicate`.

        Parameters
        ----------
        db : `Database`
            Database object whose ``query`` method is used to execute
            ``self.sql``.
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order to be
            yielded.  If not provided, this will be ``self.whereRegion``, if
            that exists.

        Yields
        ------
        row : `sqlalchemy.engine.RowProxy` or `None`
            Result row from the query.  `None` may be yielded exactly once
            instead of any real rows to indicate an empty query (see
            `EmptyQuery`).
        """
        predicate = self.predicate(region)
        for row in db.query(self.sql):
            if predicate(row):
                yield row

    def extractDimensionsTuple(self, row: Optional[sqlalchemy.engine.RowProxy],
                               dimensions: Iterable[Dimension]) -> tuple:
        """Extract a tuple of data ID values from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy` or `None`
            A result row from a SQLAlchemy SELECT query, or `None` to indicate
            the row from an `EmptyQuery`.
        dimensions : `Iterable` [ `Dimension` ]
            The dimensions to include in the returned tuple, in order.

        Returns
        -------
        values : `tuple`
            A tuple of dimension primary key values.
        """
        if row is None:
            assert not tuple(dimensions), "Can only utilize empty query row when there are no dimensions."
            return ()
        return tuple(row._mapping[self.getDimensionColumn(dimension.name)] for dimension in dimensions)

    def extractDataId(self, row: Optional[sqlalchemy.engine.RowProxy], *,
                      graph: Optional[DimensionGraph] = None,
                      records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
                      ) -> DataCoordinate:
        """Extract a data ID from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy` or `None`
            A result row from a SQLAlchemy SELECT query, or `None` to indicate
            the row from an `EmptyQuery`.
        graph : `DimensionGraph`, optional
            The dimensions the returned data ID should identify.  If not
            provided, ``self.graph`` is used.
        records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
            Nested mapping containing records to attach to the returned
            `DataCoordinate`, for which `~DataCoordinate.hasRecords` will
            return `True`.  If provided, outer keys must include all dimension
            element names in ``graph``, and inner keys should be tuples of
            dimension primary key values in the same order as
            ``element.graph.required``.  If not provided,
            `DataCoordinate.hasRecords` will return `False` on the returned
            object.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID that identifies all required and implied dimensions.  If
            ``records is not None``, this will have
            `~DataCoordinate.hasRecords()` return `True`.
        """
        if graph is None:
            graph = self.graph
        if not graph:
            return DataCoordinate.makeEmpty(self.graph.universe)
        dataId = DataCoordinate.fromFullValues(
            graph,
            self.extractDimensionsTuple(row, itertools.chain(graph.required, graph.implied))
        )
        if records is not None:
            recordsForRow = {}
            for element in graph.elements:
                key = tuple(dataId.subset(element.graph).values())
                # A missing inner key yields `None` for that element.
                recordsForRow[element.name] = records[element.name].get(key)
            return dataId.expanded(recordsForRow)
        else:
            return dataId

    def extractDatasetRef(self, row: sqlalchemy.engine.RowProxy,
                          dataId: Optional[DataCoordinate] = None,
                          records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
                          ) -> DatasetRef:
        """Extract a `DatasetRef` from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        dataId : `DataCoordinate`
            Data ID to attach to the `DatasetRef`.  A minimal (i.e. base class)
            `DataCoordinate` is constructed from ``row`` if `None`.
        records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
            Records to use to return an `ExpandedDataCoordinate`.  If provided,
            outer keys must include all dimension element names in ``graph``,
            and inner keys should be tuples of dimension primary key values
            in the same order as ``element.graph.required``.

        Returns
        -------
        ref : `DatasetRef`
            Reference to the dataset; guaranteed to have `DatasetRef.id` not
            `None`.
        """
        datasetColumns = self.getDatasetColumns()
        assert datasetColumns is not None
        if dataId is None:
            dataId = self.extractDataId(row, graph=datasetColumns.datasetType.dimensions, records=records)
        runRecord = self.managers.collections[row._mapping[datasetColumns.runKey]]
        return DatasetRef(datasetColumns.datasetType, dataId, id=row._mapping[datasetColumns.id],
                          run=runRecord.name)

    def _makeTableSpec(self, constraints: bool = False) -> ddl.TableSpec:
        """Helper method for subclass implementations of `materialize`.

        Parameters
        ----------
        constraints : `bool`, optional
            If `True` (`False` is default), define a specification that
            includes actual foreign key constraints for logical foreign keys.
            Some database engines do not permit temporary tables to reference
            normal tables, so this should be `False` when generating a spec
            for a temporary table unless the database engine is known to
            support them.

        Returns
        -------
        spec : `ddl.TableSpec`
            Specification for a table that could hold this query's result rows.
        """
        unique = self.isUnique()
        spec = ddl.TableSpec(fields=())
        for dimension in self.graph:
            addDimensionForeignKey(spec, dimension, primaryKey=unique, constraint=constraints)
        for element in self.spatial:
            spec.fields.update(
                SpatialRegionDatabaseRepresentation.makeFieldSpecs(
                    nullable=True,
                    name=f"{element.name}_region",
                )
            )
        datasetColumns = self.getDatasetColumns()
        if datasetColumns is not None:
            self.managers.datasets.addDatasetForeignKey(spec, primaryKey=unique, constraint=constraints)
            self.managers.collections.addRunForeignKey(spec, nullable=False, constraint=constraints)
        return spec

    def _makeSubsetQueryColumns(self, *, graph: Optional[DimensionGraph] = None,
                                datasets: bool = True,
                                unique: bool = False) -> Tuple[DimensionGraph, Optional[QueryColumns]]:
        """Helper method for subclass implementations of `subset`.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new `Query` being constructed.
            ``subset`` implementations should generally just forward their
            own ``graph`` argument here.
        datasets : `bool`, optional
            Whether the new `Query` should include dataset results.  Defaults
            to `True`, but is ignored if ``self`` does not include dataset
            results.
        unique : `bool`, optional
            Whether the new `Query` should guarantee unique results (this may
            come with a performance penalty).

        Returns
        -------
        graph : `DimensionGraph`
            The dimensions of the new `Query`.  This is exactly the same as
            the argument of the same name, with ``self.graph`` used if that
            argument is `None`.
        columns : `QueryColumns` or `None`
            A struct containing the SQLAlchemy column objects to use in the
            new query, constructed by delegating to other (mostly abstract)
            methods on ``self``.  If `None`, `subset` may return ``self``.
        """
        if graph is None:
            graph = self.graph
        # Look the dataset columns up once; they are needed both to decide
        # whether anything changes and to populate the new struct.
        datasetColumns = self.getDatasetColumns()
        if (graph == self.graph and (datasetColumns is None or datasets)
                and (self.isUnique() or not unique)):
            return graph, None
        columns = QueryColumns()
        for dimension in graph.dimensions:
            col = self.getDimensionColumn(dimension.name)
            columns.keys[dimension] = [col]
        if not unique:
            # Region columns are only carried along when uniqueness was not
            # requested (see the notes in `subset`).
            for element in self.spatial:
                col = self.getRegionColumn(element.name)
                columns.regions[element] = col
        if datasets and datasetColumns is not None:
            columns.datasets = datasetColumns
        return graph, columns

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        """Execute this query and insert its results into a temporary table.

        Parameters
        ----------
        db : `Database`
            Database engine to execute the query against.

        Returns
        -------
        context : `typing.ContextManager` [ `MaterializedQuery` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a `MaterializedQuery` object
            backed by that table), and dropped in ``__exit__``.  If ``self``
            is already a `MaterializedQuery`, ``__enter__`` may just return
            ``self`` and ``__exit__`` may do nothing (reflecting the fact that
            an outer context manager should already take care of everything
            else).

        Notes
        -----
        The temporary table is dropped even if populating it fails or the
        body of the ``with`` block raises.
        """
        spec = self._makeTableSpec()
        with db.session() as session:
            table = session.makeTemporaryTable(spec)
            try:
                db.insert(table, select=self.sql, names=spec.fields.names)
                yield MaterializedQuery(table=table,
                                        spatial=self.spatial,
                                        datasetType=self.datasetType,
                                        isUnique=self.isUnique(),
                                        graph=self.graph,
                                        whereRegion=self.whereRegion,
                                        managers=self.managers)
            finally:
                # Without this, an exception raised by the insert or by the
                # caller's ``with`` body would leak the temporary table.
                session.dropTemporaryTable(table)

    @abstractmethod
    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        """Return a new `Query` whose columns and/or rows are (mostly) subset
        of this one's.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new `Query` being constructed.
            If `None` (default), ``self.graph`` is used.
        datasets : `bool`, optional
            Whether the new `Query` should include dataset results.  Defaults
            to `True`, but is ignored if ``self`` does not include dataset
            results.
        unique : `bool`, optional
            Whether the new `Query` should guarantee unique results (this may
            come with a performance penalty).

        Returns
        -------
        query : `Query`
            A query object corresponding to the given inputs.  May be ``self``
            if no changes were requested.

        Notes
        -----
        The way spatial overlaps are handled at present makes it impossible to
        fully guarantee in general that the new query's rows are a subset of
        this one's while also returning unique rows.  That's because the
        database is only capable of performing approximate, conservative
        overlaps via the common skypix system; we defer actual region overlap
        operations to per-result-row Python logic.  But including the region
        columns necessary to do that postprocessing in the query makes it
        impossible to do a SELECT DISTINCT on the user-visible dimensions of
        the query.  For example, consider starting with a query with dimensions
        (instrument, skymap, visit, tract).  That involves a spatial join
        between visit and tract, and we include the region columns from both
        tables in the results in order to only actually yield result rows
        (see `predicate` and `rows`) where the regions in those two columns
        overlap.  If the user then wants to subset to just (skymap, tract) with
        unique results, we have two unpalatable options:

         - we can do a SELECT DISTINCT with just the skymap and tract columns
           in the SELECT clause, dropping all detailed overlap information and
           including some tracts that did not actually overlap any of the
           visits in the original query (but were regarded as _possibly_
           overlapping via the coarser, common-skypix relationships);

         - we can include the tract and visit region columns in the query, and
           continue to filter out the non-overlapping pairs, but completely
           disregard the user's request for unique tracts.

        This interface specifies that implementations must do the former, as
        that's what makes things efficient in our most important use case
        (``QuantumGraph`` generation in ``pipe_base``).  We may be able to
        improve this situation in the future by putting exact overlap
        information in the database, either by using built-in (but
        engine-specific) spatial database functionality or (more likely)
        switching to a scheme in which pairwise dimension spatial relationships
        are explicitly precomputed (for e.g. combinations of instruments and
        skymaps).
        """
        raise NotImplementedError()

    @abstractmethod
    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        """Return a `QueryBuilder` that can be used to construct a new `Query`
        that is joined to (and hence constrained by) this one.

        Parameters
        ----------
        summary : `QuerySummary`, optional
            A `QuerySummary` instance that specifies the dimensions and any
            additional constraints to include in the new query being
            constructed, or `None` to use the dimensions of ``self`` with no
            additional constraints.
        """
        raise NotImplementedError()

    graph: DimensionGraph
    """The dimensions identified by this query and included in any data IDs
    created from its result rows (`DimensionGraph`).
    """

    whereRegion: Optional[Region]
    """A spatial region that all regions in all rows returned by this query
    must overlap (`lsst.sphgeom.Region` or `None`).
    """

    managers: RegistryManagers
    """A struct containing `Registry` helper object (`RegistryManagers`).
    """

577 

578 

@enum.unique
class DirectQueryUniqueness(enum.Enum):
    """Enumeration of the ways a query's result rows may (or may not) be
    unique.
    """

    NOT_UNIQUE = enum.auto()
    """Result rows are not expected to be unique.
    """

    NATURALLY_UNIQUE = enum.auto()
    """The way the query is constructed already guarantees unique result
    rows; neither SELECT DISTINCT nor a GROUP BY clause is required.
    """

    NEEDS_DISTINCT = enum.auto()
    """Result rows are expected to be unique, but the query must use
    SELECT DISTINCT (or an equivalent GROUP BY clause) to make them so.
    """

597 

598 

class DirectQuery(Query):
    """A `Query` implementation backed by a direct SELECT statement, which
    usually joins many tables.

    `DirectQuery` objects should generally only be constructed by
    `QueryBuilder` or the methods of other `Query` objects.

    Parameters
    ----------
    simpleQuery : `SimpleQuery`
        Struct representing the actual SELECT, FROM, and WHERE clauses.
    columns : `QueryColumns`
        Columns that are referenced in the query in any clause.
    uniqueness : `DirectQueryUniqueness`
        Enum value indicating whether the query should yield unique result
        rows, and if so whether that needs to be explicitly requested of the
        database.
    graph : `DimensionGraph`
        Object describing the dimensions included in the query.
    whereRegion : `lsst.sphgeom.Region`, optional
        Region that all region columns in all returned rows must overlap.
    managers : `RegistryManagers`
        Struct containing the `Registry` manager helper objects, to be
        forwarded to the `Query` constructor.
    """
    def __init__(self, *,
                 simpleQuery: SimpleQuery,
                 columns: QueryColumns,
                 uniqueness: DirectQueryUniqueness,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers):
        super().__init__(graph=graph, whereRegion=whereRegion, managers=managers)
        assert not simpleQuery.columns, "Columns should always be set on a copy in .sql"
        assert not columns.isEmpty(), "EmptyQuery must be used when a query would have no columns."
        self._simpleQuery = simpleQuery
        self._columns = columns
        self._uniqueness = uniqueness
        # Labeled columns are created on first use and then reused, so the
        # same column object is handed out on every call.
        self._datasetQueryColumns: Optional[DatasetQueryColumns] = None
        self._dimensionColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {}
        self._regionColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {}

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return self._uniqueness is not DirectQueryUniqueness.NOT_UNIQUE

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        labeled = self._dimensionColumns.get(name)
        if labeled is not None:
            return labeled
        labeled = self._columns.getKeyColumn(name).label(name)
        self._dimensionColumns[name] = labeled
        return labeled

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(self._columns.regions)

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        labeled = self._regionColumns.get(name)
        if labeled is not None:
            return labeled
        labeled = self._columns.regions[name].column.label(f"{name}_region")
        self._regionColumns[name] = labeled
        return labeled

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        if self._datasetQueryColumns is not None:
            return self._datasetQueryColumns
        source = self._columns.datasets
        if source is None:
            return None
        # The ingest date column is optional; label it only when present.
        ingestDate = source.ingestDate
        if ingestDate is not None:
            ingestDate = ingestDate.label("ingest_date")
        self._datasetQueryColumns = DatasetQueryColumns(
            datasetType=source.datasetType,
            id=source.id.label("dataset_id"),
            runKey=source.runKey.label(self.managers.collections.getRunForeignKeyName()),
            ingestDate=ingestDate,
        )
        return self._datasetQueryColumns

    @property
    def sql(self) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from Query.
        # Work on a copy so the stored SimpleQuery never acquires columns.
        working = self._simpleQuery.copy()
        working.columns.extend(self.getDimensionColumn(dimension.name) for dimension in self.graph)
        working.columns.extend(self.getRegionColumn(element.name) for element in self.spatial)
        datasetColumns = self.getDatasetColumns()
        if datasetColumns is not None:
            working.columns.extend(datasetColumns)
        combined = working.combine()
        if self._uniqueness is not DirectQueryUniqueness.NEEDS_DISTINCT:
            return combined
        return combined.distinct()

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
        if columns is None:
            return self
        if columns.isEmpty():
            return EmptyQuery(self.graph.universe, self.managers)
        if unique:
            uniqueness = DirectQueryUniqueness.NEEDS_DISTINCT
            whereRegion = None
        else:
            uniqueness = DirectQueryUniqueness.NOT_UNIQUE
            whereRegion = self.whereRegion
        return DirectQuery(
            simpleQuery=self._simpleQuery.copy(),
            columns=columns,
            uniqueness=uniqueness,
            graph=graph,
            whereRegion=whereRegion,
            managers=self.managers,
        )

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        # Imported here rather than at module scope; the module-level import
        # of QueryBuilder is guarded by TYPE_CHECKING.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        builder = QueryBuilder(summary, managers=self.managers)
        subquery = self.sql.alias()
        builder.joinTable(subquery, dimensions=self.graph.dimensions,
                          datasets=self.getDatasetColumns())
        return builder

733 

734 

735class MaterializedQuery(Query): 

736 """A `Query` implementation that represents query results saved in a 

737 temporary table. 

738 

739 `MaterializedQuery` instances should not be constructed directly; use 

740 `Query.materialize()` instead. 

741 

742 Parameters 

743 ---------- 

744 table : `sqlalchemy.schema.Table` 

745 SQLAlchemy object represnting the temporary table. 

746 spatial : `Iterable` [ `DimensionElement` ] 

747 Spatial dimension elements whose regions must overlap for each valid 

748 result row (which may reject some rows that are in the table). 

749 datasetType : `DatasetType` 

750 The `DatasetType` of datasets returned by this query, or `None` 

751 if there are no dataset results 

752 isUnique : `bool` 

753 If `True`, the table's rows are unique, and there is no need to 

754 add ``SELECT DISTINCT`` to gaurantee this in results. 

755 graph : `DimensionGraph` 

756 Dimensions included in the columns of this table. 

757 whereRegion : `Region` or `None` 

758 A spatial region all result-row regions must overlap to be valid (which 

759 may reject some rows that are in the table). 

760 managers : `RegistryManagers` 

761 A struct containing `Registry` manager helper objects, forwarded to 

762 the `Query` constructor. 

763 """ 

764 def __init__(self, *, 

765 table: sqlalchemy.schema.Table, 

766 spatial: Iterable[DimensionElement], 

767 datasetType: Optional[DatasetType], 

768 isUnique: bool, 

769 graph: DimensionGraph, 

770 whereRegion: Optional[Region], 

771 managers: RegistryManagers): 

772 super().__init__(graph=graph, whereRegion=whereRegion, managers=managers) 

773 self._table = table 

774 self._spatial = tuple(spatial) 

775 self._datasetType = datasetType 

776 self._isUnique = isUnique 

777 

778 def isUnique(self) -> bool: 

779 # Docstring inherited from Query. 

780 return self._isUnique 

781 

782 def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement: 

783 # Docstring inherited from Query. 

784 return self._table.columns[name] 

785 

786 @property 

787 def spatial(self) -> Iterator[DimensionElement]: 

788 # Docstring inherited from Query. 

789 return iter(self._spatial) 

790 

791 def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement: 

792 # Docstring inherited from Query. 

793 return self._table.columns[f"{name}_region"] 

794 

def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
    # Docstring inherited from Query.
    if self._datasetType is None:
        # No dataset type was supplied at construction, so there are no
        # dataset columns to report.
        return None
    return DatasetQueryColumns(
        datasetType=self._datasetType,
        id=self._table.columns["dataset_id"],
        # The name of the run column is obtained from the collections
        # manager rather than being hard-coded here.
        runKey=self._table.columns[self.managers.collections.getRunForeignKeyName()],
        # No ingest-date column is provided from this table.
        ingestDate=None,
    )

806 

@property
def sql(self) -> sqlalchemy.sql.FromClause:
    # Docstring inherited from Query.
    # The query is just a plain select over the backing table.
    return self._table.select()

811 

@contextmanager
def materialize(self, db: Database) -> Iterator[Query]:
    # Docstring inherited from Query.
    # Nothing is executed against ``db`` here: the context simply yields
    # this same query object, and there is no cleanup on exit.
    yield self

816 

def subset(self, *, graph: Optional[DimensionGraph] = None,
           datasets: bool = True,
           unique: bool = False) -> Query:
    # Docstring inherited from Query.
    graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
    if columns is None:
        # The helper returned no columns object; this query itself is
        # handed back unchanged.
        return self
    if columns.isEmpty():
        # Nothing left to select: fall back to the dedicated empty query.
        return EmptyQuery(self.graph.universe, managers=self.managers)
    simpleQuery = SimpleQuery()
    simpleQuery.join(self._table)
    if unique:
        # Uniqueness was requested: ask for DISTINCT and do not carry the
        # spatial region constraint over to the new query.
        uniqueness = DirectQueryUniqueness.NEEDS_DISTINCT
        whereRegion = None
    else:
        uniqueness = DirectQueryUniqueness.NOT_UNIQUE
        whereRegion = self.whereRegion
    return DirectQuery(
        simpleQuery=simpleQuery,
        columns=columns,
        uniqueness=uniqueness,
        graph=graph,
        whereRegion=whereRegion,
        managers=self.managers,
    )

836 

def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
    # Docstring inherited from Query.
    # Imported at call time rather than at module level -- presumably to
    # avoid a circular import with ``_builder``; confirm before hoisting.
    from ._builder import QueryBuilder
    if summary is None:
        # Default summary: same dimensions and region constraint as this
        # query.
        summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
    if not summary.requested.issubset(self.graph):
        raise NotImplementedError(
            "Query.makeBuilder does not yet support augmenting dimensions "
            f"({summary.requested.dimensions}) beyond those originally included in the query "
            f"({self.graph.dimensions})."
        )
    builder = QueryBuilder(summary, managers=self.managers)
    # Seed the builder with the backing table, including its dataset
    # columns when this query has any.
    builder.joinTable(self._table, dimensions=self.graph.dimensions, datasets=self.getDatasetColumns())
    return builder

851 

852 

class EmptyQuery(Query):
    """A `Query` implementation that handles the special case where the query
    would have no columns.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Set of all dimensions from which the null set is extracted.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.
    """
    def __init__(self, universe: DimensionUniverse, managers: RegistryManagers):
        # An empty query always has the empty dimension graph and no region
        # constraint.
        super().__init__(graph=universe.empty, whereRegion=None, managers=managers)

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return True

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        # There are no columns at all, so every lookup fails.
        raise KeyError(f"No dimension {name} in query (no dimensions at all, actually).")

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(())

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        # There are no region columns either, so every lookup fails.
        raise KeyError(f"No region for {name} in query (no regions at all, actually).")

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        return None

    def rows(self, db: Database, *, region: Optional[Region] = None
             ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
        # Yields exactly one ``None`` placeholder row; ``db`` is never
        # queried and ``region`` is ignored.
        yield None

    @property
    def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
        # Docstring inherited from Query.
        # There is no SQL to run for a query with no columns.
        return None

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        # Docstring inherited from Query.
        # Nothing to materialize; the context yields this same object.
        yield self

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        # Internal sanity check only: any requested graph must be a subset of
        # this query's (empty) graph.  ``datasets`` and ``unique`` have no
        # effect here.
        assert graph is None or graph.issubset(self.graph)
        return self

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        # Imported at call time rather than at module level -- presumably to
        # avoid a circular import with ``_builder``; confirm before hoisting.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                "Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        # Unlike table-backed queries there is nothing to join in; the
        # builder starts from the summary alone.
        return QueryBuilder(summary, managers=self.managers)