Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("Query",) 

24 

25from abc import ABC, abstractmethod 

26from contextlib import contextmanager 

27import enum 

28import itertools 

29from typing import ( 

30 Callable, 

31 Dict, 

32 Iterable, 

33 Iterator, 

34 Mapping, 

35 Optional, 

36 Tuple, 

37 TYPE_CHECKING, 

38) 

39 

40import sqlalchemy 

41 

42from lsst.sphgeom import Region 

43 

44from ...core import ( 

45 addDimensionForeignKey, 

46 DataCoordinate, 

47 DatasetRef, 

48 DatasetType, 

49 ddl, 

50 Dimension, 

51 DimensionElement, 

52 DimensionGraph, 

53 DimensionRecord, 

54 DimensionUniverse, 

55 SpatialRegionDatabaseRepresentation, 

56 SimpleQuery, 

57) 

58from ..interfaces import Database 

59from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary, RegistryManagers 

60 

61if TYPE_CHECKING: 61 ↛ 62line 61 didn't jump to line 62, because the condition on line 61 was never true

62 from ._builder import QueryBuilder 

63 

64 

class Query(ABC):
    """An abstract base class for queries that return some combination of
    `DatasetRef` and `DataCoordinate` objects.

    Parameters
    ----------
    graph : `DimensionGraph`
        Object describing the dimensions included in the query.
    whereRegion : `lsst.sphgeom.Region`, optional
        Region that all region columns in all returned rows must overlap.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.

    Notes
    -----
    The `Query` hierarchy abstracts over the database/SQL representation of a
    particular set of data IDs or datasets.  It is expected to be used as a
    backend for other objects that provide more natural interfaces for one or
    both of these, not as part of a public interface to query results.
    """
    def __init__(self, *,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers,
                 ):
        self.graph = graph
        self.whereRegion = whereRegion
        self.managers = managers

    @abstractmethod
    def isUnique(self) -> bool:
        """Return `True` if this query's rows are guaranteed to be unique, and
        `False` otherwise.

        If this query has dataset results (`datasetType` is not `None`),
        uniqueness applies to the `DatasetRef` instances returned by
        `extractDatasetRef` from the result of `rows`.  If it does not have
        dataset results, uniqueness applies to the `DataCoordinate` instances
        returned by `extractDataId`.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        """Return the query column that contains the primary key value for
        the dimension with the given name.

        Parameters
        ----------
        name : `str`
            Name of the dimension.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`.
            SQLAlchemy object representing a column in the query.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def spatial(self) -> Iterator[DimensionElement]:
        """An iterator over the dimension element columns used in post-query
        filtering of spatial overlaps (`Iterator` [ `DimensionElement` ]).

        Notes
        -----
        This property is intended primarily as a hook for subclasses to
        implement and the ABC to call in order to provide higher-level
        functionality; code that uses `Query` objects (but does not implement
        one) should usually not have to access this property.
        """
        raise NotImplementedError()

    @abstractmethod
    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        """Return a region column for one of the dimension elements iterated
        over by `spatial`.

        Parameters
        ----------
        name : `str`
            Name of the element.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy representing a result column in the query.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    def datasetType(self) -> Optional[DatasetType]:
        """The `DatasetType` of datasets returned by this query, or `None`
        if there are no dataset results (`DatasetType` or `None`).
        """
        cols = self.getDatasetColumns()
        if cols is None:
            return None
        return cols.datasetType

    @abstractmethod
    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        """Return the columns for the datasets returned by this query.

        Returns
        -------
        columns : `DatasetQueryColumns` or `None`
            Struct containing SQLAlchemy representations of the result columns
            for a dataset.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
        """A SQLAlchemy object representing the full query
        (`sqlalchemy.sql.FromClause` or `None`).

        This is `None` in the special case where the query has no columns, and
        only one logical row.
        """
        raise NotImplementedError()

    def predicate(self, region: Optional[Region] = None) -> Callable[[sqlalchemy.engine.RowProxy], bool]:
        """Return a callable that can perform extra Python-side filtering of
        query results.

        To get the expected results from a query, the returned predicate *must*
        be used to ignore rows for which it returns `False`; this permits the
        `QueryBuilder` implementation to move logic from the database to Python
        without changing the public interface.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order for the
            predicate to return `True`.  If not provided, this will be
            ``self.whereRegion``, if that exists.

        Returns
        -------
        func : `Callable`
            A callable that takes a single `sqlalchemy.engine.RowProxy`
            argument and returns `bool`.  It returns `False` if any region in
            the row is disjoint from the constraint region, or if any two
            regions in the row are disjoint from each other.
        """
        whereRegion = region if region is not None else self.whereRegion
        # The region columns do not depend on the row being tested, so they
        # are resolved once here rather than once per result row.
        regionColumns = [self.getRegionColumn(element.name) for element in self.spatial]

        def closure(row: sqlalchemy.engine.RowProxy) -> bool:
            rowRegions = [row[column] for column in regionColumns]
            # Explicit `None` check: "no constraint" is signalled by `None`,
            # not by whatever truth value a region object happens to have.
            if whereRegion is not None and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
                return False
            return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))

        return closure

    def rows(self, db: Database, *, region: Optional[Region] = None
             ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
        """Execute the query and yield result rows, applying `predicate`.

        Parameters
        ----------
        db : `Database`
            Database engine to execute the query against.
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order to be
            yielded.  If not provided, this will be ``self.whereRegion``, if
            that exists.

        Yields
        ------
        row : `sqlalchemy.engine.RowProxy` or `None`
            Result row from the query.  `None` may be yielded exactly once
            instead of any real rows to indicate an empty query (see
            `EmptyQuery`).
        """
        predicate = self.predicate(region)
        for row in db.query(self.sql):
            if predicate(row):
                yield row

    def extractDimensionsTuple(self, row: Optional[sqlalchemy.engine.RowProxy],
                               dimensions: Iterable[Dimension]) -> tuple:
        """Extract a tuple of data ID values from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy` or `None`
            A result row from a SQLAlchemy SELECT query, or `None` to indicate
            the row from an `EmptyQuery`.
        dimensions : `Iterable` [ `Dimension` ]
            The dimensions to include in the returned tuple, in order.

        Returns
        -------
        values : `tuple`
            A tuple of dimension primary key values.
        """
        if row is None:
            assert not tuple(dimensions), "Can only utilize empty query row when there are no dimensions."
            return ()
        return tuple(row[self.getDimensionColumn(dimension.name)] for dimension in dimensions)

    def extractDataId(self, row: Optional[sqlalchemy.engine.RowProxy], *,
                      graph: Optional[DimensionGraph] = None,
                      records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
                      ) -> DataCoordinate:
        """Extract a data ID from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy` or `None`
            A result row from a SQLAlchemy SELECT query, or `None` to indicate
            the row from an `EmptyQuery`.
        graph : `DimensionGraph`, optional
            The dimensions the returned data ID should identify.  If not
            provided, ``self.graph`` is used.
        records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
            Nested mapping containing records to attach to the returned
            `DataCoordinate`, for which `~DataCoordinate.hasRecords` will
            return `True`.  If provided, outer keys must include all dimension
            element names in ``graph``, and inner keys should be tuples of
            dimension primary key values in the same order as
            ``element.graph.required``.  If not provided,
            `DataCoordinate.hasRecords` will return `False` on the returned
            object.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID that identifies all required and implied dimensions.  If
            ``records is not None``, this will have
            `~DataCoordinate.hasRecords()` return `True`.
        """
        if graph is None:
            graph = self.graph
        if not graph:
            return DataCoordinate.makeEmpty(self.graph.universe)
        dataId = DataCoordinate.fromFullValues(
            graph,
            self.extractDimensionsTuple(row, itertools.chain(graph.required, graph.implied))
        )
        if records is not None:
            recordsForRow = {}
            for element in graph.elements:
                key = tuple(dataId.subset(element.graph).values())
                # A missing record is stored as `None` rather than raising.
                recordsForRow[element.name] = records[element.name].get(key)
            return dataId.expanded(recordsForRow)
        else:
            return dataId

    def extractDatasetRef(self, row: sqlalchemy.engine.RowProxy,
                          dataId: Optional[DataCoordinate] = None,
                          records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
                          ) -> DatasetRef:
        """Extract a `DatasetRef` from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        dataId : `DataCoordinate`
            Data ID to attach to the `DatasetRef`.  A minimal (i.e. base class)
            `DataCoordinate` is constructed from ``row`` if `None`.
        records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
            Records to use to return an `ExpandedDataCoordinate`.  If provided,
            outer keys must include all dimension element names in ``graph``,
            and inner keys should be tuples of dimension primary key values
            in the same order as ``element.graph.required``.

        Returns
        -------
        ref : `DatasetRef`
            Reference to the dataset; guaranteed to have `DatasetRef.id` not
            `None`.
        """
        datasetColumns = self.getDatasetColumns()
        assert datasetColumns is not None
        if dataId is None:
            dataId = self.extractDataId(row, graph=datasetColumns.datasetType.dimensions, records=records)
        runRecord = self.managers.collections[row[datasetColumns.runKey]]
        return DatasetRef(datasetColumns.datasetType, dataId, id=row[datasetColumns.id], run=runRecord.name)

    def _makeTableSpec(self, constraints: bool = False) -> ddl.TableSpec:
        """Helper method for subclass implementations of `materialize`.

        Parameters
        ----------
        constraints : `bool`, optional
            If `True` (`False` is default), define a specification that
            includes actual foreign key constraints for logical foreign keys.
            Some database engines do not permit temporary tables to reference
            normal tables, so this should be `False` when generating a spec
            for a temporary table unless the database engine is known to
            support them.

        Returns
        -------
        spec : `ddl.TableSpec`
            Specification for a table that could hold this query's result rows.
        """
        unique = self.isUnique()
        spec = ddl.TableSpec(fields=())
        for dimension in self.graph:
            addDimensionForeignKey(spec, dimension, primaryKey=unique, constraint=constraints)
        for element in self.spatial:
            spec.fields.update(
                SpatialRegionDatabaseRepresentation.makeFieldSpecs(
                    nullable=True,
                    name=f"{element.name}_region",
                )
            )
        datasetColumns = self.getDatasetColumns()
        if datasetColumns is not None:
            self.managers.datasets.addDatasetForeignKey(spec, primaryKey=unique, constraint=constraints)
            self.managers.collections.addRunForeignKey(spec, nullable=False, constraint=constraints)
        return spec

    def _makeSubsetQueryColumns(self, *, graph: Optional[DimensionGraph] = None,
                                datasets: bool = True,
                                unique: bool = False) -> Tuple[DimensionGraph, Optional[QueryColumns]]:
        """Helper method for subclass implementations of `subset`.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new `Query` being constructed.
            ``subset`` implementations should generally just forward their
            own ``graph`` argument here.
        datasets : `bool`, optional
            Whether the new `Query` should include dataset results.  Defaults
            to `True`, but is ignored if ``self`` does not include dataset
            results.
        unique : `bool`, optional
            Whether the new `Query` should guarantee unique results (this may
            come with a performance penalty).

        Returns
        -------
        graph : `DimensionGraph`
            The dimensions of the new `Query`.  This is exactly the same as
            the argument of the same name, with ``self.graph`` used if that
            argument is `None`.
        columns : `QueryColumns` or `None`
            A struct containing the SQLAlchemy column objects to use in the
            new query, constructed by delegating to other (mostly abstract)
            methods on ``self``.  If `None`, `subset` may return ``self``.
        """
        if graph is None:
            graph = self.graph
        # Fetch once; every use below wants the same struct.
        datasetColumns = self.getDatasetColumns()
        if (graph == self.graph and (datasetColumns is None or datasets)
                and (self.isUnique() or not unique)):
            # Nothing would change, so the caller can reuse ``self``.
            return graph, None
        columns = QueryColumns()
        for dimension in graph.dimensions:
            col = self.getDimensionColumn(dimension.name)
            columns.keys[dimension] = [col]
        if not unique:
            # Region columns are dropped for unique subsets; see the notes
            # on `subset` for why they cannot be kept.
            for element in self.spatial:
                col = self.getRegionColumn(element.name)
                columns.regions[element] = col
        if datasets and datasetColumns is not None:
            columns.datasets = datasetColumns
        return graph, columns

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        """Execute this query and insert its results into a temporary table.

        Parameters
        ----------
        db : `Database`
            Database engine to execute the query against.

        Returns
        -------
        context : `typing.ContextManager` [ `MaterializedQuery` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a `MaterializedQuery` object
            backed by that table), and dropped in ``__exit__``.  If ``self``
            is already a `MaterializedQuery`, ``__enter__`` may just return
            ``self`` and ``__exit__`` may do nothing (reflecting the fact that
            an outer context manager should already take care of everything
            else).

        Notes
        -----
        The temporary table is dropped even if populating it fails or the
        body of the ``with`` block raises.
        """
        spec = self._makeTableSpec()
        with db.session() as session:
            table = session.makeTemporaryTable(spec)
            try:
                db.insert(table, select=self.sql, names=spec.fields.names)
                yield MaterializedQuery(table=table,
                                        spatial=self.spatial,
                                        datasetType=self.datasetType,
                                        isUnique=self.isUnique(),
                                        graph=self.graph,
                                        whereRegion=self.whereRegion,
                                        managers=self.managers)
            finally:
                # Runs on normal exit and when an exception is thrown in at
                # the ``yield``, so the table never outlives the context.
                session.dropTemporaryTable(table)

    @abstractmethod
    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        """Return a new `Query` whose columns and/or rows are (mostly) subset
        of this one's.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new `Query` being constructed.
            If `None` (default), ``self.graph`` is used.
        datasets : `bool`, optional
            Whether the new `Query` should include dataset results.  Defaults
            to `True`, but is ignored if ``self`` does not include dataset
            results.
        unique : `bool`, optional
            Whether the new `Query` should guarantee unique results (this may
            come with a performance penalty).

        Returns
        -------
        query : `Query`
            A query object corresponding to the given inputs.  May be ``self``
            if no changes were requested.

        Notes
        -----
        The way spatial overlaps are handled at present makes it impossible to
        fully guarantee in general that the new query's rows are a subset of
        this one's while also returning unique rows.  That's because the
        database is only capable of performing approximate, conservative
        overlaps via the common skypix system; we defer actual region overlap
        operations to per-result-row Python logic.  But including the region
        columns necessary to do that postprocessing in the query makes it
        impossible to do a SELECT DISTINCT on the user-visible dimensions of
        the query.  For example, consider starting with a query with dimensions
        (instrument, skymap, visit, tract).  That involves a spatial join
        between visit and tract, and we include the region columns from both
        tables in the results in order to only actually yield result rows
        (see `predicate` and `rows`) where the regions in those two columns
        overlap.  If the user then wants to subset to just (skymap, tract) with
        unique results, we have two unpalatable options:

         - we can do a SELECT DISTINCT with just the skymap and tract columns
           in the SELECT clause, dropping all detailed overlap information and
           including some tracts that did not actually overlap any of the
           visits in the original query (but were regarded as _possibly_
           overlapping via the coarser, common-skypix relationships);

         - we can include the tract and visit region columns in the query, and
           continue to filter out the non-overlapping pairs, but completely
           disregard the user's request for unique tracts.

        This interface specifies that implementations must do the former, as
        that's what makes things efficient in our most important use case
        (``QuantumGraph`` generation in ``pipe_base``).  We may be able to
        improve this situation in the future by putting exact overlap
        information in the database, either by using built-in (but
        engine-specific) spatial database functionality or (more likely)
        switching to a scheme in which pairwise dimension spatial relationships
        are explicitly precomputed (for e.g. combinations of instruments and
        skymaps).
        """
        raise NotImplementedError()

    @abstractmethod
    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        """Return a `QueryBuilder` that can be used to construct a new `Query`
        that is joined to (and hence constrained by) this one.

        Parameters
        ----------
        summary : `QuerySummary`, optional
            A `QuerySummary` instance that specifies the dimensions and any
            additional constraints to include in the new query being
            constructed, or `None` to use the dimensions of ``self`` with no
            additional constraints.
        """
        raise NotImplementedError()

    graph: DimensionGraph
    """The dimensions identified by this query and included in any data IDs
    created from its result rows (`DimensionGraph`).
    """

    whereRegion: Optional[Region]
    """A spatial region that all regions in all rows returned by this query
    must overlap (`lsst.sphgeom.Region` or `None`).
    """

    managers: RegistryManagers
    """A struct containing `Registry` helper object (`RegistryManagers`).
    """

576 

577 

class DirectQueryUniqueness(enum.Enum):
    """Enumeration of how (and whether) a `DirectQuery` obtains unique result
    rows.
    """

    NOT_UNIQUE = enum.auto()
    """No uniqueness is expected of the query's result rows.
    """

    NATURALLY_UNIQUE = enum.auto()
    """Result rows are unique by construction; neither SELECT DISTINCT nor a
    GROUP BY clause is required to make them so.
    """

    NEEDS_DISTINCT = enum.auto()
    """Result rows are expected to be unique, and SELECT DISTINCT (or an
    equivalent GROUP BY clause) must be used to make them so.
    """

596 

597 

class DirectQuery(Query):
    """A `Query` implementation that represents a direct SELECT query that
    usually joins many tables.

    `DirectQuery` objects should generally only be constructed by
    `QueryBuilder` or the methods of other `Query` objects.

    Parameters
    ----------
    simpleQuery : `SimpleQuery`
        Struct representing the actual SELECT, FROM, and WHERE clauses.
    columns : `QueryColumns`
        Columns that are referenced in the query in any clause.
    uniqueness : `DirectQueryUniqueness`
        Enum value indicating whether the query should yield unique result
        rows, and if so whether that needs to be explicitly requested of the
        database.
    graph : `DimensionGraph`
        Object describing the dimensions included in the query.
    whereRegion : `lsst.sphgeom.Region`, optional
        Region that all region columns in all returned rows must overlap.
    managers : `RegistryManagers`
        Struct containing the `Registry` manager helper objects, to be
        forwarded to the `Query` constructor.
    """
    def __init__(self, *,
                 simpleQuery: SimpleQuery,
                 columns: QueryColumns,
                 uniqueness: DirectQueryUniqueness,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers):
        super().__init__(graph=graph, whereRegion=whereRegion, managers=managers)
        assert not simpleQuery.columns, "Columns should always be set on a copy in .sql"
        assert not columns.isEmpty(), "EmptyQuery must be used when a query would have no columns."
        self._simpleQuery = simpleQuery
        self._columns = columns
        self._uniqueness = uniqueness
        # Caches of labeled columns, filled lazily by the getters below so
        # that repeated calls return the same SQLAlchemy object.
        self._datasetQueryColumns: Optional[DatasetQueryColumns] = None
        self._dimensionColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {}
        self._regionColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {}

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return self._uniqueness is not DirectQueryUniqueness.NOT_UNIQUE

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        column = self._dimensionColumns.get(name)
        if column is None:
            # Label with the dimension name so result rows and materialized
            # tables use a predictable column name.
            column = self._columns.getKeyColumn(name).label(name)
            self._dimensionColumns[name] = column
        return column

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(self._columns.regions)

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        column = self._regionColumns.get(name)
        if column is None:
            column = self._columns.regions[name].column.label(f"{name}_region")
            self._regionColumns[name] = column
        return column

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        if self._datasetQueryColumns is None:
            base = self._columns.datasets
            if base is None:
                return None
            ingestDate = base.ingestDate
            if ingestDate is not None:
                ingestDate = ingestDate.label("ingest_date")
            self._datasetQueryColumns = DatasetQueryColumns(
                datasetType=base.datasetType,
                id=base.id.label("dataset_id"),
                runKey=base.runKey.label(self.managers.collections.getRunForeignKeyName()),
                ingestDate=ingestDate,
            )
        return self._datasetQueryColumns

    @property
    def sql(self) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from Query.
        # Work on a copy so the stored SimpleQuery never accumulates SELECT
        # columns (the constructor asserts that it has none).
        simpleQuery = self._simpleQuery.copy()
        for dimension in self.graph:
            simpleQuery.columns.append(self.getDimensionColumn(dimension.name))
        for element in self.spatial:
            simpleQuery.columns.append(self.getRegionColumn(element.name))
        datasetColumns = self.getDatasetColumns()
        if datasetColumns is not None:
            simpleQuery.columns.extend(datasetColumns)
        sql = simpleQuery.combine()
        if self._uniqueness is DirectQueryUniqueness.NEEDS_DISTINCT:
            return sql.distinct()
        else:
            return sql

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
        if columns is None:
            return self
        if columns.isEmpty():
            return EmptyQuery(self.graph.universe, managers=self.managers)
        return DirectQuery(
            simpleQuery=self._simpleQuery.copy(),
            columns=columns,
            uniqueness=DirectQueryUniqueness.NEEDS_DISTINCT if unique else DirectQueryUniqueness.NOT_UNIQUE,
            graph=graph,
            # A unique subset carries no region columns, so a region
            # constraint could not be applied to its rows.
            whereRegion=self.whereRegion if not unique else None,
            managers=self.managers,
        )

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        # Imported here rather than at module scope; the module-level import
        # of QueryBuilder is guarded by TYPE_CHECKING.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                "Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        builder = QueryBuilder(summary, managers=self.managers)
        # Join this query in as an aliased subquery so it constrains the new
        # query being built.
        builder.joinTable(self.sql.alias(), dimensions=self.graph.dimensions,
                          datasets=self.getDatasetColumns())
        return builder

732 

733 

734class MaterializedQuery(Query): 

735 """A `Query` implementation that represents query results saved in a 

736 temporary table. 

737 

738 `MaterializedQuery` instances should not be constructed directly; use 

739 `Query.materialize()` instead. 

740 

741 Parameters 

742 ---------- 

743 table : `sqlalchemy.schema.Table` 

744 SQLAlchemy object represnting the temporary table. 

745 spatial : `Iterable` [ `DimensionElement` ] 

746 Spatial dimension elements whose regions must overlap for each valid 

747 result row (which may reject some rows that are in the table). 

748 datasetType : `DatasetType` 

749 The `DatasetType` of datasets returned by this query, or `None` 

750 if there are no dataset results 

751 isUnique : `bool` 

752 If `True`, the table's rows are unique, and there is no need to 

753 add ``SELECT DISTINCT`` to gaurantee this in results. 

754 graph : `DimensionGraph` 

755 Dimensions included in the columns of this table. 

756 whereRegion : `Region` or `None` 

757 A spatial region all result-row regions must overlap to be valid (which 

758 may reject some rows that are in the table). 

759 managers : `RegistryManagers` 

760 A struct containing `Registry` manager helper objects, forwarded to 

761 the `Query` constructor. 

762 """ 

763 def __init__(self, *, 

764 table: sqlalchemy.schema.Table, 

765 spatial: Iterable[DimensionElement], 

766 datasetType: Optional[DatasetType], 

767 isUnique: bool, 

768 graph: DimensionGraph, 

769 whereRegion: Optional[Region], 

770 managers: RegistryManagers): 

771 super().__init__(graph=graph, whereRegion=whereRegion, managers=managers) 

772 self._table = table 

773 self._spatial = tuple(spatial) 

774 self._datasetType = datasetType 

775 self._isUnique = isUnique 

776 

777 def isUnique(self) -> bool: 

778 # Docstring inherited from Query. 

779 return self._isUnique 

780 

781 def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement: 

782 # Docstring inherited from Query. 

783 return self._table.columns[name] 

784 

785 @property 

786 def spatial(self) -> Iterator[DimensionElement]: 

787 # Docstring inherited from Query. 

788 return iter(self._spatial) 

789 

790 def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement: 

791 # Docstring inherited from Query. 

792 return self._table.columns[f"{name}_region"] 

793 

794 def getDatasetColumns(self) -> Optional[DatasetQueryColumns]: 

795 # Docstring inherited from Query. 

796 if self._datasetType is not None: 

797 return DatasetQueryColumns( 

798 datasetType=self._datasetType, 

799 id=self._table.columns["dataset_id"], 

800 runKey=self._table.columns[self.managers.collections.getRunForeignKeyName()], 

801 ingestDate=None, 

802 ) 

803 else: 

804 return None 

805 

@property
def sql(self) -> sqlalchemy.sql.FromClause:
    # Docstring inherited from Query.
    # The query is whatever ``select()`` on the stored table returns.
    backingTable = self._table
    return backingTable.select()

810 

@contextmanager
def materialize(self, db: Database) -> Iterator[Query]:
    # Docstring inherited from Query.
    # ``db`` is not used: the context value is this same query object and
    # nothing is done on exit.
    alreadyMaterialized = self
    yield alreadyMaterialized

815 

def subset(self, *, graph: Optional[DimensionGraph] = None,
           datasets: bool = True,
           unique: bool = False) -> Query:
    # Docstring inherited from Query.
    graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
    if columns is None:
        # The helper returned no column set; this query is returned as-is.
        return self
    if columns.isEmpty():
        # Nothing left to select: fall back to the column-less query type.
        return EmptyQuery(self.graph.universe, managers=self.managers)
    # Build a new query that selects the chosen columns from the stored table.
    tableQuery = SimpleQuery()
    tableQuery.join(self._table)
    if unique:
        uniqueness = DirectQueryUniqueness.NEEDS_DISTINCT
        region = None
    else:
        uniqueness = DirectQueryUniqueness.NOT_UNIQUE
        region = self.whereRegion
    return DirectQuery(
        simpleQuery=tableQuery,
        columns=columns,
        uniqueness=uniqueness,
        graph=graph,
        whereRegion=region,
        managers=self.managers,
    )

835 

def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
    # Docstring inherited from Query.
    # Imported at call time rather than at module level.
    from ._builder import QueryBuilder
    if summary is None:
        # Default summary: this query's own dimensions and region.
        summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
    requested = summary.requested
    if not requested.issubset(self.graph):
        message = (
            "Query.makeBuilder does not yet support augmenting dimensions "
            f"({requested.dimensions}) beyond those originally included in the query "
            f"({self.graph.dimensions})."
        )
        raise NotImplementedError(message)
    # Seed the new builder with the stored table and its dataset columns.
    newBuilder = QueryBuilder(summary, managers=self.managers)
    newBuilder.joinTable(self._table, dimensions=self.graph.dimensions, datasets=self.getDatasetColumns())
    return newBuilder

850 

851 

class EmptyQuery(Query):
    """A `Query` implementation that handles the special case where the
    query would have no columns.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Set of all dimensions from which the null set is extracted.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.
    """

    def __init__(self, universe: DimensionUniverse, managers: RegistryManagers):
        # The query is always built over the universe's empty graph and has
        # no spatial constraint.
        emptyGraph = universe.empty
        super().__init__(graph=emptyGraph, whereRegion=None, managers=managers)

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return True

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        # There are no dimension columns, so every lookup fails.
        message = f"No dimension {name} in query (no dimensions at all, actually)."
        raise KeyError(message)

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        nothing = ()
        return iter(nothing)

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        # There are no region columns, so every lookup fails.
        message = f"No region for {name} in query (no regions at all, actually)."
        raise KeyError(message)

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        return None

    def rows(self, db: Database, *, region: Optional[Region] = None
             ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
        # Neither ``db`` nor ``region`` is consulted; the generator produces
        # exactly one item, `None`.
        yield None

    @property
    def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
        # Docstring inherited from Query.
        return None

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        # Docstring inherited from Query.
        # ``db`` is not used; the context value is this same object.
        yield self

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        # Any graph passed in must be a subset of this query's graph; the
        # result is always this same object.
        if graph is not None:
            assert graph.issubset(self.graph)
        return self

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        # Imported at call time rather than at module level.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph)
        requested = summary.requested
        if not requested.issubset(self.graph):
            message = (
                "Query.makeBuilder does not yet support augmenting dimensions "
                f"({requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
            raise NotImplementedError(message)
        return QueryBuilder(summary, managers=self.managers)