# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("Query",)

from abc import ABC, abstractmethod
from contextlib import contextmanager
import copy
import enum
import itertools
from typing import (
    Callable,
    Iterable,
    Iterator,
    Mapping,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

import sqlalchemy

from lsst.sphgeom import Region

from ...core import (
    addDimensionForeignKey,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    ddl,
    Dimension,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    REGION_FIELD_SPEC,
    SimpleQuery,
)
from ..interfaces import Database
from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary, RegistryManagers

if TYPE_CHECKING:
    from ._builder import QueryBuilder


class Query(ABC):
    """An abstract base class for queries that return some combination of
    `DatasetRef` and `DataCoordinate` objects.

    Parameters
    ----------
    graph : `DimensionGraph`
        Object describing the dimensions included in the query.
    whereRegion : `lsst.sphgeom.Region`, optional
        Region that all region columns in all returned rows must overlap.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.

    Notes
    -----
    The `Query` hierarchy abstracts over the database/SQL representation of a
    particular set of data IDs or datasets. It is expected to be used as a
    backend for other objects that provide more natural interfaces for one or
    both of these, not as part of a public interface to query results.
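
    Examples
    --------
    A minimal usage sketch for such a backend object (illustrative only; it
    assumes a concrete `Query` instance ``query`` and a `Database` instance
    ``db`` obtained elsewhere)::

        for row in query.rows(db):
            dataId = query.extractDataId(row)
            if query.datasetType is not None:
                ref = query.extractDatasetRef(row, dataId=dataId)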

    """
    def __init__(self, *,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers,
                 ):
        self.graph = graph
        self.whereRegion = whereRegion
        self.managers = managers

    @abstractmethod
    def isUnique(self) -> bool:
        """Return `True` if this query's rows are guaranteed to be unique, and
        `False` otherwise.

        If this query has dataset results (`datasetType` is not `None`),
        uniqueness applies to the `DatasetRef` instances returned by
        `extractDatasetRef` from the result of `rows`. If it does not have
        dataset results, uniqueness applies to the `DataCoordinate` instances
        returned by `extractDataId`.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        """Return the query column that contains the primary key value for
        the dimension with the given name.

        Parameters
        ----------
        name : `str`
            Name of the dimension.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy object representing a column in the query.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def spatial(self) -> Iterator[DimensionElement]:
        """An iterator over the dimension element columns used in post-query
        filtering of spatial overlaps (`Iterator` [ `DimensionElement` ]).

        Notes
        -----
        This property is intended primarily as a hook for subclasses to
        implement and the ABC to call in order to provide higher-level
        functionality; code that uses `Query` objects (but does not implement
        one) should usually not have to access this property.
        """
        raise NotImplementedError()

    @abstractmethod
    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        """Return a region column for one of the dimension elements iterated
        over by `spatial`.

        Parameters
        ----------
        name : `str`
            Name of the element.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy object representing a result column in the query.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    def datasetType(self) -> Optional[DatasetType]:
        """The `DatasetType` of datasets returned by this query, or `None`
        if there are no dataset results (`DatasetType` or `None`).
        """
        cols = self.getDatasetColumns()
        if cols is None:
            return None
        return cols.datasetType

    @abstractmethod
    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        """Return the columns for the datasets returned by this query.

        Returns
        -------
        columns : `DatasetQueryColumns` or `None`
            Struct containing SQLAlchemy representations of the result columns
            for a dataset.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
        """A SQLAlchemy object representing the full query
        (`sqlalchemy.sql.FromClause` or `None`).

        This is `None` in the special case where the query has no columns, and
        only one logical row.
        """
        raise NotImplementedError()

    def predicate(self, region: Optional[Region] = None) -> Callable[[sqlalchemy.engine.RowProxy], bool]:
        """Return a callable that can perform extra Python-side filtering of
        query results.

        To get the expected results from a query, the returned predicate *must*
        be used to ignore rows for which it returns `False`; this permits the
        `QueryBuilder` implementation to move logic from the database to Python
        without changing the public interface.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order for the
            predicate to return `True`. If not provided, this will be
            ``self.whereRegion``, if that exists.

        Returns
        -------
        func : `Callable`
            A callable that takes a single `sqlalchemy.engine.RowProxy`
            argument and returns `bool`.
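
        Examples
        --------
        A sketch of the filtering loop this predicate supports (illustrative
        only; ``query`` is a `Query` instance and ``db`` a `Database`
        instance assumed to exist elsewhere)::

            predicate = query.predicate()
            for row in db.query(query.sql):
                if predicate(row):
                    ...  # use the row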

        """
        whereRegion = region if region is not None else self.whereRegion

        def closure(row: sqlalchemy.engine.RowProxy) -> bool:
            rowRegions = [row[self.getRegionColumn(element.name)] for element in self.spatial]
            if whereRegion and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
                return False
            return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))

        return closure

    def rows(self, db: Database, *, region: Optional[Region] = None
             ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
        """Execute the query and yield result rows, applying `predicate`.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order to be
            yielded. If not provided, this will be ``self.whereRegion``, if
            that exists.

        Yields
        ------
        row : `sqlalchemy.engine.RowProxy` or `None`
            Result row from the query. `None` may be yielded exactly once
            instead of any real rows to indicate an empty query (see
            `EmptyQuery`).
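
        Examples
        --------
        Illustrative sketch (``query`` and ``db`` are assumed to exist
        elsewhere)::

            for row in query.rows(db):
                if row is None:
                    break  # empty query with a single logical row
                dataId = query.extractDataId(row)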

        """
        predicate = self.predicate(region)
        for row in db.query(self.sql):
            if predicate(row):
                yield row

    def extractDimensionsTuple(self, row: Optional[sqlalchemy.engine.RowProxy],
                               dimensions: Iterable[Dimension]) -> tuple:
        """Extract a tuple of data ID values from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy` or `None`
            A result row from a SQLAlchemy SELECT query, or `None` to indicate
            the row from an `EmptyQuery`.
        dimensions : `Iterable` [ `Dimension` ]
            The dimensions to include in the returned tuple, in order.

        Returns
        -------
        values : `tuple`
            A tuple of dimension primary key values.
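
        Examples
        --------
        Illustrative sketch (``query`` and ``row`` are assumed to come from
        an earlier call to `rows`)::

            values = query.extractDimensionsTuple(row, query.graph.required)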

        """
        if row is None:
            assert not tuple(dimensions), "Can only utilize empty query row when there are no dimensions."
            return ()
        return tuple(row[self.getDimensionColumn(dimension.name)] for dimension in dimensions)

    def extractDataId(self, row: Optional[sqlalchemy.engine.RowProxy], *,
                      graph: Optional[DimensionGraph] = None,
                      records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
                      ) -> DataCoordinate:
        """Extract a data ID from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy` or `None`
            A result row from a SQLAlchemy SELECT query, or `None` to indicate
            the row from an `EmptyQuery`.
        graph : `DimensionGraph`, optional
            The dimensions the returned data ID should identify. If not
            provided, this will be all dimensions in `QuerySummary.requested`.
        records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ], optional
            Nested mapping containing records to attach to the returned
            `DataCoordinate`, for which `~DataCoordinate.hasRecords` will
            return `True`. If provided, outer keys must include all dimension
            element names in ``graph``, and inner keys should be tuples of
            dimension primary key values in the same order as
            ``element.graph.required``. If not provided,
            `DataCoordinate.hasRecords` will return `False` on the returned
            object.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID that identifies all required and implied dimensions. If
            ``records is not None``, this will have
            `~DataCoordinate.hasRecords()` return `True`.
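
        Examples
        --------
        Illustrative sketch (``query`` and ``db`` are assumed to exist
        elsewhere; no records are attached)::

            dataIds = [query.extractDataId(row) for row in query.rows(db)]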

        """
        if graph is None:
            graph = self.graph
        if not graph:
            return DataCoordinate.makeEmpty(self.graph.universe)
        dataId = DataCoordinate.fromFullValues(
            graph,
            self.extractDimensionsTuple(row, itertools.chain(graph.required, graph.implied))
        )
        if records is not None:
            recordsForRow = {}
            for element in graph.elements:
                key = tuple(dataId.subset(element.graph).values())
                recordsForRow[element.name] = records[element.name].get(key)
            return dataId.expanded(recordsForRow)
        else:
            return dataId

    def extractDatasetRef(self, row: sqlalchemy.engine.RowProxy,
                          dataId: Optional[DataCoordinate] = None,
                          records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
                          ) -> DatasetRef:
        """Extract a `DatasetRef` from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        dataId : `DataCoordinate`, optional
            Data ID to attach to the `DatasetRef`. A minimal (i.e. base class)
            `DataCoordinate` is constructed from ``row`` if `None`.
        records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ], optional
            Records to attach to the data ID of the returned `DatasetRef`, as
            in `extractDataId`. If provided, outer keys must include all
            dimension element names in ``graph``, and inner keys should be
            tuples of dimension primary key values in the same order as
            ``element.graph.required``.

        Returns
        -------
        ref : `DatasetRef`
            Reference to the dataset; guaranteed to have `DatasetRef.id` not
            `None`.
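
        Examples
        --------
        Illustrative sketch, meaningful only when `getDatasetColumns` does
        not return `None` (``query`` and ``db`` are assumed to exist
        elsewhere)::

            refs = [query.extractDatasetRef(row) for row in query.rows(db)]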

        """
        datasetColumns = self.getDatasetColumns()
        assert datasetColumns is not None
        if dataId is None:
            dataId = self.extractDataId(row, graph=datasetColumns.datasetType.dimensions, records=records)
        runRecord = self.managers.collections[row[datasetColumns.runKey]]
        return DatasetRef(datasetColumns.datasetType, dataId, id=row[datasetColumns.id], run=runRecord.name)

    def _makeTableSpec(self, constraints: bool = False) -> ddl.TableSpec:
        """Helper method for subclass implementations of `materialize`.

        Parameters
        ----------
        constraints : `bool`, optional
            If `True` (`False` is default), define a specification that
            includes actual foreign key constraints for logical foreign keys.
            Some database engines do not permit temporary tables to reference
            normal tables, so this should be `False` when generating a spec
            for a temporary table unless the database engine is known to
            support such references.

        Returns
        -------
        spec : `ddl.TableSpec`
            Specification for a table that could hold this query's result rows.
        """
        unique = self.isUnique()
        spec = ddl.TableSpec(fields=())
        for dimension in self.graph:
            addDimensionForeignKey(spec, dimension, primaryKey=unique, constraint=constraints)
        for element in self.spatial:
            field = copy.copy(REGION_FIELD_SPEC)
            field.name = f"{element.name}_region"
            spec.fields.add(field)
        datasetColumns = self.getDatasetColumns()
        if datasetColumns is not None:
            self.managers.datasets.addDatasetForeignKey(spec, primaryKey=unique, constraint=constraints)
            self.managers.collections.addRunForeignKey(spec, nullable=False, constraint=constraints)
        return spec

    def _makeSubsetQueryColumns(self, *, graph: Optional[DimensionGraph] = None,
                                datasets: bool = True,
                                unique: bool = False) -> Tuple[DimensionGraph, Optional[QueryColumns]]:
        """Helper method for subclass implementations of `subset`.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new `Query` being constructed.
            ``subset`` implementations should generally just forward their
            own ``graph`` argument here.
        datasets : `bool`, optional
            Whether the new `Query` should include dataset results. Defaults
            to `True`, but is ignored if ``self`` does not include dataset
            results.
        unique : `bool`, optional
            Whether the new `Query` should guarantee unique results (this may
            come with a performance penalty).

        Returns
        -------
        graph : `DimensionGraph`
            The dimensions of the new `Query`. This is exactly the same as
            the argument of the same name, with ``self.graph`` used if that
            argument is `None`.
        columns : `QueryColumns` or `None`
            A struct containing the SQLAlchemy column objects to use in the
            new query, constructed by delegating to other (mostly abstract)
            methods on ``self``. If `None`, `subset` may return ``self``.
        """
        if graph is None:
            graph = self.graph
        if (graph == self.graph and (self.getDatasetColumns() is None or datasets)
                and (self.isUnique() or not unique)):
            return graph, None
        columns = QueryColumns()
        for dimension in graph.dimensions:
            col = self.getDimensionColumn(dimension.name)
            columns.keys[dimension] = [col]
        if not unique:
            for element in self.spatial:
                col = self.getRegionColumn(element.name)
                columns.regions[element] = col
        if datasets and self.getDatasetColumns() is not None:
            columns.datasets = self.getDatasetColumns()
        return graph, columns

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        """Execute this query and insert its results into a temporary table.

        Parameters
        ----------
        db : `Database`
            Database engine to execute the query against.

        Returns
        -------
        context : `typing.ContextManager` [ `MaterializedQuery` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a `MaterializedQuery` object
            backed by that table), and dropped in ``__exit__``. If ``self``
            is already a `MaterializedQuery`, ``__enter__`` may just return
            ``self`` and ``__exit__`` may do nothing (reflecting the fact that
            an outer context manager should already take care of everything
            else).
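
        Examples
        --------
        Illustrative sketch (``query`` and ``db`` are assumed to exist
        elsewhere)::

            with query.materialize(db) as materialized:
                for row in materialized.rows(db):
                    ...  # rows now come from the temporary table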

        """
        spec = self._makeTableSpec()
        table = db.makeTemporaryTable(spec)
        db.insert(table, select=self.sql, names=spec.fields.names)
        yield MaterializedQuery(table=table,
                                spatial=self.spatial,
                                datasetType=self.datasetType,
                                isUnique=self.isUnique(),
                                graph=self.graph,
                                whereRegion=self.whereRegion,
                                managers=self.managers)
        db.dropTemporaryTable(table)

    @abstractmethod
    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        """Return a new `Query` whose columns and/or rows are (mostly) a subset
        of this one's.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new `Query` being constructed.
            If `None` (default), ``self.graph`` is used.
        datasets : `bool`, optional
            Whether the new `Query` should include dataset results. Defaults
            to `True`, but is ignored if ``self`` does not include dataset
            results.
        unique : `bool`, optional
            Whether the new `Query` should guarantee unique results (this may
            come with a performance penalty).

        Returns
        -------
        query : `Query`
            A query object corresponding to the given inputs. May be ``self``
            if no changes were requested.

        Notes
        -----
        The way spatial overlaps are handled at present makes it impossible to
        fully guarantee in general that the new query's rows are a subset of
        this one's while also returning unique rows. That's because the
        database is only capable of performing approximate, conservative
        overlaps via the common skypix system; we defer actual region overlap
        operations to per-result-row Python logic. But including the region
        columns necessary to do that postprocessing in the query makes it
        impossible to do a SELECT DISTINCT on the user-visible dimensions of
        the query. For example, consider starting with a query with dimensions
        (instrument, skymap, visit, tract). That involves a spatial join
        between visit and tract, and we include the region columns from both
        tables in the results in order to only actually yield result rows
        (see `predicate` and `rows`) where the regions in those two columns
        overlap. If the user then wants to subset to just (skymap, tract) with
        unique results, we have two unpalatable options:

        - we can do a SELECT DISTINCT with just the skymap and tract columns
          in the SELECT clause, dropping all detailed overlap information and
          including some tracts that did not actually overlap any of the
          visits in the original query (but were regarded as *possibly*
          overlapping via the coarser, common-skypix relationships);

        - we can include the tract and visit region columns in the query, and
          continue to filter out the non-overlapping pairs, but completely
          disregard the user's request for unique tracts.

        This interface specifies that implementations must do the former, as
        that's what makes things efficient in our most important use case
        (``QuantumGraph`` generation in ``pipe_base``). We may be able to
        improve this situation in the future by putting exact overlap
        information in the database, either by using built-in (but
        engine-specific) spatial database functionality or (more likely)
        switching to a scheme in which pairwise dimension spatial relationships
        are explicitly precomputed (for e.g. combinations of instruments and
        skymaps).
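
        Examples
        --------
        Illustrative sketch of requesting unique rows without dataset
        results (a narrower ``graph`` argument could also be passed here;
        ``query`` is assumed to exist elsewhere)::

            uniqueDataIds = query.subset(datasets=False, unique=True)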

        """
        raise NotImplementedError()

    @abstractmethod
    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        """Return a `QueryBuilder` that can be used to construct a new `Query`
        that is joined to (and hence constrained by) this one.

        Parameters
        ----------
        summary : `QuerySummary`, optional
            A `QuerySummary` instance that specifies the dimensions and any
            additional constraints to include in the new query being
            constructed, or `None` to use the dimensions of ``self`` with no
            additional constraints.
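
        Examples
        --------
        Illustrative sketch (``query`` is assumed to exist elsewhere; the
        calls used to add tables or datasets to the builder are part of
        `QueryBuilder` and are not shown)::

            summary = QuerySummary(query.graph)
            builder = query.makeBuilder(summary)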

        """
        raise NotImplementedError()

    graph: DimensionGraph
    """The dimensions identified by this query and included in any data IDs
    created from its result rows (`DimensionGraph`).
    """

    whereRegion: Optional[Region]
    """A spatial region that all regions in all rows returned by this query
    must overlap (`lsst.sphgeom.Region` or `None`).
    """

    managers: RegistryManagers
    """A struct containing `Registry` helper objects (`RegistryManagers`).
    """


class DirectQueryUniqueness(enum.Enum):
    """An enum representing the ways in which a query can have unique rows (or
    not).
    """

    NOT_UNIQUE = enum.auto()
    """The query is not expected to have unique rows.
    """

    NATURALLY_UNIQUE = enum.auto()
    """The construction of the query guarantees that it will have unique
    result rows, even without SELECT DISTINCT or a GROUP BY clause.
    """

    NEEDS_DISTINCT = enum.auto()
    """The query is expected to yield unique result rows, and needs to use
    SELECT DISTINCT or an equivalent GROUP BY clause to achieve this.
    """


class DirectQuery(Query):
    """A `Query` implementation that represents a direct SELECT query that
    usually joins many tables.

    `DirectQuery` objects should generally only be constructed by
    `QueryBuilder` or the methods of other `Query` objects.

    Parameters
    ----------
    simpleQuery : `SimpleQuery`
        Struct representing the actual SELECT, FROM, and WHERE clauses.
    columns : `QueryColumns`
        Columns that are referenced in the query in any clause.
    uniqueness : `DirectQueryUniqueness`
        Enum value indicating whether the query should yield unique result
        rows, and if so whether that needs to be explicitly requested of the
        database.
    graph : `DimensionGraph`
        Object describing the dimensions included in the query.
    whereRegion : `lsst.sphgeom.Region`, optional
        Region that all region columns in all returned rows must overlap.
    managers : `RegistryManagers`
        Struct containing the `Registry` manager helper objects, to be
        forwarded to the `Query` constructor.
    """
    def __init__(self, *,
                 simpleQuery: SimpleQuery,
                 columns: QueryColumns,
                 uniqueness: DirectQueryUniqueness,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers):
        super().__init__(graph=graph, whereRegion=whereRegion, managers=managers)
        assert not simpleQuery.columns, "Columns should always be set on a copy in .sql"
        assert not columns.isEmpty(), "EmptyQuery must be used when a query would have no columns."
        self._simpleQuery = simpleQuery
        self._columns = columns
        self._uniqueness = uniqueness

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return self._uniqueness is not DirectQueryUniqueness.NOT_UNIQUE

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        return self._columns.getKeyColumn(name).label(name)

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(self._columns.regions)

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        return self._columns.regions[name].label(f"{name}_region")

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        base = self._columns.datasets
        if base is None:
            return None
        return DatasetQueryColumns(
            datasetType=base.datasetType,
            id=base.id.label("dataset_id"),
            runKey=base.runKey.label(self.managers.collections.getRunForeignKeyName()),
        )

    @property
    def sql(self) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from Query.
        simpleQuery = self._simpleQuery.copy()
        for dimension in self.graph:
            simpleQuery.columns.append(self.getDimensionColumn(dimension.name))
        for element in self.spatial:
            simpleQuery.columns.append(self.getRegionColumn(element.name))
        datasetColumns = self.getDatasetColumns()
        if datasetColumns is not None:
            simpleQuery.columns.extend(datasetColumns)
        sql = simpleQuery.combine()
        if self._uniqueness is DirectQueryUniqueness.NEEDS_DISTINCT:
            return sql.distinct()
        else:
            return sql

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
        if columns is None:
            return self
        if columns.isEmpty():
            return EmptyQuery(self.graph.universe, self.managers)
        return DirectQuery(
            simpleQuery=self._simpleQuery.copy(),
            columns=columns,
            uniqueness=DirectQueryUniqueness.NEEDS_DISTINCT if unique else DirectQueryUniqueness.NOT_UNIQUE,
            graph=graph,
            whereRegion=self.whereRegion if not unique else None,
            managers=self.managers,
        )

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        builder = QueryBuilder(summary, managers=self.managers)
        builder.joinTable(self.sql.alias(), dimensions=self.graph.dimensions,
                          datasets=self.getDatasetColumns())
        return builder


class MaterializedQuery(Query):
    """A `Query` implementation that represents query results saved in a
    temporary table.

    `MaterializedQuery` instances should not be constructed directly; use
    `Query.materialize()` instead.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy object representing the temporary table.
    spatial : `Iterable` [ `DimensionElement` ]
        Spatial dimension elements whose regions must overlap for each valid
        result row (which may reject some rows that are in the table).
    datasetType : `DatasetType` or `None`
        The `DatasetType` of datasets returned by this query, or `None`
        if there are no dataset results.
    isUnique : `bool`
        If `True`, the table's rows are unique, and there is no need to
        add ``SELECT DISTINCT`` to guarantee this in results.
    graph : `DimensionGraph`
        Dimensions included in the columns of this table.
    whereRegion : `Region` or `None`
        A spatial region all result-row regions must overlap to be valid (which
        may reject some rows that are in the table).
    managers : `RegistryManagers`
        A struct containing `Registry` manager helper objects, forwarded to
        the `Query` constructor.
    """
    def __init__(self, *,
                 table: sqlalchemy.schema.Table,
                 spatial: Iterable[DimensionElement],
                 datasetType: Optional[DatasetType],
                 isUnique: bool,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers):
        super().__init__(graph=graph, whereRegion=whereRegion, managers=managers)
        self._table = table
        self._spatial = tuple(spatial)
        self._datasetType = datasetType
        self._isUnique = isUnique

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return self._isUnique

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        return self._table.columns[name]

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(self._spatial)

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        return self._table.columns[f"{name}_region"]

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        if self._datasetType is not None:
            return DatasetQueryColumns(
                datasetType=self._datasetType,
                id=self._table.columns["dataset_id"],
                runKey=self._table.columns[self.managers.collections.getRunForeignKeyName()],
            )
        else:
            return None

    @property
    def sql(self) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from Query.
        return self._table.select()

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        # Docstring inherited from Query.
        yield self

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
        if columns is None:
            return self
        if columns.isEmpty():
            return EmptyQuery(self.graph.universe, managers=self.managers)
        simpleQuery = SimpleQuery()
        simpleQuery.join(self._table)
        return DirectQuery(
            simpleQuery=simpleQuery,
            columns=columns,
            uniqueness=DirectQueryUniqueness.NEEDS_DISTINCT if unique else DirectQueryUniqueness.NOT_UNIQUE,
            graph=graph,
            whereRegion=self.whereRegion if not unique else None,
            managers=self.managers,
        )

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        builder = QueryBuilder(summary, managers=self.managers)
        builder.joinTable(self._table, dimensions=self.graph.dimensions, datasets=self.getDatasetColumns())
        return builder


class EmptyQuery(Query):
    """A `Query` implementation that handles the special case where the query
    would have no columns.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Set of all known dimensions, from which the empty dimension set is
        extracted.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.
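
    Examples
    --------
    Illustrative sketch of the degenerate behavior (``universe``,
    ``managers``, and ``db`` are assumed to exist elsewhere)::

        query = EmptyQuery(universe, managers)
        rows = list(query.rows(db))         # [None]
        dataId = query.extractDataId(None)  # an empty DataCoordinate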

    """
    def __init__(self, universe: DimensionUniverse, managers: RegistryManagers):
        super().__init__(graph=universe.empty, whereRegion=None, managers=managers)

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return True

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        raise KeyError(f"No dimension {name} in query (no dimensions at all, actually).")

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(())

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        raise KeyError(f"No region for {name} in query (no regions at all, actually).")

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        return None

    def rows(self, db: Database, *, region: Optional[Region] = None
             ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
        # Docstring inherited from Query.
        yield None

    @property
    def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
        # Docstring inherited from Query.
        return None

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        # Docstring inherited from Query.
        yield self

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        assert graph is None or graph.issubset(self.graph)
        return self

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        return QueryBuilder(summary, managers=self.managers)