# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("Query",)

from abc import ABC, abstractmethod
from contextlib import contextmanager
import enum
import itertools
from typing import (
    Callable,
    Iterable,
    Iterator,
    Mapping,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

import sqlalchemy

from lsst.sphgeom import Region

from ...core import (
    addDimensionForeignKey,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    ddl,
    Dimension,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    SpatialRegionDatabaseRepresentation,
    SimpleQuery,
)
from ..interfaces import Database
from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary, RegistryManagers

if TYPE_CHECKING:
    from ._builder import QueryBuilder


class Query(ABC):
    """An abstract base class for queries that return some combination of
    `DatasetRef` and `DataCoordinate` objects.

    Parameters
    ----------
    graph : `DimensionGraph`
        Object describing the dimensions included in the query.
    whereRegion : `lsst.sphgeom.Region`, optional
        Region that all region columns in all returned rows must overlap.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.

    Notes
    -----
    The `Query` hierarchy abstracts over the database/SQL representation of a
    particular set of data IDs or datasets. It is expected to be used as a
    backend for other objects that provide more natural interfaces for one or
    both of these, not as part of a public interface to query results.
    """
    def __init__(self, *,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers,
                 ):
        self.graph = graph
        self.whereRegion = whereRegion
        self.managers = managers

    @abstractmethod
    def isUnique(self) -> bool:
        """Return `True` if this query's rows are guaranteed to be unique, and
        `False` otherwise.

        If this query has dataset results (`datasetType` is not `None`),
        uniqueness applies to the `DatasetRef` instances returned by
        `extractDatasetRef` from the result of `rows`. If it does not have
        dataset results, uniqueness applies to the `DataCoordinate` instances
        returned by `extractDataId`.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        """Return the query column that contains the primary key value for
        the dimension with the given name.

        Parameters
        ----------
        name : `str`
            Name of the dimension.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy object representing a column in the query.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def spatial(self) -> Iterator[DimensionElement]:
        """An iterator over the dimension element columns used in post-query
        filtering of spatial overlaps (`Iterator` [ `DimensionElement` ]).

        Notes
        -----
        This property is intended primarily as a hook for subclasses to
        implement and the ABC to call in order to provide higher-level
        functionality; code that uses `Query` objects (but does not implement
        one) should usually not have to access this property.
        """
        raise NotImplementedError()

    @abstractmethod
    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        """Return a region column for one of the dimension elements iterated
        over by `spatial`.

        Parameters
        ----------
        name : `str`
            Name of the element.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy object representing a result column in the query.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    def datasetType(self) -> Optional[DatasetType]:
        """The `DatasetType` of datasets returned by this query, or `None`
        if there are no dataset results (`DatasetType` or `None`).
        """
        cols = self.getDatasetColumns()
        if cols is None:
            return None
        return cols.datasetType

    @abstractmethod
    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        """Return the columns for the datasets returned by this query.

        Returns
        -------
        columns : `DatasetQueryColumns` or `None`
            Struct containing SQLAlchemy representations of the result columns
            for a dataset.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
        """A SQLAlchemy object representing the full query
        (`sqlalchemy.sql.FromClause` or `None`).

        This is `None` in the special case where the query has no columns, and
        only one logical row.
        """
        raise NotImplementedError()

    def predicate(self, region: Optional[Region] = None) -> Callable[[sqlalchemy.engine.RowProxy], bool]:
        """Return a callable that can perform extra Python-side filtering of
        query results.

        To get the expected results from a query, the returned predicate *must*
        be used to ignore rows for which it returns `False`; this permits the
        `QueryBuilder` implementation to move logic from the database to Python
        without changing the public interface.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order for the
            predicate to return `True`. If not provided, this will be
            ``self.whereRegion``, if that exists.

        Returns
        -------
        func : `Callable`
            A callable that takes a single `sqlalchemy.engine.RowProxy`
            argument and returns `bool`.
        """
        whereRegion = region if region is not None else self.whereRegion

        def closure(row: sqlalchemy.engine.RowProxy) -> bool:
            rowRegions = [row[self.getRegionColumn(element.name)] for element in self.spatial]
            if whereRegion and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
                return False
            return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))

        return closure

    def rows(self, db: Database, *, region: Optional[Region] = None
             ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
        """Execute the query and yield result rows, applying `predicate`.

        Parameters
        ----------
        db : `Database`
            Database engine to execute the query against.
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order to be
            yielded. If not provided, this will be ``self.whereRegion``, if
            that exists.

        Yields
        ------
        row : `sqlalchemy.engine.RowProxy` or `None`
            Result row from the query. `None` may be yielded exactly once
            instead of any real rows to indicate an empty query (see
            `EmptyQuery`).
        """
        predicate = self.predicate(region)
        for row in db.query(self.sql):
            if predicate(row):
                yield row
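
    # Illustrative sketch (not part of the original module): a typical caller
    # loops over `rows` and converts each row with `extractDataId` or
    # `extractDatasetRef`; the spatial predicate is applied inside `rows`, so
    # callers normally do not need to call `predicate` themselves. Here `db`
    # is assumed to be a `Database` and `query` a concrete `Query` instance:
    #
    #     dataIds = [query.extractDataId(row) for row in query.rows(db)]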

    def extractDimensionsTuple(self, row: Optional[sqlalchemy.engine.RowProxy],
                               dimensions: Iterable[Dimension]) -> tuple:
        """Extract a tuple of data ID values from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy` or `None`
            A result row from a SQLAlchemy SELECT query, or `None` to indicate
            the row from an `EmptyQuery`.
        dimensions : `Iterable` [ `Dimension` ]
            The dimensions to include in the returned tuple, in order.

        Returns
        -------
        values : `tuple`
            A tuple of dimension primary key values.
        """
        if row is None:
            assert not tuple(dimensions), "Can only utilize empty query row when there are no dimensions."
            return ()
        return tuple(row[self.getDimensionColumn(dimension.name)] for dimension in dimensions)

    def extractDataId(self, row: Optional[sqlalchemy.engine.RowProxy], *,
                      graph: Optional[DimensionGraph] = None,
                      records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
                      ) -> DataCoordinate:
        """Extract a data ID from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy` or `None`
            A result row from a SQLAlchemy SELECT query, or `None` to indicate
            the row from an `EmptyQuery`.
        graph : `DimensionGraph`, optional
            The dimensions the returned data ID should identify. If not
            provided, this will be all dimensions in `QuerySummary.requested`.
        records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
            Nested mapping containing records to attach to the returned
            `DataCoordinate`, for which `~DataCoordinate.hasRecords` will
            return `True`. If provided, outer keys must include all dimension
            element names in ``graph``, and inner keys should be tuples of
            dimension primary key values in the same order as
            ``element.graph.required``. If not provided,
            `DataCoordinate.hasRecords` will return `False` on the returned
            object.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID that identifies all required and implied dimensions. If
            ``records is not None``, this will have
            `~DataCoordinate.hasRecords()` return `True`.
        """
        if graph is None:
            graph = self.graph
        if not graph:
            return DataCoordinate.makeEmpty(self.graph.universe)
        dataId = DataCoordinate.fromFullValues(
            graph,
            self.extractDimensionsTuple(row, itertools.chain(graph.required, graph.implied))
        )
        if records is not None:
            recordsForRow = {}
            for element in graph.elements:
                key = tuple(dataId.subset(element.graph).values())
                recordsForRow[element.name] = records[element.name].get(key)
            return dataId.expanded(recordsForRow)
        else:
            return dataId
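
    # Illustrative sketch (not part of the original module): the ``records``
    # argument is a nested mapping keyed first by dimension element name and
    # then by the tuple of required dimension values. The element name
    # ("detector"), key values, and record objects ``rec1``/``rec2`` below are
    # hypothetical:
    #
    #     records = {"detector": {("HSC", 1): rec1, ("HSC", 2): rec2}}
    #     dataId = query.extractDataId(row, records=records)
    #     assert dataId.hasRecords()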

    def extractDatasetRef(self, row: sqlalchemy.engine.RowProxy,
                          dataId: Optional[DataCoordinate] = None,
                          records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
                          ) -> DatasetRef:
        """Extract a `DatasetRef` from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        dataId : `DataCoordinate`
            Data ID to attach to the `DatasetRef`. A minimal (i.e. base class)
            `DataCoordinate` is constructed from ``row`` if `None`.
        records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
            Records to use to return an `ExpandedDataCoordinate`. If provided,
            outer keys must include all dimension element names in ``graph``,
            and inner keys should be tuples of dimension primary key values
            in the same order as ``element.graph.required``.

        Returns
        -------
        ref : `DatasetRef`
            Reference to the dataset; guaranteed to have `DatasetRef.id` not
            `None`.
        """
        datasetColumns = self.getDatasetColumns()
        assert datasetColumns is not None
        if dataId is None:
            dataId = self.extractDataId(row, graph=datasetColumns.datasetType.dimensions, records=records)
        runRecord = self.managers.collections[row[datasetColumns.runKey]]
        return DatasetRef(datasetColumns.datasetType, dataId, id=row[datasetColumns.id], run=runRecord.name)
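
    # Illustrative sketch (not part of the original module): for a query with
    # dataset results, callers typically build `DatasetRef` objects directly
    # from the yielded rows; `db` and `query` are assumed names here:
    #
    #     refs = [query.extractDatasetRef(row) for row in query.rows(db)]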

    def _makeTableSpec(self, constraints: bool = False) -> ddl.TableSpec:
        """Helper method for subclass implementations of `materialize`.

        Parameters
        ----------
        constraints : `bool`, optional
            If `True` (`False` is default), define a specification that
            includes actual foreign key constraints for logical foreign keys.
            Some database engines do not permit temporary tables to reference
            normal tables, so this should be `False` when generating a spec
            for a temporary table unless the database engine is known to
            support them.

        Returns
        -------
        spec : `ddl.TableSpec`
            Specification for a table that could hold this query's result rows.
        """
        unique = self.isUnique()
        spec = ddl.TableSpec(fields=())
        for dimension in self.graph:
            addDimensionForeignKey(spec, dimension, primaryKey=unique, constraint=constraints)
        for element in self.spatial:
            spec.fields.update(
                SpatialRegionDatabaseRepresentation.makeFieldSpecs(
                    nullable=True,
                    name=f"{element.name}_region",
                )
            )
        datasetColumns = self.getDatasetColumns()
        if datasetColumns is not None:
            self.managers.datasets.addDatasetForeignKey(spec, primaryKey=unique, constraint=constraints)
            self.managers.collections.addRunForeignKey(spec, nullable=False, constraint=constraints)
        return spec

    def _makeSubsetQueryColumns(self, *, graph: Optional[DimensionGraph] = None,
                                datasets: bool = True,
                                unique: bool = False) -> Tuple[DimensionGraph, Optional[QueryColumns]]:
        """Helper method for subclass implementations of `subset`.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new `Query` being constructed.
            ``subset`` implementations should generally just forward their
            own ``graph`` argument here.
        datasets : `bool`, optional
            Whether the new `Query` should include dataset results. Defaults
            to `True`, but is ignored if ``self`` does not include dataset
            results.
        unique : `bool`, optional
            Whether the new `Query` should guarantee unique results (this may
            come with a performance penalty).

        Returns
        -------
        graph : `DimensionGraph`
            The dimensions of the new `Query`. This is exactly the same as
            the argument of the same name, with ``self.graph`` used if that
            argument is `None`.
        columns : `QueryColumns` or `None`
            A struct containing the SQLAlchemy column objects to use in the
            new query, constructed by delegating to other (mostly abstract)
            methods on ``self``. If `None`, `subset` may return ``self``.
        """
        if graph is None:
            graph = self.graph
        if (graph == self.graph and (self.getDatasetColumns() is None or datasets)
                and (self.isUnique() or not unique)):
            return graph, None
        columns = QueryColumns()
        for dimension in graph.dimensions:
            col = self.getDimensionColumn(dimension.name)
            columns.keys[dimension] = [col]
        if not unique:
            for element in self.spatial:
                col = self.getRegionColumn(element.name)
                columns.regions[element] = col
        if datasets and self.getDatasetColumns() is not None:
            columns.datasets = self.getDatasetColumns()
        return graph, columns

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        """Execute this query and insert its results into a temporary table.

        Parameters
        ----------
        db : `Database`
            Database engine to execute the query against.

        Returns
        -------
        context : `typing.ContextManager` [ `MaterializedQuery` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a `MaterializedQuery` object
            backed by that table), and dropped in ``__exit__``. If ``self``
            is already a `MaterializedQuery`, ``__enter__`` may just return
            ``self`` and ``__exit__`` may do nothing (reflecting the fact that
            an outer context manager should already take care of everything
            else).
        """
        spec = self._makeTableSpec()
        table = db.makeTemporaryTable(spec)
        db.insert(table, select=self.sql, names=spec.fields.names)
        yield MaterializedQuery(table=table,
                                spatial=self.spatial,
                                datasetType=self.datasetType,
                                isUnique=self.isUnique(),
                                graph=self.graph,
                                whereRegion=self.whereRegion,
                                managers=self.managers)
        db.dropTemporaryTable(table)
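
    # Illustrative sketch (not part of the original module): materialization
    # is used as a context manager, so the temporary table is dropped when the
    # block exits; `db` and `query` are assumed names here:
    #
    #     with query.materialize(db) as materialized:
    #         for row in materialized.rows(db):
    #             ...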

    @abstractmethod
    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        """Return a new `Query` whose columns and/or rows are (mostly) a
        subset of this one's.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new `Query` being constructed.
            If `None` (default), ``self.graph`` is used.
        datasets : `bool`, optional
            Whether the new `Query` should include dataset results. Defaults
            to `True`, but is ignored if ``self`` does not include dataset
            results.
        unique : `bool`, optional
            Whether the new `Query` should guarantee unique results (this may
            come with a performance penalty).

        Returns
        -------
        query : `Query`
            A query object corresponding to the given inputs. May be ``self``
            if no changes were requested.

        Notes
        -----
        The way spatial overlaps are handled at present makes it impossible to
        fully guarantee in general that the new query's rows are a subset of
        this one's while also returning unique rows. That's because the
        database is only capable of performing approximate, conservative
        overlaps via the common skypix system; we defer actual region overlap
        operations to per-result-row Python logic. But including the region
        columns necessary to do that postprocessing in the query makes it
        impossible to do a SELECT DISTINCT on the user-visible dimensions of
        the query. For example, consider starting with a query with dimensions
        (instrument, skymap, visit, tract). That involves a spatial join
        between visit and tract, and we include the region columns from both
        tables in the results in order to only actually yield result rows
        (see `predicate` and `rows`) where the regions in those two columns
        overlap. If the user then wants to subset to just (skymap, tract) with
        unique results, we have two unpalatable options:

        - we can do a SELECT DISTINCT with just the skymap and tract columns
          in the SELECT clause, dropping all detailed overlap information and
          including some tracts that did not actually overlap any of the
          visits in the original query (but were regarded as _possibly_
          overlapping via the coarser, common-skypix relationships);

        - we can include the tract and visit region columns in the query, and
          continue to filter out the non-overlapping pairs, but completely
          disregard the user's request for unique tracts.

        This interface specifies that implementations must do the former, as
        that's what makes things efficient in our most important use case
        (``QuantumGraph`` generation in ``pipe_base``). We may be able to
        improve this situation in the future by putting exact overlap
        information in the database, either by using built-in (but
        engine-specific) spatial database functionality or (more likely)
        switching to a scheme in which pairwise dimension spatial relationships
        are explicitly precomputed (for e.g. combinations of instruments and
        skymaps).
        """
        raise NotImplementedError()
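
    # Illustrative sketch (not part of the original module): the tradeoff
    # described in the Notes above arises from calls like the one below, where
    # a (visit, tract) query is narrowed to unique tracts; `query` is an
    # assumed name and ``tract_graph`` a hypothetical `DimensionGraph` holding
    # just the skymap/tract dimensions:
    #
    #     tract_query = query.subset(graph=tract_graph, datasets=False, unique=True)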

    @abstractmethod
    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        """Return a `QueryBuilder` that can be used to construct a new `Query`
        that is joined to (and hence constrained by) this one.

        Parameters
        ----------
        summary : `QuerySummary`, optional
            A `QuerySummary` instance that specifies the dimensions and any
            additional constraints to include in the new query being
            constructed, or `None` to use the dimensions of ``self`` with no
            additional constraints.
        """
        raise NotImplementedError()
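
    # Illustrative sketch (not part of the original module): a follow-up query
    # constrained by this one is built by chaining `makeBuilder` with the
    # `QueryBuilder` API; `query` is an assumed name, and completing the build
    # with `QueryBuilder.finish` is an assumption about that interface:
    #
    #     builder = query.makeBuilder()
    #     newQuery = builder.finish()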

    graph: DimensionGraph
    """The dimensions identified by this query and included in any data IDs
    created from its result rows (`DimensionGraph`).
    """

    whereRegion: Optional[Region]
    """A spatial region that all regions in all rows returned by this query
    must overlap (`lsst.sphgeom.Region` or `None`).
    """

    managers: RegistryManagers
    """A struct containing `Registry` helper objects (`RegistryManagers`).
    """


class DirectQueryUniqueness(enum.Enum):
    """An enum representing the ways in which a query can have unique rows (or
    not).
    """

    NOT_UNIQUE = enum.auto()
    """The query is not expected to have unique rows.
    """

    NATURALLY_UNIQUE = enum.auto()
    """The construction of the query guarantees that it will have unique
    result rows, even without SELECT DISTINCT or a GROUP BY clause.
    """

    NEEDS_DISTINCT = enum.auto()
    """The query is expected to yield unique result rows, and needs to use
    SELECT DISTINCT or an equivalent GROUP BY clause to achieve this.
    """


class DirectQuery(Query):
    """A `Query` implementation that represents a direct SELECT query that
    usually joins many tables.

    `DirectQuery` objects should generally only be constructed by
    `QueryBuilder` or the methods of other `Query` objects.

    Parameters
    ----------
    simpleQuery : `SimpleQuery`
        Struct representing the actual SELECT, FROM, and WHERE clauses.
    columns : `QueryColumns`
        Columns that are referenced in the query in any clause.
    uniqueness : `DirectQueryUniqueness`
        Enum value indicating whether the query should yield unique result
        rows, and if so whether that needs to be explicitly requested of the
        database.
    graph : `DimensionGraph`
        Object describing the dimensions included in the query.
    whereRegion : `lsst.sphgeom.Region`, optional
        Region that all region columns in all returned rows must overlap.
    managers : `RegistryManagers`
        Struct containing the `Registry` manager helper objects, to be
        forwarded to the `Query` constructor.
    """
    def __init__(self, *,
                 simpleQuery: SimpleQuery,
                 columns: QueryColumns,
                 uniqueness: DirectQueryUniqueness,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers):
        super().__init__(graph=graph, whereRegion=whereRegion, managers=managers)
        assert not simpleQuery.columns, "Columns should always be set on a copy in .sql"
        assert not columns.isEmpty(), "EmptyQuery must be used when a query would have no columns."
        self._simpleQuery = simpleQuery
        self._columns = columns
        self._uniqueness = uniqueness

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return self._uniqueness is not DirectQueryUniqueness.NOT_UNIQUE

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        return self._columns.getKeyColumn(name).label(name)

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(self._columns.regions)

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        return self._columns.regions[name].column.label(f"{name}_region")

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        base = self._columns.datasets
        if base is None:
            return None
        ingestDate = base.ingestDate
        if ingestDate is not None:
            ingestDate = ingestDate.label("ingest_date")
        return DatasetQueryColumns(
            datasetType=base.datasetType,
            id=base.id.label("dataset_id"),
            runKey=base.runKey.label(self.managers.collections.getRunForeignKeyName()),
            ingestDate=ingestDate,
        )

    @property
    def sql(self) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from Query.
        simpleQuery = self._simpleQuery.copy()
        for dimension in self.graph:
            simpleQuery.columns.append(self.getDimensionColumn(dimension.name))
        for element in self.spatial:
            simpleQuery.columns.append(self.getRegionColumn(element.name))
        datasetColumns = self.getDatasetColumns()
        if datasetColumns is not None:
            simpleQuery.columns.extend(datasetColumns)
        sql = simpleQuery.combine()
        if self._uniqueness is DirectQueryUniqueness.NEEDS_DISTINCT:
            return sql.distinct()
        else:
            return sql

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
        if columns is None:
            return self
        if columns.isEmpty():
            return EmptyQuery(self.graph.universe, self.managers)
        return DirectQuery(
            simpleQuery=self._simpleQuery.copy(),
            columns=columns,
            uniqueness=DirectQueryUniqueness.NEEDS_DISTINCT if unique else DirectQueryUniqueness.NOT_UNIQUE,
            graph=graph,
            whereRegion=self.whereRegion if not unique else None,
            managers=self.managers,
        )

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        builder = QueryBuilder(summary, managers=self.managers)
        builder.joinTable(self.sql.alias(), dimensions=self.graph.dimensions,
                          datasets=self.getDatasetColumns())
        return builder


class MaterializedQuery(Query):
    """A `Query` implementation that represents query results saved in a
    temporary table.

    `MaterializedQuery` instances should not be constructed directly; use
    `Query.materialize()` instead.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy object representing the temporary table.
    spatial : `Iterable` [ `DimensionElement` ]
        Spatial dimension elements whose regions must overlap for each valid
        result row (which may reject some rows that are in the table).
    datasetType : `DatasetType`
        The `DatasetType` of datasets returned by this query, or `None`
        if there are no dataset results.
    isUnique : `bool`
        If `True`, the table's rows are unique, and there is no need to
        add ``SELECT DISTINCT`` to guarantee this in results.
    graph : `DimensionGraph`
        Dimensions included in the columns of this table.
    whereRegion : `Region` or `None`
        A spatial region all result-row regions must overlap to be valid (which
        may reject some rows that are in the table).
    managers : `RegistryManagers`
        A struct containing `Registry` manager helper objects, forwarded to
        the `Query` constructor.
    """
    def __init__(self, *,
                 table: sqlalchemy.schema.Table,
                 spatial: Iterable[DimensionElement],
                 datasetType: Optional[DatasetType],
                 isUnique: bool,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers):
        super().__init__(graph=graph, whereRegion=whereRegion, managers=managers)
        self._table = table
        self._spatial = tuple(spatial)
        self._datasetType = datasetType
        self._isUnique = isUnique

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return self._isUnique

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        return self._table.columns[name]

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(self._spatial)

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        return self._table.columns[f"{name}_region"]

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        if self._datasetType is not None:
            return DatasetQueryColumns(
                datasetType=self._datasetType,
                id=self._table.columns["dataset_id"],
                runKey=self._table.columns[self.managers.collections.getRunForeignKeyName()],
                ingestDate=None,
            )
        else:
            return None

    @property
    def sql(self) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from Query.
        return self._table.select()

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        # Docstring inherited from Query.
        yield self

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
        if columns is None:
            return self
        if columns.isEmpty():
            return EmptyQuery(self.graph.universe, managers=self.managers)
        simpleQuery = SimpleQuery()
        simpleQuery.join(self._table)
        return DirectQuery(
            simpleQuery=simpleQuery,
            columns=columns,
            uniqueness=DirectQueryUniqueness.NEEDS_DISTINCT if unique else DirectQueryUniqueness.NOT_UNIQUE,
            graph=graph,
            whereRegion=self.whereRegion if not unique else None,
            managers=self.managers,
        )

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        builder = QueryBuilder(summary, managers=self.managers)
        builder.joinTable(self._table, dimensions=self.graph.dimensions, datasets=self.getDatasetColumns())
        return builder


class EmptyQuery(Query):
    """A `Query` implementation that handles the special case where the query
    would have no columns.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Set of all dimensions from which the null set is extracted.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.
    """
    def __init__(self, universe: DimensionUniverse, managers: RegistryManagers):
        super().__init__(graph=universe.empty, whereRegion=None, managers=managers)

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return True

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        raise KeyError(f"No dimension {name} in query (no dimensions at all, actually).")

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(())

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        raise KeyError(f"No region for {name} in query (no regions at all, actually).")

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        return None

    def rows(self, db: Database, *, region: Optional[Region] = None
             ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
        yield None

    @property
    def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
        # Docstring inherited from Query.
        return None

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        # Docstring inherited from Query.
        yield self

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        assert graph is None or graph.issubset(self.graph)
        return self

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        return QueryBuilder(summary, managers=self.managers)
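

# Illustrative sketch (not part of the original module): `EmptyQuery` yields a
# single `None` row, and `extractDataId` maps that to an empty data ID, so the
# no-columns case flows through the same calling code as any other query.
# `db`, `universe`, and `managers` are assumed names here:
#
#     empty = EmptyQuery(universe, managers)
#     [row] = list(empty.rows(db))        # row is None
#     dataId = empty.extractDataId(row)   # empty DataCoordinate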