Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("Query",) 

24 

25from abc import ABC, abstractmethod 

26from contextlib import contextmanager 

27import enum 

28import itertools 

29from typing import ( 

30 Callable, 

31 Iterable, 

32 Iterator, 

33 Mapping, 

34 Optional, 

35 Tuple, 

36 TYPE_CHECKING, 

37) 

38 

39import sqlalchemy 

40 

41from lsst.sphgeom import Region 

42 

43from ...core import ( 

44 addDimensionForeignKey, 

45 DataCoordinate, 

46 DatasetRef, 

47 DatasetType, 

48 ddl, 

49 Dimension, 

50 DimensionElement, 

51 DimensionGraph, 

52 DimensionRecord, 

53 DimensionUniverse, 

54 SpatialRegionDatabaseRepresentation, 

55 SimpleQuery, 

56) 

57from ..interfaces import Database 

58from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary, RegistryManagers 

59 

60if TYPE_CHECKING: 60 ↛ 61line 60 didn't jump to line 61, because the condition on line 60 was never true

61 from ._builder import QueryBuilder 

62 

63 

class Query(ABC):
    """An abstract base class for queries that return some combination of
    `DatasetRef` and `DataCoordinate` objects.

    Parameters
    ----------
    graph : `DimensionGraph`
        Object describing the dimensions included in the query.
    whereRegion : `lsst.sphgeom.Region`, optional
        Region that all region columns in all returned rows must overlap.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.

    Notes
    -----
    The `Query` hierarchy abstracts over the database/SQL representation of a
    particular set of data IDs or datasets.  It is expected to be used as a
    backend for other objects that provide more natural interfaces for one or
    both of these, not as part of a public interface to query results.
    """
    def __init__(self, *,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers,
                 ):
        self.graph = graph
        self.whereRegion = whereRegion
        self.managers = managers

    @abstractmethod
    def isUnique(self) -> bool:
        """Return `True` if this query's rows are guaranteed to be unique, and
        `False` otherwise.

        If this query has dataset results (`datasetType` is not `None`),
        uniqueness applies to the `DatasetRef` instances returned by
        `extractDatasetRef` from the result of `rows`.  If it does not have
        dataset results, uniqueness applies to the `DataCoordinate` instances
        returned by `extractDataId`.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        """Return the query column that contains the primary key value for
        the dimension with the given name.

        Parameters
        ----------
        name : `str`
            Name of the dimension.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`.
            SQLAlchemy object representing a column in the query.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def spatial(self) -> Iterator[DimensionElement]:
        """An iterator over the dimension element columns used in post-query
        filtering of spatial overlaps (`Iterator` [ `DimensionElement` ]).

        Notes
        -----
        This property is intended primarily as a hook for subclasses to
        implement and the ABC to call in order to provide higher-level
        functionality; code that uses `Query` objects (but does not implement
        one) should usually not have to access this property.
        """
        raise NotImplementedError()

    @abstractmethod
    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        """Return a region column for one of the dimension elements iterated
        over by `spatial`.

        Parameters
        ----------
        name : `str`
            Name of the element.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy representing a result column in the query.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    def datasetType(self) -> Optional[DatasetType]:
        """The `DatasetType` of datasets returned by this query, or `None`
        if there are no dataset results (`DatasetType` or `None`).
        """
        cols = self.getDatasetColumns()
        if cols is None:
            return None
        return cols.datasetType

    @abstractmethod
    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        """Return the columns for the datasets returned by this query.

        Returns
        -------
        columns : `DatasetQueryColumns` or `None`
            Struct containing SQLAlchemy representations of the result columns
            for a dataset, or `None` if the query has no dataset results.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
        """A SQLAlchemy object representing the full query
        (`sqlalchemy.sql.FromClause` or `None`).

        This is `None` in the special case where the query has no columns, and
        only one logical row.
        """
        raise NotImplementedError()

    def predicate(self, region: Optional[Region] = None) -> Callable[[sqlalchemy.engine.RowProxy], bool]:
        """Return a callable that can perform extra Python-side filtering of
        query results.

        To get the expected results from a query, the returned predicate *must*
        be used to ignore rows for which it returns `False`; this permits the
        `QueryBuilder` implementation to move logic from the database to Python
        without changing the public interface.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order for the
            predicate to return `True`.  If not provided, this will be
            ``self.whereRegion``, if that exists.

        Returns
        -------
        func : `Callable`
            A callable that takes a single `sqlalchemy.engine.RowProxy`
            argument and returns `bool`.
        """
        whereRegion = region if region is not None else self.whereRegion
        # The region columns are the same for every row, so resolve them once
        # here instead of once per row inside the closure.
        regionColumns = [self.getRegionColumn(element.name) for element in self.spatial]

        def closure(row: sqlalchemy.engine.RowProxy) -> bool:
            rowRegions = [row[column] for column in regionColumns]
            # Every region in the row must overlap the constraint region...
            if whereRegion is not None and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
                return False
            # ...and every pair of regions in the row must overlap each other.
            return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))

        return closure

    def rows(self, db: Database, *, region: Optional[Region] = None
             ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
        """Execute the query and yield result rows, applying `predicate`.

        Parameters
        ----------
        db : `Database`
            Database object whose ``query`` method is used to execute
            ``self.sql``.
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order to be
            yielded.  If not provided, this will be ``self.whereRegion``, if
            that exists.

        Yields
        ------
        row : `sqlalchemy.engine.RowProxy` or `None`
            Result row from the query.  `None` may be yielded exactly once
            instead of any real rows to indicate an empty query (see
            `EmptyQuery`).
        """
        predicate = self.predicate(region)
        for row in db.query(self.sql):
            if predicate(row):
                yield row

    def extractDimensionsTuple(self, row: Optional[sqlalchemy.engine.RowProxy],
                               dimensions: Iterable[Dimension]) -> tuple:
        """Extract a tuple of data ID values from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy` or `None`
            A result row from a SQLAlchemy SELECT query, or `None` to indicate
            the row from an `EmptyQuery`.
        dimensions : `Iterable` [ `Dimension` ]
            The dimensions to include in the returned tuple, in order.

        Returns
        -------
        values : `tuple`
            A tuple of dimension primary key values.
        """
        if row is None:
            assert not tuple(dimensions), "Can only utilize empty query row when there are no dimensions."
            return ()
        return tuple(row[self.getDimensionColumn(dimension.name)] for dimension in dimensions)

    def extractDataId(self, row: Optional[sqlalchemy.engine.RowProxy], *,
                      graph: Optional[DimensionGraph] = None,
                      records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
                      ) -> DataCoordinate:
        """Extract a data ID from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy` or `None`
            A result row from a SQLAlchemy SELECT query, or `None` to indicate
            the row from an `EmptyQuery`.
        graph : `DimensionGraph`, optional
            The dimensions the returned data ID should identify.  If not
            provided, ``self.graph`` is used.
        records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
            Nested mapping containing records to attach to the returned
            `DataCoordinate`, for which `~DataCoordinate.hasRecords` will
            return `True`.  If provided, outer keys must include all dimension
            element names in ``graph``, and inner keys should be tuples of
            dimension primary key values in the same order as
            ``element.graph.required``.  If not provided,
            `DataCoordinate.hasRecords` will return `False` on the returned
            object.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID that identifies all required and implied dimensions.  If
            ``records is not None``, this will have
            `~DataCoordinate.hasRecords()` return `True`.
        """
        if graph is None:
            graph = self.graph
        if not graph:
            # No dimensions at all: there is nothing to read from the row.
            return DataCoordinate.makeEmpty(self.graph.universe)
        dataId = DataCoordinate.fromFullValues(
            graph,
            self.extractDimensionsTuple(row, itertools.chain(graph.required, graph.implied))
        )
        if records is None:
            return dataId
        recordsForRow = {}
        for element in graph.elements:
            key = tuple(dataId.subset(element.graph).values())
            # A missing inner key yields `None` for that element's record.
            recordsForRow[element.name] = records[element.name].get(key)
        return dataId.expanded(recordsForRow)

    def extractDatasetRef(self, row: sqlalchemy.engine.RowProxy,
                          dataId: Optional[DataCoordinate] = None,
                          records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
                          ) -> DatasetRef:
        """Extract a `DatasetRef` from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        dataId : `DataCoordinate`
            Data ID to attach to the `DatasetRef`.  If `None`, one is
            constructed from ``row`` via `extractDataId`, using the dataset
            type's dimensions.
        records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
            Records forwarded to `extractDataId` when ``dataId`` is `None`.
            If provided, outer keys must include all dimension element names
            in the dataset type's dimensions, and inner keys should be tuples
            of dimension primary key values in the same order as
            ``element.graph.required``.

        Returns
        -------
        ref : `DatasetRef`
            Reference to the dataset; guaranteed to have `DatasetRef.id` not
            `None`.
        """
        datasetColumns = self.getDatasetColumns()
        assert datasetColumns is not None
        if dataId is None:
            dataId = self.extractDataId(row, graph=datasetColumns.datasetType.dimensions, records=records)
        runRecord = self.managers.collections[row[datasetColumns.runKey]]
        return DatasetRef(datasetColumns.datasetType, dataId, id=row[datasetColumns.id], run=runRecord.name)

    def _makeTableSpec(self, constraints: bool = False) -> ddl.TableSpec:
        """Helper method for subclass implementations of `materialize`.

        Parameters
        ----------
        constraints : `bool`, optional
            If `True` (`False` is default), define a specification that
            includes actual foreign key constraints for logical foreign keys.
            Some database engines do not permit temporary tables to reference
            normal tables, so this should be `False` when generating a spec
            for a temporary table unless the database engine is known to
            support them.

        Returns
        -------
        spec : `ddl.TableSpec`
            Specification for a table that could hold this query's result rows.
        """
        unique = self.isUnique()
        spec = ddl.TableSpec(fields=())
        for dimension in self.graph:
            addDimensionForeignKey(spec, dimension, primaryKey=unique, constraint=constraints)
        for element in self.spatial:
            spec.fields.update(
                SpatialRegionDatabaseRepresentation.makeFieldSpecs(
                    nullable=True,
                    name=f"{element.name}_region",
                )
            )
        datasetColumns = self.getDatasetColumns()
        if datasetColumns is not None:
            self.managers.datasets.addDatasetForeignKey(spec, primaryKey=unique, constraint=constraints)
            self.managers.collections.addRunForeignKey(spec, nullable=False, constraint=constraints)
        return spec

    def _makeSubsetQueryColumns(self, *, graph: Optional[DimensionGraph] = None,
                                datasets: bool = True,
                                unique: bool = False) -> Tuple[DimensionGraph, Optional[QueryColumns]]:
        """Helper method for subclass implementations of `subset`.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new `Query` being constructed.
            ``subset`` implementations should generally just forward their
            own ``graph`` argument here.
        datasets : `bool`, optional
            Whether the new `Query` should include dataset results.  Defaults
            to `True`, but is ignored if ``self`` does not include dataset
            results.
        unique : `bool`, optional
            Whether the new `Query` should guarantee unique results (this may
            come with a performance penalty).

        Returns
        -------
        graph : `DimensionGraph`
            The dimensions of the new `Query`.  This is exactly the same as
            the argument of the same name, with ``self.graph`` used if that
            argument is `None`.
        columns : `QueryColumns` or `None`
            A struct containing the SQLAlchemy column objects to use in the
            new query, constructed by delegating to other (mostly abstract)
            methods on ``self``.  If `None`, `subset` may return ``self``.
        """
        if graph is None:
            graph = self.graph
        # Fetch the dataset columns once; they are needed both for the
        # "nothing changed" check and for populating the new struct.
        datasetColumns = self.getDatasetColumns()
        if (graph == self.graph and (datasetColumns is None or datasets)
                and (self.isUnique() or not unique)):
            return graph, None
        columns = QueryColumns()
        for dimension in graph.dimensions:
            col = self.getDimensionColumn(dimension.name)
            columns.keys[dimension] = [col]
        if not unique:
            # Region columns are only carried along when uniqueness is not
            # requested (see the notes on `subset`).
            for element in self.spatial:
                col = self.getRegionColumn(element.name)
                columns.regions[element] = col
        if datasets and datasetColumns is not None:
            columns.datasets = datasetColumns
        return graph, columns

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        """Execute this query and insert its results into a temporary table.

        Parameters
        ----------
        db : `Database`
            Database engine to execute the query against.

        Returns
        -------
        context : `typing.ContextManager` [ `MaterializedQuery` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a `MaterializedQuery` object
            backed by that table), and dropped in ``__exit__``, including when
            populating the table or the body of the ``with`` block raises.  If
            ``self`` is already a `MaterializedQuery`, ``__enter__`` may just
            return ``self`` and ``__exit__`` may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).
        """
        spec = self._makeTableSpec()
        with db.session() as session:
            table = session.makeTemporaryTable(spec)
            try:
                db.insert(table, select=self.sql, names=spec.fields.names)
                yield MaterializedQuery(table=table,
                                        spatial=self.spatial,
                                        datasetType=self.datasetType,
                                        isUnique=self.isUnique(),
                                        graph=self.graph,
                                        whereRegion=self.whereRegion,
                                        managers=self.managers)
            finally:
                # Always drop the table, even if the insert or the caller's
                # ``with`` body failed, so it does not outlive this context.
                session.dropTemporaryTable(table)

    @abstractmethod
    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        """Return a new `Query` whose columns and/or rows are (mostly) subset
        of this one's.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new `Query` being constructed.
            If `None` (default), ``self.graph`` is used.
        datasets : `bool`, optional
            Whether the new `Query` should include dataset results.  Defaults
            to `True`, but is ignored if ``self`` does not include dataset
            results.
        unique : `bool`, optional
            Whether the new `Query` should guarantee unique results (this may
            come with a performance penalty).

        Returns
        -------
        query : `Query`
            A query object corresponding to the given inputs.  May be ``self``
            if no changes were requested.

        Notes
        -----
        The way spatial overlaps are handled at present makes it impossible to
        fully guarantee in general that the new query's rows are a subset of
        this one's while also returning unique rows.  That's because the
        database is only capable of performing approximate, conservative
        overlaps via the common skypix system; we defer actual region overlap
        operations to per-result-row Python logic.  But including the region
        columns necessary to do that postprocessing in the query makes it
        impossible to do a SELECT DISTINCT on the user-visible dimensions of
        the query.  For example, consider starting with a query with dimensions
        (instrument, skymap, visit, tract).  That involves a spatial join
        between visit and tract, and we include the region columns from both
        tables in the results in order to only actually yield result rows
        (see `predicate` and `rows`) where the regions in those two columns
        overlap.  If the user then wants to subset to just (skymap, tract) with
        unique results, we have two unpalatable options:

         - we can do a SELECT DISTINCT with just the skymap and tract columns
           in the SELECT clause, dropping all detailed overlap information and
           including some tracts that did not actually overlap any of the
           visits in the original query (but were regarded as _possibly_
           overlapping via the coarser, common-skypix relationships);

         - we can include the tract and visit region columns in the query, and
           continue to filter out the non-overlapping pairs, but completely
           disregard the user's request for unique tracts.

        This interface specifies that implementations must do the former, as
        that's what makes things efficient in our most important use case
        (``QuantumGraph`` generation in ``pipe_base``).  We may be able to
        improve this situation in the future by putting exact overlap
        information in the database, either by using built-in (but
        engine-specific) spatial database functionality or (more likely)
        switching to a scheme in which pairwise dimension spatial relationships
        are explicitly precomputed (for e.g. combinations of instruments and
        skymaps).
        """
        raise NotImplementedError()

    @abstractmethod
    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        """Return a `QueryBuilder` that can be used to construct a new `Query`
        that is joined to (and hence constrained by) this one.

        Parameters
        ----------
        summary : `QuerySummary`, optional
            A `QuerySummary` instance that specifies the dimensions and any
            additional constraints to include in the new query being
            constructed, or `None` to use the dimensions of ``self`` with no
            additional constraints.
        """
        raise NotImplementedError()

    graph: DimensionGraph
    """The dimensions identified by this query and included in any data IDs
    created from its result rows (`DimensionGraph`).
    """

    whereRegion: Optional[Region]
    """A spatial region that all regions in all rows returned by this query
    must overlap (`lsst.sphgeom.Region` or `None`).
    """

    managers: RegistryManagers
    """A struct containing `Registry` helper object (`RegistryManagers`).
    """

575 

576 

@enum.unique
class DirectQueryUniqueness(enum.Enum):
    """Enumeration of the ways a query's result rows can be unique (or fail
    to be).
    """

    NOT_UNIQUE = enum.auto()
    """No uniqueness of result rows is expected from the query.
    """

    NATURALLY_UNIQUE = enum.auto()
    """Result rows are unique by construction of the query itself; neither
    SELECT DISTINCT nor a GROUP BY clause is required.
    """

    NEEDS_DISTINCT = enum.auto()
    """Result rows are expected to be unique, but only if the query uses
    SELECT DISTINCT or an equivalent GROUP BY clause.
    """

595 

596 

class DirectQuery(Query):
    """A `Query` backed by a direct SELECT statement, usually one that joins
    many tables.

    Instances are normally created by `QueryBuilder` or by methods of other
    `Query` objects rather than constructed by hand.

    Parameters
    ----------
    simpleQuery : `SimpleQuery`
        Struct holding the SELECT, FROM, and WHERE clauses.  Its column list
        must be empty; columns are added to a copy whenever `sql` is accessed.
    columns : `QueryColumns`
        Columns referenced by the query in any clause.  Must not be empty;
        `EmptyQuery` handles the no-column case.
    uniqueness : `DirectQueryUniqueness`
        Whether result rows should be unique and, if so, whether the database
        has to be asked for that explicitly.
    graph : `DimensionGraph`
        Object describing the dimensions included in the query.
    whereRegion : `lsst.sphgeom.Region`, optional
        Region that all region columns in all returned rows must overlap.
    managers : `RegistryManagers`
        Struct of `Registry` manager helper objects, passed through to the
        `Query` constructor.
    """
    def __init__(self, *,
                 simpleQuery: SimpleQuery,
                 columns: QueryColumns,
                 uniqueness: DirectQueryUniqueness,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers):
        super().__init__(graph=graph, whereRegion=whereRegion, managers=managers)
        assert not simpleQuery.columns, "Columns should always be set on a copy in .sql"
        assert not columns.isEmpty(), "EmptyQuery must be used when a query would have no columns."
        self._simpleQuery = simpleQuery
        self._columns = columns
        self._uniqueness = uniqueness

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        notUnique = DirectQueryUniqueness.NOT_UNIQUE
        return self._uniqueness is not notUnique

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        keyColumn = self._columns.getKeyColumn(name)
        return keyColumn.label(name)

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(self._columns.regions)

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        regionColumn = self._columns.regions[name].column
        return regionColumn.label(f"{name}_region")

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        unlabeled = self._columns.datasets
        if unlabeled is None:
            return None
        # The ingest date column is optional; only label it when present.
        ingestDate = unlabeled.ingestDate
        if ingestDate is not None:
            ingestDate = ingestDate.label("ingest_date")
        return DatasetQueryColumns(
            datasetType=unlabeled.datasetType,
            id=unlabeled.id.label("dataset_id"),
            runKey=unlabeled.runKey.label(self.managers.collections.getRunForeignKeyName()),
            ingestDate=ingestDate,
        )

    @property
    def sql(self) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from Query.
        # Work on a copy so the stored SimpleQuery keeps an empty column list.
        query = self._simpleQuery.copy()
        query.columns.extend(self.getDimensionColumn(dimension.name) for dimension in self.graph)
        query.columns.extend(self.getRegionColumn(element.name) for element in self.spatial)
        datasetColumns = self.getDatasetColumns()
        if datasetColumns is not None:
            query.columns.extend(datasetColumns)
        combined = query.combine()
        if self._uniqueness is DirectQueryUniqueness.NEEDS_DISTINCT:
            combined = combined.distinct()
        return combined

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
        if columns is None:
            # Nothing would change, so this object can be reused.
            return self
        if columns.isEmpty():
            return EmptyQuery(self.graph.universe, self.managers)
        if unique:
            uniqueness = DirectQueryUniqueness.NEEDS_DISTINCT
            whereRegion = None
        else:
            uniqueness = DirectQueryUniqueness.NOT_UNIQUE
            whereRegion = self.whereRegion
        return DirectQuery(
            simpleQuery=self._simpleQuery.copy(),
            columns=columns,
            uniqueness=uniqueness,
            graph=graph,
            whereRegion=whereRegion,
            managers=self.managers,
        )

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        result = QueryBuilder(summary, managers=self.managers)
        # Join this query in as an aliased subquery.
        subquery = self.sql.alias()
        result.joinTable(subquery, dimensions=self.graph.dimensions,
                         datasets=self.getDatasetColumns())
        return result

718 

719 

class MaterializedQuery(Query):
    """A `Query` whose results have been saved in a temporary table.

    Do not construct `MaterializedQuery` instances directly; call
    `Query.materialize()` instead.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy object representing the temporary table.
    spatial : `Iterable` [ `DimensionElement` ]
        Spatial dimension elements whose regions must overlap for each valid
        result row (which may reject some rows that are in the table).
    datasetType : `DatasetType`
        The `DatasetType` of datasets returned by this query, or `None`
        if there are no dataset results.
    isUnique : `bool`
        If `True`, the table's rows are unique, and there is no need to
        add ``SELECT DISTINCT`` to guarantee this in results.
    graph : `DimensionGraph`
        Dimensions included in the columns of this table.
    whereRegion : `Region` or `None`
        A spatial region all result-row regions must overlap to be valid (which
        may reject some rows that are in the table).
    managers : `RegistryManagers`
        A struct containing `Registry` manager helper objects, forwarded to
        the `Query` constructor.
    """
    def __init__(self, *,
                 table: sqlalchemy.schema.Table,
                 spatial: Iterable[DimensionElement],
                 datasetType: Optional[DatasetType],
                 isUnique: bool,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers):
        super().__init__(graph=graph, whereRegion=whereRegion, managers=managers)
        self._table = table
        # Stored as a tuple so `spatial` can hand out a fresh iterator on
        # every access.
        self._spatial = tuple(spatial)
        self._datasetType = datasetType
        self._isUnique = isUnique

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return self._isUnique

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        tableColumns = self._table.columns
        return tableColumns[name]

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(self._spatial)

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        tableColumns = self._table.columns
        return tableColumns[f"{name}_region"]

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        if self._datasetType is None:
            return None
        # No ingest date column is read back from the table.
        return DatasetQueryColumns(
            datasetType=self._datasetType,
            id=self._table.columns["dataset_id"],
            runKey=self._table.columns[self.managers.collections.getRunForeignKeyName()],
            ingestDate=None,
        )

    @property
    def sql(self) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from Query.
        return self._table.select()

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        # Docstring inherited from Query.
        # Already backed by a table, so no new table is created or dropped.
        yield self

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
        if columns is None:
            # Nothing would change, so this object can be reused.
            return self
        if columns.isEmpty():
            return EmptyQuery(self.graph.universe, managers=self.managers)
        # The new query selects directly from the materialized table.
        tableQuery = SimpleQuery()
        tableQuery.join(self._table)
        if unique:
            uniqueness = DirectQueryUniqueness.NEEDS_DISTINCT
            whereRegion = None
        else:
            uniqueness = DirectQueryUniqueness.NOT_UNIQUE
            whereRegion = self.whereRegion
        return DirectQuery(
            simpleQuery=tableQuery,
            columns=columns,
            uniqueness=uniqueness,
            graph=graph,
            whereRegion=whereRegion,
            managers=self.managers,
        )

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        result = QueryBuilder(summary, managers=self.managers)
        result.joinTable(self._table, dimensions=self.graph.dimensions, datasets=self.getDatasetColumns())
        return result

836 

837 

class EmptyQuery(Query):
    """A `Query` for the special case in which the query would have no
    columns.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Set of all dimensions from which the null set is extracted.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.
    """
    def __init__(self, universe: DimensionUniverse, managers: RegistryManagers):
        emptyGraph = universe.empty
        super().__init__(graph=emptyGraph, whereRegion=None, managers=managers)

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return True

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        raise KeyError(f"No dimension {name} in query (no dimensions at all, actually).")

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        noElements = ()
        return iter(noElements)

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        raise KeyError(f"No region for {name} in query (no regions at all, actually).")

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        return None

    def rows(self, db: Database, *, region: Optional[Region] = None
             ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
        # Docstring inherited from Query.
        # The database is not consulted: a single `None` stands in for the
        # one logical row of a query with no columns.
        yield None

    @property
    def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
        # Docstring inherited from Query.
        return None

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        # Docstring inherited from Query.
        # No table is created for an empty query.
        yield self

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        assert graph is None or graph.issubset(self.graph)
        return self

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        # There is no table or subquery to join in.
        result = QueryBuilder(summary, managers=self.managers)
        return result