Coverage for python/lsst/daf/butler/registry/queries/_builder.py: 11%

183 statements  

coverage.py v6.5.0, created at 2023-10-26 15:15 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("QueryBuilder",)

from collections.abc import Iterable, Set
from typing import Any

import sqlalchemy.sql

from ...core import DatasetType, Dimension, DimensionElement, SimpleQuery, SkyPixDimension
from ...core.named import NamedKeyDict, NamedValueAbstractSet, NamedValueSet
from .._collectionType import CollectionType
from .._exceptions import DataIdValueError
from ..interfaces import CollectionRecord, DatasetRecordStorage, GovernorDimensionRecordStorage
from ..wildcards import CollectionWildcard
from ._query import DirectQuery, DirectQueryUniqueness, EmptyQuery, OrderByColumn, Query
from ._query_backend import QueryBackend
from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary
from .expressions import convertExpressionToSql


class QueryBuilder:
    """A builder for potentially complex queries that join tables based
    on dimension relationships.

    Parameters
    ----------
    summary : `QuerySummary`
        Struct organizing the dimensions involved in the query.
    backend : `QueryBackend`
        Backend object that represents the `Registry` implementation.
    doomed_by : `Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed. Queries with a non-empty list will never be executed.
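
    Examples
    --------
    A hypothetical sketch of typical use; ``summary``, ``backend``, and
    ``calexp_type`` (an already-resolved `DatasetType`) are assumptions
    rather than objects defined in this module::

        builder = QueryBuilder(summary, backend)
        builder.joinDataset(calexp_type, collections=["HSC/runs/RC2"])
        query = builder.finish()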

    """

    def __init__(
        self,
        summary: QuerySummary,
        backend: QueryBackend,
        doomed_by: Iterable[str] = (),
    ):
        self.summary = summary
        self._backend = backend
        self._simpleQuery = SimpleQuery()
        self._elements: NamedKeyDict[DimensionElement, sqlalchemy.sql.FromClause] = NamedKeyDict()
        self._columns = QueryColumns()
        self._doomed_by = list(doomed_by)

        self._validateGovernors()

    def _validateGovernors(self) -> None:
        """Check that governor dimensions specified by the query actually exist.

        This helps to avoid mistakes in governor values. It also implements
        consistent failure behavior for cases when governor dimensions are
        specified in either a DataId or a WHERE clause.

        Raises
        ------
        DataIdValueError
            Raised when governor dimension values are not found.
        """
        for dimension, bounds in self.summary.where.governor_constraints.items():
            storage = self._backend.managers.dimensions[self._backend.universe[dimension]]
            if isinstance(storage, GovernorDimensionRecordStorage):
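                # ``bounds`` holds the governor values the query requires;
                # any value not already present in the registry (e.g. an
                # unknown instrument name) triggers the error below.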

                if not (storage.values >= bounds):
                    raise DataIdValueError(
                        f"Unknown values specified for governor dimension {dimension}: "
                        f"{set(bounds - storage.values)}."
                    )

    def hasDimensionKey(self, dimension: Dimension) -> bool:
        """Return `True` if the given dimension's primary key column has
        been included in the query (possibly via a foreign key column on some
        other table).
        """
        return dimension in self._columns.keys

    def joinDimensionElement(self, element: DimensionElement) -> None:
        """Add the table for a `DimensionElement` to the query.

        This automatically joins the element table to all other tables in the
        query with which it is related, via both dimension keys and spatial
        and temporal relationships.

        External calls to this method should rarely be necessary; `finish`
        will automatically call it if the `DimensionElement` has been
        identified as one that must be included.

        Parameters
        ----------
        element : `DimensionElement`
            Element for which a table should be added. The element must be
            associated with a database table (see `DimensionElement.hasTable`).
        """
        assert element not in self._elements, "Element already included in query."
        storage = self._backend.managers.dimensions[element]
        fromClause = storage.join(
            self,
            regions=self._columns.regions if element in self.summary.spatial else None,
            timespans=self._columns.timespans if element in self.summary.temporal else None,
        )
        self._elements[element] = fromClause

    def joinDataset(
        self, datasetType: DatasetType, collections: Any, *, isResult: bool = True, findFirst: bool = False
    ) -> bool:
        """Add a dataset search or constraint to the query.

        Unlike other `QueryBuilder` join methods, this *must* be called
        directly to search for datasets of a particular type or to constrain
        the query results based on the existence of datasets. However, all
        dimensions used to identify the dataset type must have already been
        included in `QuerySummary.requested` when initializing the
        `QueryBuilder`.

        Parameters
        ----------
        datasetType : `DatasetType`
            The type of datasets to search for.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for datasets, such as a `str`, `re.Pattern`, or iterable
            thereof. `...` can be used to return all collections. See
            :ref:`daf_butler_collection_expressions` for more information.
        isResult : `bool`, optional
            If `True` (default), include the dataset ID column in the
            result columns of the query, allowing complete `DatasetRef`
            instances to be produced from the query results for this dataset
            type. If `False`, the existence of datasets of this type is used
            only to constrain the data IDs returned by the query.
            `joinDataset` may be called with ``isResult=True`` at most once
            on a particular `QueryBuilder` instance.
        findFirst : `bool`, optional
            If `True` (`False` is default), only include the first match for
            each data ID, searching the given collections in order. Requires
            that all entries in ``collections`` be regular strings, so there
            is a clear search order. Ignored if ``isResult`` is `False`.

        Returns
        -------
        anyRecords : `bool`
            If `True`, joining the dataset table was successful and the query
            should proceed. If `False`, we were able to determine (from the
            combination of ``datasetType`` and ``collections``) that there
            would be no results joined in from this dataset, and hence (due to
            the inner join that would normally be present) the full query will
            return no results.
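
        Examples
        --------
        A hypothetical sketch; ``builder`` and ``bias_type`` (an
        already-resolved `DatasetType`) are assumptions rather than objects
        defined in this module::

            # Search two collections in order, keeping only the first match
            # for each data ID.
            found = builder.joinDataset(bias_type, ["calib/v2", "calib/v1"], findFirst=True)
            if found:
                query = builder.finish()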

        """
        assert datasetType in self.summary.datasets
        collections = CollectionWildcard.from_expression(collections)
        if isResult and findFirst:
            collections.require_ordered()
        # If we are searching all collections with no constraints, loop over
        # RUN collections only, because that will include all datasets.
        collectionTypes: Set[CollectionType]
        if collections == CollectionWildcard():
            collectionTypes = {CollectionType.RUN}
        else:
            collectionTypes = CollectionType.all()
        datasetRecordStorage = self._backend.managers.datasets.find(datasetType.name)
        if datasetRecordStorage is None:
            # Unrecognized dataset type means no results. It might be better
            # to raise here, but this is consistent with previous behavior,
            # which is expected by QuantumGraph generation code in pipe_base.
            self._doomed_by.append(
                f"Dataset type {datasetType.name!r} is not registered, so no instances of it can exist in "
                "any collection."
            )
            return False
        collectionRecords: list[CollectionRecord] = []
        rejections: list[str] = []
        for collectionRecord in self._backend.resolve_collection_wildcard(
            collections, collection_types=collectionTypes
        ):
            # Only include collections that (according to collection
            # summaries) might have datasets of this type and governor
            # dimensions consistent with the query's WHERE clause.
            collection_summary = self._backend.managers.datasets.getCollectionSummary(collectionRecord)
            if not collection_summary.is_compatible_with(
                datasetType,
                self.summary.where.governor_constraints,
                rejections=rejections,
                name=collectionRecord.name,
            ):
                continue
            if collectionRecord.type is CollectionType.CALIBRATION:
                # If the collection name was provided explicitly, raise if
                # this is a kind of query we don't support yet; otherwise the
                # collection is part of a chained collection or a regex
                # match, and we skip it so as not to break queries of the
                # other included collections.
                if datasetType.isCalibration():
                    if self.summary.temporal or self.summary.mustHaveKeysJoined.temporal:
                        if collectionRecord.name in collections.strings:
                            raise NotImplementedError(
                                f"Temporal query for dataset type '{datasetType.name}' in CALIBRATION-type "
                                f"collection '{collectionRecord.name}' is not yet supported."
                            )
                        else:
                            rejections.append(
                                f"Not searching for dataset {datasetType.name!r} in CALIBRATION collection "
                                f"{collectionRecord.name!r} because temporal calibration queries aren't "
                                "implemented; this is not an error only because the query structure implies "
                                "that searching this collection may be incidental."
                            )
                            continue
                    elif findFirst:
                        if collectionRecord.name in collections.strings:
                            raise NotImplementedError(
                                f"Find-first query for dataset type '{datasetType.name}' in "
                                f"CALIBRATION-type collection '{collectionRecord.name}' is not yet "
                                "supported."
                            )
                        else:
                            rejections.append(
                                f"Not searching for dataset {datasetType.name!r} in CALIBRATION collection "
                                f"{collectionRecord.name!r} because find-first calibration queries aren't "
                                "implemented; this is not an error only because the query structure implies "
                                "that searching this collection may be incidental."
                            )
                            continue
                    else:
                        collectionRecords.append(collectionRecord)
                else:
                    # We can never find a non-calibration dataset in a
                    # CALIBRATION collection.
                    rejections.append(
                        f"Not searching for non-calibration dataset {datasetType.name!r} "
                        f"in CALIBRATION collection {collectionRecord.name!r}."
                    )
                    continue
            else:
                collectionRecords.append(collectionRecord)
        if isResult:
            if findFirst:
                subquery = self._build_dataset_search_subquery(
                    datasetRecordStorage,
                    collectionRecords,
                )
            else:
                subquery = self._build_dataset_query_subquery(
                    datasetRecordStorage,
                    collectionRecords,
                )
            columns = DatasetQueryColumns(
                datasetType=datasetType,
                id=subquery.columns["id"],
                runKey=subquery.columns[self._backend.managers.collections.getRunForeignKeyName()],
                ingestDate=subquery.columns["ingest_date"],
            )
        else:
            subquery = self._build_dataset_constraint_subquery(datasetRecordStorage, collectionRecords)
            columns = None
        self.joinTable(subquery, datasetType.dimensions.required, datasets=columns)
        if not collectionRecords:
            if rejections:
                self._doomed_by.extend(rejections)
            else:
                self._doomed_by.append(f"No collections to search matching expression {collections}.")
            return False
        return not self._doomed_by

    def _build_dataset_constraint_subquery(
        self, storage: DatasetRecordStorage, collections: list[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that does not return dataset results.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched. Collections with no
            datasets of this type or with governor dimensions incompatible
            with the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should also be filtered
            out if this is a temporal query.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object. Has columns for each
            dataset type dimension, or an unspecified column (just to prevent
            SQL syntax errors) where there is no data ID.
        """
        return storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            # If this dataset type has no dimensions, we're in danger of
            # generating an invalid subquery that has no columns in the
            # SELECT clause. An easy fix is to just select some arbitrary
            # column that goes unused, like the dataset ID.
            id=None if storage.datasetType.dimensions else SimpleQuery.Select,
            run=None,
            ingestDate=None,
            timespan=None,
        ).alias(storage.datasetType.name)

    def _build_dataset_query_subquery(
        self, storage: DatasetRecordStorage, collections: list[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that returns all matching dataset results.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched. Collections with no
            datasets of this type or with governor dimensions incompatible
            with the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should also be filtered
            out if this is a temporal query.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object. Has columns for each dataset
            type dimension, the dataset ID, the `~CollectionType.RUN`
            collection key, and the ingest date.
        """
        sql = storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            id=SimpleQuery.Select,
            run=SimpleQuery.Select,
            ingestDate=SimpleQuery.Select,
            timespan=None,
        ).alias(storage.datasetType.name)
        return sql

    def _build_dataset_search_subquery(
        self, storage: DatasetRecordStorage, collections: list[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that returns the first matching dataset for each data ID and
        dataset type name from an ordered list of collections.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched. Collections with no
            datasets of this type or with governor dimensions incompatible
            with the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should be filtered out
            as well.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object. Has columns for each dataset
            type dimension, the dataset ID, the `~CollectionType.RUN`
            collection key, and the ingest date.
        """
        # Query-simplification shortcut: if there is only one collection, a
        # find-first search is just a regular result subquery. The same is
        # true if this is a doomed query with no collections to search.
        if len(collections) <= 1:
            return self._build_dataset_query_subquery(storage, collections)
        # In the more general case, we build a subquery of the form below to
        # search the collections in order.
        #
        # WITH {dst}_search AS (
        #     SELECT {data-id-cols}, id, run_id, 1 AS rank
        #         FROM <collection1>
        #     UNION ALL
        #     SELECT {data-id-cols}, id, run_id, 2 AS rank
        #         FROM <collection2>
        #     UNION ALL
        #     ...
        # )
        # SELECT
        #     {dst}_window.{data-id-cols},
        #     {dst}_window.id,
        #     {dst}_window.run_id
        # FROM (
        #     SELECT
        #         {dst}_search.{data-id-cols},
        #         {dst}_search.id,
        #         {dst}_search.run_id,
        #         ROW_NUMBER() OVER (
        #             PARTITION BY {dst}_search.{data-id-cols}
        #             ORDER BY rank
        #         ) AS rownum
        #     FROM {dst}_search
        # ) {dst}_window
        # WHERE
        #     {dst}_window.rownum = 1;
        #
        # We'll start with the Common Table Expression (CTE) at the top.
        search = storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            id=SimpleQuery.Select,
            run=SimpleQuery.Select,
            ingestDate=SimpleQuery.Select,
            timespan=None,
            rank=SimpleQuery.Select,
        ).cte(f"{storage.datasetType.name}_search")
        # Now we fill out the SELECT from the CTE, and the subquery it
        # contains (at the same time, since they have the same columns,
        # aside from the OVER clause).
        run_key_name = self._backend.managers.collections.getRunForeignKeyName()
        window_data_id_cols = [
            search.columns[name].label(name) for name in storage.datasetType.dimensions.required.names
        ]
        window_select_cols = [
            search.columns["id"].label("id"),
            search.columns[run_key_name].label(run_key_name),
            search.columns["ingest_date"].label("ingest_date"),
        ]
        window_select_cols += window_data_id_cols
        window_select_cols.append(
            sqlalchemy.sql.func.row_number()
            .over(partition_by=window_data_id_cols, order_by=search.columns["rank"])
            .label("rownum")
        )
        window = (
            sqlalchemy.sql.select(*window_select_cols)
            .select_from(search)
            .alias(f"{storage.datasetType.name}_window")
        )
        sql = (
            sqlalchemy.sql.select(*[window.columns[col.name].label(col.name) for col in window_select_cols])
            .select_from(window)
            .where(window.columns["rownum"] == 1)
            .alias(storage.datasetType.name)
        )
        return sql
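
    # For reference, a minimal standalone sketch of the ROW_NUMBER()
    # find-first pattern used above; the table and column names here are
    # illustrative assumptions, not part of this module:
    #
    #     import sqlalchemy
    #
    #     metadata = sqlalchemy.MetaData()
    #     search = sqlalchemy.Table(
    #         "search",
    #         metadata,
    #         sqlalchemy.Column("data_id", sqlalchemy.Integer),
    #         sqlalchemy.Column("id", sqlalchemy.Integer),
    #         sqlalchemy.Column("rank", sqlalchemy.Integer),
    #     )
    #     # Number the rows within each data ID, best (lowest) rank first.
    #     rownum = (
    #         sqlalchemy.sql.func.row_number()
    #         .over(partition_by=[search.columns["data_id"]], order_by=search.columns["rank"])
    #         .label("rownum")
    #     )
    #     window = (
    #         sqlalchemy.sql.select(search.columns["data_id"], search.columns["id"], rownum)
    #         .select_from(search)
    #         .alias("window")
    #     )
    #     # Keep only the first-ranked row for each data ID.
    #     best = (
    #         sqlalchemy.sql.select(window.columns["data_id"], window.columns["id"])
    #         .where(window.columns["rownum"] == 1)
    #     )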

    def joinTable(
        self,
        table: sqlalchemy.sql.FromClause,
        dimensions: NamedValueAbstractSet[Dimension],
        *,
        datasets: DatasetQueryColumns | None = None,
    ) -> None:
        """Join an arbitrary table to the query via dimension relationships.

        External calls to this method should only be necessary for tables
        whose records represent neither datasets nor dimension elements.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in
            the query. The table must have columns with the names of the
            dimensions.
        datasets : `DatasetQueryColumns`, optional
            Columns that identify a dataset that is part of the query
            results.
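
        Examples
        --------
        A hypothetical sketch; ``builder``, ``overlap_table``, and ``dims``
        (a set of the table's dimensions) are assumptions rather than
        objects defined in this module::

            builder.joinTable(overlap_table, dims)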

        """
        unexpectedDimensions = NamedValueSet(dimensions - self.summary.mustHaveKeysJoined.dimensions)
        unexpectedDimensions.discard(self._backend.universe.commonSkyPix)
        if unexpectedDimensions:
            raise NotImplementedError(
                f"QueryBuilder does not yet support joining in dimensions {unexpectedDimensions} that "
                f"were not provided originally to the QuerySummary object passed at construction."
            )
        joinOn = self.startJoin(table, dimensions, dimensions.names)
        self.finishJoin(table, joinOn)
        if datasets is not None:
            assert (
                self._columns.datasets is None
            ), "At most one result dataset type can be returned by a query."
            self._columns.datasets = datasets

    def startJoin(
        self, table: sqlalchemy.sql.FromClause, dimensions: Iterable[Dimension], columnNames: Iterable[str]
    ) -> list[sqlalchemy.sql.ColumnElement]:
        """Begin a join on dimensions.

        Must be followed by a call to `finishJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in
            the query. The table must have columns with the names of the
            dimensions.
        columnNames : iterable of `str`
            Names of the columns that correspond to dimension key values;
            must be iterable in parallel (i.e. `zip`-compatible) with
            ``dimensions``.

        Returns
        -------
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN.
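
        Examples
        --------
        A hypothetical sketch of the two-step join; ``builder``,
        ``my_table``, and ``visit_dim`` are assumed names, not objects
        defined in this module::

            joinOn = builder.startJoin(my_table, [visit_dim], ["visit"])
            # Custom ON terms may be appended before finishing the join.
            joinOn.append(my_table.columns["tract"] == 42)
            builder.finishJoin(my_table, joinOn)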

        """
        joinOn = []
        for dimension, columnName in zip(dimensions, columnNames):
            columnInTable = table.columns[columnName]
            columnsInQuery = self._columns.keys.setdefault(dimension, [])
            for columnInQuery in columnsInQuery:
                joinOn.append(columnInQuery == columnInTable)
            columnsInQuery.append(columnInTable)
        return joinOn

    def finishJoin(
        self, table: sqlalchemy.sql.FromClause, joinOn: list[sqlalchemy.sql.ColumnElement]
    ) -> None:
        """Complete a join on dimensions.

        Must be preceded by a call to `startJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined. Must be the same
            object passed to `startJoin`.
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN. Should include
            at least the elements of the list returned by `startJoin`.
        """
        onclause: sqlalchemy.sql.ColumnElement | None
        if len(joinOn) == 0:
            onclause = None
        elif len(joinOn) == 1:
            onclause = joinOn[0]
        else:
            onclause = sqlalchemy.sql.and_(*joinOn)
        self._simpleQuery.join(table, onclause=onclause)

    def _joinMissingDimensionElements(self) -> None:
        """Join all dimension element tables that were identified as
        necessary by `QuerySummary` and have not yet been joined.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        # Join all DimensionElement tables that we need for spatial/temporal
        # joins/filters or a nontrivial WHERE expression.
        # We iterate over these in *reverse* topological order to minimize
        # the number of tables joined. For example, the "visit" table
        # provides the primary key value for the "instrument" table it
        # depends on, so we don't need to join "instrument" as well unless
        # we had a nontrivial expression on it (and hence included it
        # already above).
        for element in self._backend.universe.sorted(self.summary.mustHaveTableJoined, reverse=True):
            self.joinDimensionElement(element)
        # Join in any requested Dimension tables that don't already have
        # their primary keys identified by the query.
        for dimension in self._backend.universe.sorted(self.summary.mustHaveKeysJoined, reverse=True):
            if dimension not in self._columns.keys:
                self.joinDimensionElement(dimension)

    def _addWhereClause(self) -> None:
        """Add a WHERE clause to the query under construction, connecting all
        joined dimensions to the expression and data ID dimensions from
        `QuerySummary`.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        if self.summary.where.tree is not None:
            self._simpleQuery.where.append(
                convertExpressionToSql(
                    self.summary.where.tree,
                    self._backend.universe,
                    columns=self._columns,
                    elements=self._elements,
                    bind=self.summary.where.bind,
                    TimespanReprClass=self._backend.managers.column_types.timespan_cls,
                )
            )
        for dimension, columnsInQuery in self._columns.keys.items():
            if dimension in self.summary.where.dataId.graph:
                givenKey = self.summary.where.dataId[dimension]
                # Add a WHERE term for each column that corresponds to each
                # key. This is redundant with the JOIN ON clauses that make
                # them equal to each other, but more constraints have a
                # chance of making things easier on the DB's query optimizer.
                for columnInQuery in columnsInQuery:
                    self._simpleQuery.where.append(columnInQuery == givenKey)
            else:
                # Dimension is not fully identified, but it might be a skypix
                # dimension that's constrained by a given region.
                if self.summary.where.region is not None and isinstance(dimension, SkyPixDimension):
                    # We know the region now.
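                    # `envelope` yields half-open (begin, end) pixel-index
                    # ranges; e.g. a range of (10, 12) covers pixel IDs 10
                    # and 11, which `range` expands below (values here are
                    # illustrative only).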

                    givenSkyPixIds: list[int] = []
                    for begin, end in dimension.pixelization.envelope(self.summary.where.region):
                        givenSkyPixIds.extend(range(begin, end))
                    for columnInQuery in columnsInQuery:
                        self._simpleQuery.where.append(columnInQuery.in_(givenSkyPixIds))
        # If we are given a dataId with a timespan, and there are one or more
        # timespans in the query that aren't given, add a WHERE expression
        # for each of them.
        if self.summary.where.dataId.graph.temporal and self.summary.temporal:
            # Timespan is known now.
            givenInterval = self.summary.where.dataId.timespan
            assert givenInterval is not None
            for element, intervalInQuery in self._columns.timespans.items():
                assert element not in self.summary.where.dataId.graph.elements
                self._simpleQuery.where.append(
                    intervalInQuery.overlaps(
                        self._backend.managers.column_types.timespan_cls.fromLiteral(givenInterval)
                    )
                )

    def finish(self, joinMissing: bool = True) -> Query:
        """Finish constructing the query, returning a new `Query` instance.

        Parameters
        ----------
        joinMissing : `bool`, optional
            If `True` (default), automatically join any missing dimension
            element tables (according to the categorization of the
            `QuerySummary` the builder was constructed with). `False` should
            only be passed if the caller can independently guarantee that all
            dimension relationships are already captured in non-dimension
            tables that have been manually included in the query.

        Returns
        -------
        query : `Query`
            A `Query` object that can be executed and used to interpret
            result rows.
        """
        if joinMissing:
            self._joinMissingDimensionElements()
        self._addWhereClause()
        if self._columns.isEmpty():
            return EmptyQuery(
                self._backend.universe,
                backend=self._backend,
                doomed_by=self._doomed_by,
            )
        return DirectQuery(
            graph=self.summary.requested,
            uniqueness=DirectQueryUniqueness.NOT_UNIQUE,
            whereRegion=self.summary.where.region,
            simpleQuery=self._simpleQuery,
            columns=self._columns,
            order_by_columns=self._order_by_columns(),
            limit=self.summary.limit,
            backend=self._backend,
            doomed_by=self._doomed_by,
        )

    def _order_by_columns(self) -> Iterable[OrderByColumn]:
        """Generate columns to be used for the ORDER BY clause.

        Returns
        -------
        order_by_columns : `Iterable` [ `OrderByColumn` ]
            Sequence of columns to appear in the ORDER BY clause.
        """
        order_by_columns: list[OrderByColumn] = []
        if not self.summary.order_by:
            return order_by_columns

        for order_by_column in self.summary.order_by.order_by_columns:
            column: sqlalchemy.sql.ColumnElement
            if order_by_column.column is None:
                # A dimension name; its key is already in the SELECT list,
                # so we only need to add it to ORDER BY.
                assert isinstance(order_by_column.element, Dimension), "expecting full Dimension"
                column = self._columns.getKeyColumn(order_by_column.element)
            else:
                table = self._elements[order_by_column.element]

                if order_by_column.column in ("timespan.begin", "timespan.end"):
                    TimespanReprClass = self._backend.managers.column_types.timespan_cls
                    timespan_repr = TimespanReprClass.from_columns(table.columns)
                    if order_by_column.column == "timespan.begin":
                        column = timespan_repr.lower()
                        label = f"{order_by_column.element.name}_timespan_begin"
                    else:
                        column = timespan_repr.upper()
                        label = f"{order_by_column.element.name}_timespan_end"
                else:
                    column = table.columns[order_by_column.column]
                    # Make a unique label for it.
                    label = f"{order_by_column.element.name}_{order_by_column.column}"

                column = column.label(label)

            order_by_columns.append(OrderByColumn(column=column, ordering=order_by_column.ordering))

        return order_by_columns