# python/lsst/daf/butler/registry/queries/_builder.py

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("QueryBuilder",)

from collections.abc import Iterable, Set
from typing import Any

import sqlalchemy.sql

from ...core import DatasetType, Dimension, DimensionElement, SimpleQuery, SkyPixDimension
from ...core.named import NamedKeyDict, NamedValueAbstractSet, NamedValueSet
from .._collectionType import CollectionType
from .._exceptions import DataIdValueError
from ..interfaces import CollectionRecord, DatasetRecordStorage, GovernorDimensionRecordStorage
from ..wildcards import CollectionQuery, CollectionSearch
from ._query import DirectQuery, DirectQueryUniqueness, EmptyQuery, OrderByColumn, Query
from ._query_backend import QueryBackend
from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary
from .expressions import convertExpressionToSql


class QueryBuilder:
    """A builder for potentially complex queries that join tables based
    on dimension relationships.

    Parameters
    ----------
    summary : `QuerySummary`
        Struct organizing the dimensions involved in the query.
    backend : `QueryBackend`
        Backend object that represents the `Registry` implementation.
    doomed_by : `Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed.  Queries with a non-empty list will never be executed.
    """

    def __init__(
        self,
        summary: QuerySummary,
        backend: QueryBackend,
        doomed_by: Iterable[str] = (),
    ):
        self.summary = summary
        self._backend = backend
        self._simpleQuery = SimpleQuery()
        self._elements: NamedKeyDict[DimensionElement, sqlalchemy.sql.FromClause] = NamedKeyDict()
        self._columns = QueryColumns()
        self._doomed_by = list(doomed_by)

        self._validateGovernors()

    def _validateGovernors(self) -> None:
        """Check that governor dimensions specified by the query actually
        exist.

        This helps to avoid mistakes in governor values.  It also implements
        consistent failure behavior for cases when governor dimensions are
        specified in either a data ID or a WHERE clause.

        Raises
        ------
        DataIdValueError
            Raised when governor dimension values are not found.
        """
        for dimension, bounds in self.summary.where.governor_constraints.items():
            storage = self._backend.managers.dimensions[self._backend.universe[dimension]]
            if isinstance(storage, GovernorDimensionRecordStorage):
                if not (storage.values >= bounds):
                    raise DataIdValueError(
                        f"Unknown values specified for governor dimension {dimension}: "
                        f"{set(bounds - storage.values)}."
                    )
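
    # For illustration (hypothetical values, not part of this module): with
    # ``storage.values == {"HSC", "DECam"}`` and a constraint of
    # ``bounds == {"HSC", "LSSTCam"}``, the superset check above fails and
    # ``{"LSSTCam"}`` is reported as unknown:
    #
    #     >>> {"HSC", "DECam"} >= {"HSC", "LSSTCam"}
    #     False
    #     >>> {"HSC", "LSSTCam"} - {"HSC", "DECam"}
    #     {'LSSTCam'}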

    def hasDimensionKey(self, dimension: Dimension) -> bool:
        """Return `True` if the given dimension's primary key column has
        been included in the query (possibly via a foreign key column on some
        other table).
        """
        return dimension in self._columns.keys

    def joinDimensionElement(self, element: DimensionElement) -> None:
        """Add the table for a `DimensionElement` to the query.

        This automatically joins the element table to all other tables in the
        query with which it is related, via both dimension keys and spatial
        and temporal relationships.

        External calls to this method should rarely be necessary; `finish`
        will automatically call it if the `DimensionElement` has been
        identified as one that must be included.

        Parameters
        ----------
        element : `DimensionElement`
            Element for which a table should be added.  The element must be
            associated with a database table (see `DimensionElement.hasTable`).
        """
        assert element not in self._elements, "Element already included in query."
        storage = self._backend.managers.dimensions[element]
        fromClause = storage.join(
            self,
            regions=self._columns.regions if element in self.summary.spatial else None,
            timespans=self._columns.timespans if element in self.summary.temporal else None,
        )
        self._elements[element] = fromClause

    def joinDataset(
        self, datasetType: DatasetType, collections: Any, *, isResult: bool = True, findFirst: bool = False
    ) -> bool:
        """Add a dataset search or constraint to the query.

        Unlike other `QueryBuilder` join methods, this *must* be called
        directly to search for datasets of a particular type or constrain the
        query results based on the existence of datasets.  However, all
        dimensions used to identify the dataset type must have already been
        included in `QuerySummary.requested` when initializing the
        `QueryBuilder`.

        Parameters
        ----------
        datasetType : `DatasetType`
            The type of datasets to search for.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for datasets, such as a `str`, `re.Pattern`, or iterable
            thereof.  `...` can be used to return all collections.  See
            :ref:`daf_butler_collection_expressions` for more information.
        isResult : `bool`, optional
            If `True` (default), include the dataset ID column in the
            result columns of the query, allowing complete `DatasetRef`
            instances to be produced from the query results for this dataset
            type.  If `False`, the existence of datasets of this type is used
            only to constrain the data IDs returned by the query.
            `joinDataset` may be called with ``isResult=True`` at most once
            on a particular `QueryBuilder` instance.
        findFirst : `bool`, optional
            If `True` (`False` is default), only include the first match for
            each data ID, searching the given collections in order.  Requires
            that all entries in ``collections`` be regular strings, so there
            is a clear search order.  Ignored if ``isResult`` is `False`.

        Returns
        -------
        anyRecords : `bool`
            If `True`, joining the dataset table was successful and the query
            should proceed.  If `False`, we were able to determine (from the
            combination of ``datasetType`` and ``collections``) that there
            would be no results joined in from this dataset, and hence (due
            to the inner join that would normally be present), the full query
            will return no results.
        """
        assert datasetType in self.summary.datasets
        if isResult and findFirst:
            collections = CollectionSearch.fromExpression(collections)
        else:
            collections = CollectionQuery.fromExpression(collections)
        explicitCollections = frozenset(collections.explicitNames())
        # If we are searching all collections with no constraints, loop over
        # RUN collections only, because that will include all datasets.
        collectionTypes: Set[CollectionType]
        if collections == CollectionQuery():
            collectionTypes = {CollectionType.RUN}
        else:
            collectionTypes = CollectionType.all()
        datasetRecordStorage = self._backend.managers.datasets.find(datasetType.name)
        if datasetRecordStorage is None:
            # Unrecognized dataset type means no results.  It might be better
            # to raise here, but this is consistent with previous behavior,
            # which is expected by QuantumGraph generation code in pipe_base.
            self._doomed_by.append(
                f"Dataset type {datasetType.name!r} is not registered, so no instances of it can exist in "
                "any collection."
            )
            return False
        collectionRecords: list[CollectionRecord] = []
        rejections: list[str] = []
        for collectionRecord in collections.iter(
            self._backend.managers.collections, collectionTypes=collectionTypes
        ):
            # Only include collections that (according to collection
            # summaries) might have datasets of this type and governor
            # dimensions consistent with the query's WHERE clause.
            collection_summary = self._backend.managers.datasets.getCollectionSummary(collectionRecord)
            if not collection_summary.is_compatible_with(
                datasetType,
                self.summary.where.governor_constraints,
                rejections=rejections,
                name=collectionRecord.name,
            ):
                continue
            if collectionRecord.type is CollectionType.CALIBRATION:
                # If the collection name was provided explicitly, raise if
                # this is a kind of query we don't support yet; otherwise the
                # collection is part of a chained collection or a regex
                # match, and we skip it so as not to break queries of the
                # other included collections.
                if datasetType.isCalibration():
                    if self.summary.temporal or self.summary.mustHaveKeysJoined.temporal:
                        if collectionRecord.name in explicitCollections:
                            raise NotImplementedError(
                                f"Temporal query for dataset type '{datasetType.name}' in CALIBRATION-type "
                                f"collection '{collectionRecord.name}' is not yet supported."
                            )
                        else:
                            rejections.append(
                                f"Not searching for dataset {datasetType.name!r} in CALIBRATION collection "
                                f"{collectionRecord.name!r} because temporal calibration queries aren't "
                                "implemented; this is not an error only because the query structure implies "
                                "that searching this collection may be incidental."
                            )
                            continue
                    elif findFirst:
                        if collectionRecord.name in explicitCollections:
                            raise NotImplementedError(
                                f"Find-first query for dataset type '{datasetType.name}' in "
                                f"CALIBRATION-type collection '{collectionRecord.name}' is not yet "
                                "supported."
                            )
                        else:
                            rejections.append(
                                f"Not searching for dataset {datasetType.name!r} in CALIBRATION collection "
                                f"{collectionRecord.name!r} because find-first calibration queries aren't "
                                "implemented; this is not an error only because the query structure implies "
                                "that searching this collection may be incidental."
                            )
                            continue
                    else:
                        collectionRecords.append(collectionRecord)
                else:
                    # We can never find a non-calibration dataset in a
                    # CALIBRATION collection.
                    rejections.append(
                        f"Not searching for non-calibration dataset {datasetType.name!r} "
                        f"in CALIBRATION collection {collectionRecord.name!r}."
                    )
                    continue
            else:
                collectionRecords.append(collectionRecord)
        if isResult:
            if findFirst:
                subquery = self._build_dataset_search_subquery(
                    datasetRecordStorage,
                    collectionRecords,
                )
            else:
                subquery = self._build_dataset_query_subquery(
                    datasetRecordStorage,
                    collectionRecords,
                )
            columns = DatasetQueryColumns(
                datasetType=datasetType,
                id=subquery.columns["id"],
                runKey=subquery.columns[self._backend.managers.collections.getRunForeignKeyName()],
                ingestDate=subquery.columns["ingest_date"],
            )
        else:
            subquery = self._build_dataset_constraint_subquery(datasetRecordStorage, collectionRecords)
            columns = None
        self.joinTable(subquery, datasetType.dimensions.required, datasets=columns)
        if not collectionRecords:
            if rejections:
                self._doomed_by.extend(rejections)
            else:
                self._doomed_by.append(f"No collections to search matching expression {collections}.")
            return False
        return not self._doomed_by
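
    # A minimal usage sketch for the method above (the dataset type and
    # collection names here are hypothetical, not part of this module):
    #
    #     builder = QueryBuilder(summary, backend)
    #     if builder.joinDataset(dataset_type, ["my_run"], isResult=True, findFirst=True):
    #         query = builder.finish()
    #     # A False return means the query is doomed: the dataset type is
    #     # unregistered, or no matching collection can contain it.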

    def _build_dataset_constraint_subquery(
        self, storage: DatasetRecordStorage, collections: list[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that does not return dataset results.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched.  Collections with no
            datasets of this type or with governor dimensions incompatible
            with the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should also be filtered
            out if this is a temporal query.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object.  Has columns for each
            dataset type dimension, or an unspecified column (just to prevent
            SQL syntax errors) where there is no data ID.
        """
        return storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            # If this dataset type has no dimensions, we're in danger of
            # generating an invalid subquery that has no columns in the
            # SELECT clause.  An easy fix is to just select some arbitrary
            # column that goes unused, like the dataset ID.
            id=None if storage.datasetType.dimensions else SimpleQuery.Select,
            run=None,
            ingestDate=None,
            timespan=None,
        ).alias(storage.datasetType.name)

    def _build_dataset_query_subquery(
        self, storage: DatasetRecordStorage, collections: list[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that returns all matching dataset results.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched.  Collections with no
            datasets of this type or with governor dimensions incompatible
            with the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should also be filtered
            out if this is a temporal query.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object.  Has columns for each
            dataset type dimension, the dataset ID, the `~CollectionType.RUN`
            collection key, and the ingest date.
        """
        sql = storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            id=SimpleQuery.Select,
            run=SimpleQuery.Select,
            ingestDate=SimpleQuery.Select,
            timespan=None,
        ).alias(storage.datasetType.name)
        return sql

    def _build_dataset_search_subquery(
        self, storage: DatasetRecordStorage, collections: list[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that returns the first matching dataset for each data ID and
        dataset type name from an ordered list of collections.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched.  Collections with no
            datasets of this type or with governor dimensions incompatible
            with the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should be filtered out
            as well.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object.  Has columns for each
            dataset type dimension, the dataset ID, the `~CollectionType.RUN`
            collection key, and the ingest date.
        """
        # Query-simplification shortcut: if there is only one collection, a
        # find-first search is just a regular result subquery.  The same is
        # true if this is a doomed query with no collections to search.
        if len(collections) <= 1:
            return self._build_dataset_query_subquery(storage, collections)
        # In the more general case, we build a subquery of the form below to
        # search the collections in order.
        #
        # WITH {dst}_search AS (
        #     SELECT {data-id-cols}, id, run_id, 1 AS rank
        #     FROM <collection1>
        #     UNION ALL
        #     SELECT {data-id-cols}, id, run_id, 2 AS rank
        #     FROM <collection2>
        #     UNION ALL
        #     ...
        # )
        # SELECT
        #     {dst}_window.{data-id-cols},
        #     {dst}_window.id,
        #     {dst}_window.run_id
        # FROM (
        #     SELECT
        #         {dst}_search.{data-id-cols},
        #         {dst}_search.id,
        #         {dst}_search.run_id,
        #         ROW_NUMBER() OVER (
        #             PARTITION BY {dst}_search.{data-id-cols}
        #             ORDER BY rank
        #         ) AS rownum
        #     FROM {dst}_search
        # ) {dst}_window
        # WHERE
        #     {dst}_window.rownum = 1;
        #
        # We'll start with the Common Table Expression (CTE) at the top.
        search = storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            id=SimpleQuery.Select,
            run=SimpleQuery.Select,
            ingestDate=SimpleQuery.Select,
            timespan=None,
            rank=SimpleQuery.Select,
        ).cte(f"{storage.datasetType.name}_search")
        # Now we fill out the SELECT from the CTE, and the subquery it
        # contains (at the same time, since they have the same columns,
        # aside from the OVER clause).
        run_key_name = self._backend.managers.collections.getRunForeignKeyName()
        window_data_id_cols = [
            search.columns[name].label(name) for name in storage.datasetType.dimensions.required.names
        ]
        window_select_cols = [
            search.columns["id"].label("id"),
            search.columns[run_key_name].label(run_key_name),
            search.columns["ingest_date"].label("ingest_date"),
        ]
        window_select_cols += window_data_id_cols
        window_select_cols.append(
            sqlalchemy.sql.func.row_number()
            .over(partition_by=window_data_id_cols, order_by=search.columns["rank"])
            .label("rownum")
        )
        window = (
            sqlalchemy.sql.select(*window_select_cols)
            .select_from(search)
            .alias(f"{storage.datasetType.name}_window")
        )
        sql = (
            sqlalchemy.sql.select(*[window.columns[col.name].label(col.name) for col in window_select_cols])
            .select_from(window)
            .where(window.columns["rownum"] == 1)
            .alias(storage.datasetType.name)
        )
        return sql
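
    # The window-function pattern above, reduced to a self-contained
    # SQLAlchemy sketch (the ``search`` selectable and its column names are
    # hypothetical):
    #
    #     import sqlalchemy.sql
    #
    #     rownum = (
    #         sqlalchemy.sql.func.row_number()
    #         .over(partition_by=[search.columns["detector"]], order_by=search.columns["rank"])
    #         .label("rownum")
    #     )
    #     window = (
    #         sqlalchemy.sql.select(search.columns["id"], rownum)
    #         .select_from(search)
    #         .alias("window")
    #     )
    #     best = (
    #         sqlalchemy.sql.select(window.columns["id"])
    #         .select_from(window)
    #         .where(window.columns["rownum"] == 1)
    #     )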

    def joinTable(
        self,
        table: sqlalchemy.sql.FromClause,
        dimensions: NamedValueAbstractSet[Dimension],
        *,
        datasets: DatasetQueryColumns | None = None,
    ) -> None:
        """Join an arbitrary table to the query via dimension relationships.

        External calls to this method should only be necessary for tables
        whose records represent neither datasets nor dimension elements.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in
            the query.  The table must have columns with the names of the
            dimensions.
        datasets : `DatasetQueryColumns`, optional
            Columns that identify a dataset that is part of the query
            results.
        """
        unexpectedDimensions = NamedValueSet(dimensions - self.summary.mustHaveKeysJoined.dimensions)
        unexpectedDimensions.discard(self._backend.universe.commonSkyPix)
        if unexpectedDimensions:
            raise NotImplementedError(
                f"QueryBuilder does not yet support joining in dimensions {unexpectedDimensions} that "
                f"were not provided originally to the QuerySummary object passed at construction."
            )
        joinOn = self.startJoin(table, dimensions, dimensions.names)
        self.finishJoin(table, joinOn)
        if datasets is not None:
            assert (
                self._columns.datasets is None
            ), "At most one result dataset type can be returned by a query."
            self._columns.datasets = datasets
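
    # Sketch of an external call to the method above (``aux_table`` is a
    # hypothetical SQLAlchemy table whose column names match the dimension
    # names in ``dims``, a NamedValueAbstractSet[Dimension]):
    #
    #     builder.joinTable(aux_table, dims)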

    def startJoin(
        self, table: sqlalchemy.sql.FromClause, dimensions: Iterable[Dimension], columnNames: Iterable[str]
    ) -> list[sqlalchemy.sql.ColumnElement]:
        """Begin a join on dimensions.

        Must be followed by a call to `finishJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in
            the query.  The table must have columns with the names of the
            dimensions.
        columnNames : iterable of `str`
            Names of the columns that correspond to dimension key values;
            must be `zip` iterable with ``dimensions``.

        Returns
        -------
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN.
        """
        joinOn = []
        for dimension, columnName in zip(dimensions, columnNames):
            columnInTable = table.columns[columnName]
            columnsInQuery = self._columns.keys.setdefault(dimension, [])
            for columnInQuery in columnsInQuery:
                joinOn.append(columnInQuery == columnInTable)
            columnsInQuery.append(columnInTable)
        return joinOn

    def finishJoin(
        self, table: sqlalchemy.sql.FromClause, joinOn: list[sqlalchemy.sql.ColumnElement]
    ) -> None:
        """Complete a join on dimensions.

        Must be preceded by a call to `startJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.  Must be the same
            object passed to `startJoin`.
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN.  Should
            include at least the elements of the list returned by
            `startJoin`.
        """
        onclause: sqlalchemy.sql.ColumnElement | None
        if len(joinOn) == 0:
            onclause = None
        elif len(joinOn) == 1:
            onclause = joinOn[0]
        else:
            onclause = sqlalchemy.sql.and_(*joinOn)
        self._simpleQuery.join(table, onclause=onclause)
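
    # Typical pairing of the two methods above (``table`` and ``dims`` are
    # hypothetical; the extra ON term is illustrative only):
    #
    #     joinOn = builder.startJoin(table, dims, dims.names)
    #     joinOn.append(table.columns["quality"] > 0)  # additional ON term
    #     builder.finishJoin(table, joinOn)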

    def _joinMissingDimensionElements(self) -> None:
        """Join all dimension element tables that were identified as
        necessary by `QuerySummary` and have not yet been joined.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        # Join all DimensionElement tables that we need for spatial/temporal
        # joins/filters or a nontrivial WHERE expression.
        # We iterate over these in *reverse* topological order to minimize
        # the number of tables joined.  For example, the "visit" table
        # provides the primary key value for the "instrument" table it
        # depends on, so we don't need to join "instrument" as well unless we
        # had a nontrivial expression on it (and hence included it already
        # above).
        for element in self._backend.universe.sorted(self.summary.mustHaveTableJoined, reverse=True):
            self.joinDimensionElement(element)
        # Join in any requested Dimension tables that don't already have
        # their primary keys identified by the query.
        for dimension in self._backend.universe.sorted(self.summary.mustHaveKeysJoined, reverse=True):
            if dimension not in self._columns.keys:
                self.joinDimensionElement(dimension)

    def _addWhereClause(self) -> None:
        """Add a WHERE clause to the query under construction, connecting all
        joined dimensions to the expression and data ID dimensions from
        `QuerySummary`.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        if self.summary.where.tree is not None:
            self._simpleQuery.where.append(
                convertExpressionToSql(
                    self.summary.where.tree,
                    self._backend.universe,
                    columns=self._columns,
                    elements=self._elements,
                    bind=self.summary.where.bind,
                    TimespanReprClass=self._backend.managers.column_types.timespan_cls,
                )
            )
        for dimension, columnsInQuery in self._columns.keys.items():
            if dimension in self.summary.where.dataId.graph:
                givenKey = self.summary.where.dataId[dimension]
                # Add a WHERE term for each column that corresponds to each
                # key.  This is redundant with the JOIN ON clauses that make
                # them equal to each other, but more constraints have a
                # chance of making things easier on the DB's query optimizer.
                for columnInQuery in columnsInQuery:
                    self._simpleQuery.where.append(columnInQuery == givenKey)
            else:
                # Dimension is not fully identified, but it might be a skypix
                # dimension that's constrained by a given region.
                if self.summary.where.region is not None and isinstance(dimension, SkyPixDimension):
                    # We know the region now.
                    givenSkyPixIds: list[int] = []
                    for begin, end in dimension.pixelization.envelope(self.summary.where.region):
                        givenSkyPixIds.extend(range(begin, end))
                    for columnInQuery in columnsInQuery:
                        self._simpleQuery.where.append(columnInQuery.in_(givenSkyPixIds))
        # If we are given a data ID with a timespan, and there are one or
        # more timespans in the query that aren't given, add a WHERE
        # expression for each of them.
        if self.summary.where.dataId.graph.temporal and self.summary.temporal:
            # Timespan is known now.
            givenInterval = self.summary.where.dataId.timespan
            assert givenInterval is not None
            for element, intervalInQuery in self._columns.timespans.items():
                assert element not in self.summary.where.dataId.graph.elements
                self._simpleQuery.where.append(
                    intervalInQuery.overlaps(
                        self._backend.managers.column_types.timespan_cls.fromLiteral(givenInterval)
                    )
                )
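
    # The skypix-envelope step above expands half-open (begin, end) ranges
    # into explicit pixel IDs; for example (hypothetical ranges):
    #
    #     >>> ranges = [(10, 13), (42, 44)]
    #     >>> ids: list[int] = []
    #     >>> for begin, end in ranges:
    #     ...     ids.extend(range(begin, end))
    #     >>> ids
    #     [10, 11, 12, 42, 43]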

    def finish(self, joinMissing: bool = True) -> Query:
        """Finish constructing the query, returning a new `Query` instance.

        Parameters
        ----------
        joinMissing : `bool`, optional
            If `True` (default), automatically join any missing dimension
            element tables (according to the categorization of the
            `QuerySummary` the builder was constructed with).  `False` should
            only be passed if the caller can independently guarantee that all
            dimension relationships are already captured in non-dimension
            tables that have been manually included in the query.

        Returns
        -------
        query : `Query`
            A `Query` object that can be executed and used to interpret
            result rows.
        """
        if joinMissing:
            self._joinMissingDimensionElements()
        self._addWhereClause()
        if self._columns.isEmpty():
            return EmptyQuery(
                self._backend.universe,
                backend=self._backend,
                doomed_by=self._doomed_by,
            )
        return DirectQuery(
            graph=self.summary.requested,
            uniqueness=DirectQueryUniqueness.NOT_UNIQUE,
            whereRegion=self.summary.where.region,
            simpleQuery=self._simpleQuery,
            columns=self._columns,
            order_by_columns=self._order_by_columns(),
            limit=self.summary.limit,
            backend=self._backend,
            doomed_by=self._doomed_by,
        )
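
    # End-to-end sketch of the builder lifecycle (all names hypothetical):
    #
    #     builder = QueryBuilder(summary, backend)
    #     builder.joinDataset(dataset_type, collections=["my_run"], isResult=True)
    #     query = builder.finish()  # joins missing elements, adds WHERE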

    def _order_by_columns(self) -> Iterable[OrderByColumn]:
        """Generate columns to be used for the ORDER BY clause.

        Returns
        -------
        order_by_columns : `Iterable` [ `OrderByColumn` ]
            Sequence of columns to appear in the ORDER BY clause.
        """
        order_by_columns: list[OrderByColumn] = []
        if not self.summary.order_by:
            return order_by_columns

        for order_by_column in self.summary.order_by.order_by_columns:

            column: sqlalchemy.sql.ColumnElement
            if order_by_column.column is None:
                # Dimension name; it has to be in the SELECT list already, so
                # we only add it to ORDER BY.
                assert isinstance(order_by_column.element, Dimension), "expecting full Dimension"
                column = self._columns.getKeyColumn(order_by_column.element)
            else:
                table = self._elements[order_by_column.element]

                if order_by_column.column in ("timespan.begin", "timespan.end"):
                    TimespanReprClass = self._backend.managers.column_types.timespan_cls
                    timespan_repr = TimespanReprClass.from_columns(table.columns)
                    if order_by_column.column == "timespan.begin":
                        column = timespan_repr.lower()
                        label = f"{order_by_column.element.name}_timespan_begin"
                    else:
                        column = timespan_repr.upper()
                        label = f"{order_by_column.element.name}_timespan_end"
                else:
                    column = table.columns[order_by_column.column]
                    # Make a unique label for it.
                    label = f"{order_by_column.element.name}_{order_by_column.column}"

                column = column.label(label)

            order_by_columns.append(OrderByColumn(column=column, ordering=order_by_column.ordering))

        return order_by_columns