Coverage for python/lsst/daf/butler/registry/queries/_builder.py: 10%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

189 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("QueryBuilder",) 

24 

25from typing import AbstractSet, Any, Iterable, List, Optional 

26 

27import sqlalchemy.sql 

28 

29from ...core import DatasetType, Dimension, DimensionElement, SimpleQuery, SkyPixDimension 

30from ...core.named import NamedKeyDict, NamedValueAbstractSet, NamedValueSet 

31from .._collectionType import CollectionType 

32from .._exceptions import DataIdValueError 

33from ..interfaces import GovernorDimensionRecordStorage 

34from ..wildcards import CollectionQuery, CollectionSearch 

35from ._query import DirectQuery, DirectQueryUniqueness, EmptyQuery, OrderByColumn, Query 

36from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary, RegistryManagers 

37from .expressions import convertExpressionToSql 

38 

39 

class QueryBuilder:
    """A builder for potentially complex queries that join tables based
    on dimension relationships.

    Parameters
    ----------
    summary : `QuerySummary`
        Struct organizing the dimensions involved in the query.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.
    doomed_by : `Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed.  Queries with a non-empty list will never be executed.
    """

    def __init__(self, summary: QuerySummary, managers: RegistryManagers, doomed_by: Iterable[str] = ()):
        # Struct describing the dimensions/datasets/where-clause of the query.
        self.summary = summary
        # Accumulator for FROM/JOIN/WHERE fragments as tables are joined in.
        self._simpleQuery = SimpleQuery()
        # Maps each joined DimensionElement to the FROM clause that provides
        # its table (or subquery) in the query under construction.
        self._elements: NamedKeyDict[DimensionElement, sqlalchemy.sql.FromClause] = NamedKeyDict()
        # Tracks the SQLAlchemy columns (dimension keys, regions, timespans,
        # dataset columns) available in the query so far.
        self._columns = QueryColumns()
        self._managers = managers
        # Copy so we can append further doom messages without mutating the
        # caller's iterable.
        self._doomed_by = list(doomed_by)

        # Fail fast on unknown governor-dimension values before any SQL is
        # built.
        self._validateGovernors()

    def _validateGovernors(self) -> None:
        """Check that governor dimensions specified by query actually exist.

        This helps to avoid mistakes in governor values. It also implements
        consistent failure behavior for cases when governor dimensions are
        specified in either DataId or WHERE clause.

        Raises
        ------
        LookupError
            Raised when governor dimension values are not found.
        """
        for governor, values in self.summary.where.restriction.items():
            storage = self._managers.dimensions[governor]
            assert isinstance(
                storage, GovernorDimensionRecordStorage
            ), f"Unexpected type of the governor dimension record storage {type(storage)}"
            # Every requested value must be among the values known to the
            # governor's record storage; otherwise report exactly which ones
            # are unknown.
            if not values <= storage.values:
                unknown = values - storage.values
                raise DataIdValueError(
                    f"Unknown values specified for governor dimension {governor}: {unknown}"
                )

    def hasDimensionKey(self, dimension: Dimension) -> bool:
        """Return `True` if the given dimension's primary key column has
        been included in the query (possibly via a foreign key column on some
        other table).
        """
        return dimension in self._columns.keys

    def joinDimensionElement(self, element: DimensionElement) -> None:
        """Add the table for a `DimensionElement` to the query.

        This automatically joins the element table to all other tables in the
        query with which it is related, via both dimension keys and spatial
        and temporal relationships.

        External calls to this method should rarely be necessary; `finish` will
        automatically call it if the `DimensionElement` has been identified as
        one that must be included.

        Parameters
        ----------
        element : `DimensionElement`
            Element for which a table should be added.  The element must be
            associated with a database table (see `DimensionElement.hasTable`).
        """
        assert element not in self._elements, "Element already included in query."
        storage = self._managers.dimensions[element]
        # Delegate the join to the element's record storage; pass in region
        # and timespan column accumulators only when this element actually
        # participates in the query's spatial/temporal relationships.
        fromClause = storage.join(
            self,
            regions=self._columns.regions if element in self.summary.spatial else None,
            timespans=self._columns.timespans if element in self.summary.temporal else None,
        )
        self._elements[element] = fromClause

    def joinDataset(
        self, datasetType: DatasetType, collections: Any, *, isResult: bool = True, findFirst: bool = False
    ) -> bool:
        """Add a dataset search or constraint to the query.

        Unlike other `QueryBuilder` join methods, this *must* be called
        directly to search for datasets of a particular type or constrain the
        query results based on the existence of datasets.  However, all
        dimensions used to identify the dataset type must have already been
        included in `QuerySummary.requested` when initializing the
        `QueryBuilder`.

        Parameters
        ----------
        datasetType : `DatasetType`
            The type of datasets to search for.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for datasets, such as a `str`, `re.Pattern`, or iterable
            thereof.  `...` can be used to return all collections. See
            :ref:`daf_butler_collection_expressions` for more information.
        isResult : `bool`, optional
            If `True` (default), include the dataset ID column in the
            result columns of the query, allowing complete `DatasetRef`
            instances to be produced from the query results for this dataset
            type.  If `False`, the existence of datasets of this type is used
            only to constrain the data IDs returned by the query.
            `joinDataset` may be called with ``isResult=True`` at most one time
            on a particular `QueryBuilder` instance.
        findFirst : `bool`, optional
            If `True` (`False` is default), only include the first match for
            each data ID, searching the given collections in order.  Requires
            that all entries in ``collections`` be regular strings, so there is
            a clear search order.  Ignored if ``isResult`` is `False`.

        Returns
        -------
        anyRecords : `bool`
            If `True`, joining the dataset table was successful and the query
            should proceed.  If `False`, we were able to determine (from the
            combination of ``datasetType`` and ``collections``) that there
            would be no results joined in from this dataset, and hence (due to
            the inner join that would normally be present), the full query will
            return no results.
        """
        assert datasetType in self.summary.datasets
        # An ordered search (CollectionSearch) is only meaningful when we both
        # return dataset columns and want the first match per data ID;
        # otherwise an unordered CollectionQuery suffices.
        if isResult and findFirst:
            collections = CollectionSearch.fromExpression(collections)
        else:
            collections = CollectionQuery.fromExpression(collections)
        explicitCollections = frozenset(collections.explicitNames())
        # If we are searching all collections with no constraints, loop over
        # RUN collections only, because that will include all datasets.
        collectionTypes: AbstractSet[CollectionType]
        if collections == CollectionQuery():
            collectionTypes = {CollectionType.RUN}
        else:
            collectionTypes = CollectionType.all()
        datasetRecordStorage = self._managers.datasets.find(datasetType.name)
        if datasetRecordStorage is None:
            # Unrecognized dataset type means no results.  It might be better
            # to raise here, but this is consistent with previous behavior,
            # which is expected by QuantumGraph generation code in pipe_base.
            self._doomed_by.append(
                f"Dataset type {datasetType.name!r} is not registered, so no instances of it can exist in "
                "any collection."
            )
            return False
        # Per-collection (or per-collection-group) SELECTs to be combined by
        # UNION below.
        subsubqueries = []
        runKeyName = self._managers.collections.getRunForeignKeyName()
        # Columns every subsubquery must produce, so the UNION is well-formed.
        baseColumnNames = {"id", runKeyName, "ingest_date"} if isResult else set()
        baseColumnNames.update(datasetType.dimensions.required.names)
        if not findFirst:
            # Without find-first semantics we can group collections by type
            # and emit only one subsubquery per group (assembled after the
            # loop).
            calibration_collections = []
            other_collections = []
        # Human-readable reasons collections were skipped; used for doom
        # messages if nothing remains to search.
        rejections: List[str] = []
        for rank, collectionRecord in enumerate(
            collections.iter(self._managers.collections, collectionTypes=collectionTypes)
        ):
            # Only include collections that (according to collection summaries)
            # might have datasets of this type and governor dimensions
            # consistent with the query's WHERE clause.
            collection_summary = self._managers.datasets.getCollectionSummary(collectionRecord)
            if not collection_summary.is_compatible_with(
                datasetType,
                self.summary.where.restriction,
                rejections=rejections,
                name=collectionRecord.name,
            ):
                continue
            if collectionRecord.type is CollectionType.CALIBRATION:
                # If collection name was provided explicitly then say sorry,
                # otherwise collection is a part of chained one and we skip it.
                if datasetType.isCalibration() and collectionRecord.name in explicitCollections:
                    if self.summary.temporal or self.summary.mustHaveKeysJoined.temporal:
                        raise NotImplementedError(
                            f"Temporal query for dataset type '{datasetType.name}' in CALIBRATION-type "
                            f"collection '{collectionRecord.name}' is not yet supported."
                        )
                    elif findFirst:
                        raise NotImplementedError(
                            f"Find-first query for dataset type '{datasetType.name}' in CALIBRATION-type "
                            f"collection '{collectionRecord.name}' is not yet supported."
                        )
                    else:
                        calibration_collections.append(collectionRecord)
                else:
                    # We can never find a non-calibration dataset in a
                    # CALIBRATION collection.
                    rejections.append(
                        f"Not searching for non-calibration dataset {datasetType.name!r} "
                        f"in CALIBRATION collection {collectionRecord.name!r}."
                    )
                    continue
            elif findFirst:
                # If findFirst=True, each collection gets its own subquery so
                # we can add a literal rank for it.
                ssq = datasetRecordStorage.select(
                    collectionRecord,
                    dataId=SimpleQuery.Select,
                    id=SimpleQuery.Select if isResult else None,
                    run=SimpleQuery.Select if isResult else None,
                    ingestDate=SimpleQuery.Select if isResult else None,
                )
                assert {c.name for c in ssq.columns} == baseColumnNames
                # `rank` records search order so the window function below can
                # pick the first match per data ID.
                ssq.columns.append(sqlalchemy.sql.literal(rank).label("rank"))
                subsubqueries.append(ssq.combine())
            else:
                # If findFirst=False, we have one subquery for all CALIBRATION
                # collections and one subquery for all other collections; we'll
                # assemble those later after grouping by collection type.
                other_collections.append(collectionRecord)
        if not findFirst:
            if other_collections:
                ssq = datasetRecordStorage.select(
                    *other_collections,
                    dataId=SimpleQuery.Select,
                    id=SimpleQuery.Select if isResult else None,
                    run=SimpleQuery.Select if isResult else None,
                    ingestDate=SimpleQuery.Select if isResult else None,
                )
                subsubqueries.append(ssq.combine())
            if calibration_collections:
                ssq = datasetRecordStorage.select(
                    *calibration_collections,
                    dataId=SimpleQuery.Select,
                    id=SimpleQuery.Select if isResult else None,
                    run=SimpleQuery.Select if isResult else None,
                    ingestDate=SimpleQuery.Select if isResult else None,
                )
                subsubqueries.append(ssq.combine())
        if not subsubqueries:
            if rejections:
                self._doomed_by.extend(rejections)
            else:
                self._doomed_by.append(f"No collections to search matching expression {collections}.")
            # Make a single subquery with no collections that never yields
            # results; this should never get executed, but downstream code
            # still needs to access the SQLAlchemy column objects.
            ssq = datasetRecordStorage.select(
                dataId=SimpleQuery.Select,
                id=SimpleQuery.Select if isResult else None,
                run=SimpleQuery.Select if isResult else None,
                ingestDate=SimpleQuery.Select if isResult else None,
            )
            if findFirst:
                # NOTE(review): ``rank`` is only bound here if the enumerate
                # loop above ran at least one iteration; if the collection
                # expression iterated nothing at all this would raise
                # NameError — confirm callers guarantee a non-empty iteration
                # in the findFirst case.
                ssq.columns.append(sqlalchemy.sql.literal(rank).label("rank"))
            subsubqueries.append(ssq.combine())
        # Although one would expect that these subqueries can be
        # UNION ALL instead of UNION because each subquery is already
        # distinct, it turns out that with many
        # subqueries this causes catastrophic performance problems
        # with both sqlite and postgres.  Using UNION may require
        # more table scans, but a much simpler query plan given our
        # table structures.  See DM-31429.
        subquery = sqlalchemy.sql.union(*subsubqueries)
        columns: Optional[DatasetQueryColumns] = None
        if isResult:
            if findFirst:
                # Rewrite the subquery (currently a UNION ALL over
                # per-collection subsubqueries) to select the rows with the
                # lowest rank per data ID.  The block below will set subquery
                # to something like this:
                #
                # WITH {dst}_search AS (
                #     SELECT {data-id-cols}, id, run_id, 1 AS rank
                #         FROM <collection1>
                #     UNION ALL
                #     SELECT {data-id-cols}, id, run_id, 2 AS rank
                #         FROM <collection2>
                #     UNION ALL
                #     ...
                # )
                # SELECT
                #     {dst}_window.{data-id-cols},
                #     {dst}_window.id,
                #     {dst}_window.run_id
                # FROM (
                #     SELECT
                #         {dst}_search.{data-id-cols},
                #         {dst}_search.id,
                #         {dst}_search.run_id,
                #         ROW_NUMBER() OVER (
                #             PARTITION BY {dst_search}.{data-id-cols}
                #             ORDER BY rank
                #         ) AS rownum
                #     ) {dst}_window
                # WHERE
                #     {dst}_window.rownum = 1;
                #
                search = subquery.cte(f"{datasetType.name}_search")
                windowDataIdCols = [
                    search.columns[name].label(name) for name in datasetType.dimensions.required.names
                ]
                windowSelectCols = [
                    search.columns["id"].label("id"),
                    search.columns[runKeyName].label(runKeyName),
                    search.columns["ingest_date"].label("ingest_date"),
                ]
                windowSelectCols += windowDataIdCols
                assert {c.name for c in windowSelectCols} == baseColumnNames
                # ROW_NUMBER() partitioned by data ID, ordered by search rank:
                # rownum == 1 is the first collection (in search order) with a
                # match for that data ID.
                windowSelectCols.append(
                    sqlalchemy.sql.func.row_number()
                    .over(partition_by=windowDataIdCols, order_by=search.columns["rank"])
                    .label("rownum")
                )
                window = (
                    sqlalchemy.sql.select(*windowSelectCols)
                    .select_from(search)
                    .alias(f"{datasetType.name}_window")
                )
                subquery = (
                    sqlalchemy.sql.select(*[window.columns[name].label(name) for name in baseColumnNames])
                    .select_from(window)
                    .where(window.columns["rownum"] == 1)
                    .alias(datasetType.name)
                )
            else:
                subquery = subquery.alias(datasetType.name)
            columns = DatasetQueryColumns(
                datasetType=datasetType,
                id=subquery.columns["id"],
                runKey=subquery.columns[runKeyName],
                ingestDate=subquery.columns["ingest_date"],
            )
        else:
            subquery = subquery.alias(datasetType.name)
        self.joinTable(subquery, datasetType.dimensions.required, datasets=columns)
        # A query already doomed (e.g. no matching collections) reports
        # failure even though the join itself succeeded.
        return not self._doomed_by

    def joinTable(
        self,
        table: sqlalchemy.sql.FromClause,
        dimensions: NamedValueAbstractSet[Dimension],
        *,
        datasets: Optional[DatasetQueryColumns] = None,
    ) -> None:
        """Join an arbitrary table to the query via dimension relationships.

        External calls to this method should only be necessary for tables whose
        records represent neither datasets nor dimension elements.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in the
            query.  The table must have columns with the names of the
            dimensions.
        datasets : `DatasetQueryColumns`, optional
            Columns that identify a dataset that is part of the query results.
        """
        unexpectedDimensions = NamedValueSet(dimensions - self.summary.mustHaveKeysJoined.dimensions)
        # The common skypix dimension is implicitly always allowed.
        unexpectedDimensions.discard(self.summary.universe.commonSkyPix)
        if unexpectedDimensions:
            raise NotImplementedError(
                f"QueryBuilder does not yet support joining in dimensions {unexpectedDimensions} that "
                f"were not provided originally to the QuerySummary object passed at construction."
            )
        joinOn = self.startJoin(table, dimensions, dimensions.names)
        self.finishJoin(table, joinOn)
        if datasets is not None:
            assert (
                self._columns.datasets is None
            ), "At most one result dataset type can be returned by a query."
            self._columns.datasets = datasets

    def startJoin(
        self, table: sqlalchemy.sql.FromClause, dimensions: Iterable[Dimension], columnNames: Iterable[str]
    ) -> List[sqlalchemy.sql.ColumnElement]:
        """Begin a join on dimensions.

        Must be followed by call to `finishJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in the
            query.  The table must have columns with the names of the
            dimensions.
        columnNames : iterable of `str`
            Names of the columns that correspond to dimension key values; must
            be `zip` iterable with ``dimensions``.

        Returns
        -------
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN.
        """
        joinOn = []
        for dimension, columnName in zip(dimensions, columnNames):
            columnInTable = table.columns[columnName]
            columnsInQuery = self._columns.keys.setdefault(dimension, [])
            # Equate the new column with every column already in the query
            # that carries the same dimension key, then record it so later
            # joins can equate against it too.
            for columnInQuery in columnsInQuery:
                joinOn.append(columnInQuery == columnInTable)
            columnsInQuery.append(columnInTable)
        return joinOn

    def finishJoin(
        self, table: sqlalchemy.sql.FromClause, joinOn: List[sqlalchemy.sql.ColumnElement]
    ) -> None:
        """Complete a join on dimensions.

        Must be preceded by call to `startJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.  Must be the same object
            passed to `startJoin`.
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN.  Should include
            at least the elements of the list returned by `startJoin`.
        """
        # Collapse the ON terms: no terms -> unconstrained join (onclause
        # None), one term -> use it directly, several -> AND them together.
        onclause: Optional[sqlalchemy.sql.ColumnElement]
        if len(joinOn) == 0:
            onclause = None
        elif len(joinOn) == 1:
            onclause = joinOn[0]
        else:
            onclause = sqlalchemy.sql.and_(*joinOn)
        self._simpleQuery.join(table, onclause=onclause)

    def _joinMissingDimensionElements(self) -> None:
        """Join all dimension element tables that were identified as necessary
        by `QuerySummary` and have not yet been joined.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        # Join all DimensionElement tables that we need for spatial/temporal
        # joins/filters or a nontrivial WHERE expression.
        # We iterate over these in *reverse* topological order to minimize the
        # number of tables joined.  For example, the "visit" table provides
        # the primary key value for the "instrument" table it depends on, so we
        # don't need to join "instrument" as well unless we had a nontrivial
        # expression on it (and hence included it already above).
        for element in self.summary.universe.sorted(self.summary.mustHaveTableJoined, reverse=True):
            self.joinDimensionElement(element)
        # Join in any requested Dimension tables that don't already have their
        # primary keys identified by the query.
        for dimension in self.summary.universe.sorted(self.summary.mustHaveKeysJoined, reverse=True):
            if dimension not in self._columns.keys:
                self.joinDimensionElement(dimension)

    def _addWhereClause(self) -> None:
        """Add a WHERE clause to the query under construction, connecting all
        joined dimensions to the expression and data ID dimensions from
        `QuerySummary`.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        if self.summary.where.tree is not None:
            # Translate the parsed user expression tree into SQL against the
            # columns/elements joined so far.
            self._simpleQuery.where.append(
                convertExpressionToSql(
                    self.summary.where.tree,
                    self.summary.universe,
                    columns=self._columns,
                    elements=self._elements,
                    bind=self.summary.where.bind,
                    TimespanReprClass=self._managers.TimespanReprClass,
                )
            )
        for dimension, columnsInQuery in self._columns.keys.items():
            if dimension in self.summary.where.dataId.graph:
                givenKey = self.summary.where.dataId[dimension]
                # Add a WHERE term for each column that corresponds to each
                # key.  This is redundant with the JOIN ON clauses that make
                # them equal to each other, but more constraints have a chance
                # of making things easier on the DB's query optimizer.
                for columnInQuery in columnsInQuery:
                    self._simpleQuery.where.append(columnInQuery == givenKey)
            else:
                # Dimension is not fully identified, but it might be a skypix
                # dimension that's constrained by a given region.
                if self.summary.where.region is not None and isinstance(dimension, SkyPixDimension):
                    # We know the region now.
                    givenSkyPixIds: List[int] = []
                    for begin, end in dimension.pixelization.envelope(self.summary.where.region):
                        givenSkyPixIds.extend(range(begin, end))
                    for columnInQuery in columnsInQuery:
                        self._simpleQuery.where.append(columnInQuery.in_(givenSkyPixIds))
        # If we are given an dataId with a timespan, and there are one or more
        # timespans in the query that aren't given, add a WHERE expression for
        # each of them.
        if self.summary.where.dataId.graph.temporal and self.summary.temporal:
            # Timespan is known now.
            givenInterval = self.summary.where.dataId.timespan
            assert givenInterval is not None
            for element, intervalInQuery in self._columns.timespans.items():
                assert element not in self.summary.where.dataId.graph.elements
                self._simpleQuery.where.append(
                    intervalInQuery.overlaps(self._managers.TimespanReprClass.fromLiteral(givenInterval))
                )

    def finish(self, joinMissing: bool = True) -> Query:
        """Finish query construction, returning a new `Query` instance.

        Parameters
        ----------
        joinMissing : `bool`, optional
            If `True` (default), automatically join any missing dimension
            element tables (according to the categorization of the
            `QuerySummary` the builder was constructed with).  `False` should
            only be passed if the caller can independently guarantee that all
            dimension relationships are already captured in non-dimension
            tables that have been manually included in the query.

        Returns
        -------
        query : `Query`
            A `Query` object that can be executed and used to interpret result
            rows.
        """
        if joinMissing:
            self._joinMissingDimensionElements()
        self._addWhereClause()
        # A query that selected no columns at all degenerates to an
        # EmptyQuery (possibly doomed).
        if self._columns.isEmpty():
            return EmptyQuery(
                self.summary.requested.universe, managers=self._managers, doomed_by=self._doomed_by
            )
        return DirectQuery(
            graph=self.summary.requested,
            uniqueness=DirectQueryUniqueness.NOT_UNIQUE,
            whereRegion=self.summary.where.dataId.region,
            simpleQuery=self._simpleQuery,
            columns=self._columns,
            order_by_columns=self._order_by_columns(),
            limit=self.summary.limit,
            managers=self._managers,
            doomed_by=self._doomed_by,
        )

    def _order_by_columns(self) -> Iterable[OrderByColumn]:
        """Generate columns to be used for ORDER BY clause.

        Returns
        -------
        order_by_columns : `Iterable` [ `ColumnIterable` ]
            Sequence of columns to appear in ORDER BY clause.
        """
        order_by_columns: List[OrderByColumn] = []
        if not self.summary.order_by:
            return order_by_columns

        for order_by_column in self.summary.order_by.order_by_columns:

            column: sqlalchemy.sql.ColumnElement
            if order_by_column.column is None:
                # dimension name, it has to be in SELECT list already, only
                # add it to ORDER BY
                assert isinstance(order_by_column.element, Dimension), "expecting full Dimension"
                column = self._columns.getKeyColumn(order_by_column.element)
            else:
                table = self._elements[order_by_column.element]

                if order_by_column.column in ("timespan.begin", "timespan.end"):
                    # Timespan bounds are virtual columns; extract the lower
                    # or upper bound via the timespan representation class.
                    TimespanReprClass = self._managers.TimespanReprClass
                    timespan_repr = TimespanReprClass.fromSelectable(table)
                    if order_by_column.column == "timespan.begin":
                        column = timespan_repr.lower()
                        label = f"{order_by_column.element.name}_timespan_begin"
                    else:
                        column = timespan_repr.upper()
                        label = f"{order_by_column.element.name}_timespan_end"
                else:
                    column = table.columns[order_by_column.column]
                    # make a unique label for it
                    label = f"{order_by_column.element.name}_{order_by_column.column}"

                column = column.label(label)

            order_by_columns.append(OrderByColumn(column=column, ordering=order_by_column.ordering))

        return order_by_columns