Coverage for python/lsst/daf/butler/registry/queries/_builder.py: 9%

188 statements  

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("QueryBuilder",)

import dataclasses
from typing import AbstractSet, Any, Iterable, List, Optional

import sqlalchemy.sql

from ...core import DatasetType, Dimension, DimensionElement, SimpleQuery, SkyPixDimension, ddl
from ...core.named import NamedKeyDict, NamedValueAbstractSet, NamedValueSet
from .._collectionType import CollectionType
from ..wildcards import CollectionQuery, CollectionSearch
from ._query import DirectQuery, DirectQueryUniqueness, EmptyQuery, OrderByColumn, Query
from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary, RegistryManagers
from .expressions import convertExpressionToSql


class QueryBuilder:
    """A builder for potentially complex queries that join tables based
    on dimension relationships.

    Parameters
    ----------
    summary : `QuerySummary`
        Struct organizing the dimensions involved in the query.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.
    doomed_by : `Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed. Queries with a non-empty list will never be executed.
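
    Examples
    --------
    A schematic sketch of the intended flow; illustrative only, since the
    ``summary`` and ``managers`` structs are normally assembled by registry
    internals, and ``datasetType`` is a hypothetical `DatasetType`::

        builder = QueryBuilder(summary, managers)
        # Join in a dataset search; a False return means the query is doomed.
        builder.joinDataset(datasetType, collections=..., isResult=True)
        # Produce an executable Query, auto-joining any missing dimension
        # element tables.
        query = builder.finish()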

54 """ 

55 

    def __init__(self, summary: QuerySummary, managers: RegistryManagers, doomed_by: Iterable[str] = ()):
        self.summary = summary
        self._simpleQuery = SimpleQuery()
        self._elements: NamedKeyDict[DimensionElement, sqlalchemy.sql.FromClause] = NamedKeyDict()
        self._columns = QueryColumns()
        self._managers = managers
        self._doomed_by = list(doomed_by)

    def hasDimensionKey(self, dimension: Dimension) -> bool:
        """Return `True` if the given dimension's primary key column has
        been included in the query (possibly via a foreign key column on
        some other table).
        """
        return dimension in self._columns.keys

    def joinDimensionElement(self, element: DimensionElement) -> None:
        """Add the table for a `DimensionElement` to the query.

        This automatically joins the element table to all other tables in
        the query with which it is related, via both dimension keys and
        spatial and temporal relationships.

        External calls to this method should rarely be necessary; `finish`
        will automatically call it if the `DimensionElement` has been
        identified as one that must be included.

        Parameters
        ----------
        element : `DimensionElement`
            Element for which a table should be added. The element must be
            associated with a database table (see `DimensionElement.hasTable`).
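
        Examples
        --------
        A hedged sketch (``builder`` is a `QueryBuilder`; looking the
        element up by name in the dimension universe is illustrative)::

            # Hypothetical: fetch a dimension element from the universe and
            # join its table into the query under construction.
            element = builder.summary.universe["visit_definition"]
            builder.joinDimensionElement(element)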

87 """ 

88 assert element not in self._elements, "Element already included in query." 

89 storage = self._managers.dimensions[element] 

90 fromClause = storage.join( 

91 self, 

92 regions=self._columns.regions if element in self.summary.spatial else None, 

93 timespans=self._columns.timespans if element in self.summary.temporal else None, 

94 ) 

95 self._elements[element] = fromClause 

96 

    def joinDataset(
        self, datasetType: DatasetType, collections: Any, *, isResult: bool = True, findFirst: bool = False
    ) -> bool:
        """Add a dataset search or constraint to the query.

        Unlike other `QueryBuilder` join methods, this *must* be called
        directly to search for datasets of a particular type or to constrain
        the query results based on the existence of datasets. However, all
        dimensions used to identify the dataset type must have already been
        included in `QuerySummary.requested` when initializing the
        `QueryBuilder`.

        Parameters
        ----------
        datasetType : `DatasetType`
            The type of datasets to search for.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for datasets, such as a `str`, `re.Pattern`, or iterable
            thereof. `...` can be used to search all collections. See
            :ref:`daf_butler_collection_expressions` for more information.
        isResult : `bool`, optional
            If `True` (default), include the dataset ID column in the
            result columns of the query, allowing complete `DatasetRef`
            instances to be produced from the query results for this dataset
            type. If `False`, the existence of datasets of this type is used
            only to constrain the data IDs returned by the query.
            `joinDataset` may be called with ``isResult=True`` at most once
            on a particular `QueryBuilder` instance.
        findFirst : `bool`, optional
            If `True` (default `False`), only include the first match for
            each data ID, searching the given collections in order. Requires
            that all entries in ``collections`` be regular strings, so there
            is a clear search order. Ignored if ``isResult`` is `False`.

        Returns
        -------
        anyRecords : `bool`
            If `True`, joining the dataset table was successful and the query
            should proceed. If `False`, we were able to determine (from the
            combination of ``datasetType`` and ``collections``) that there
            would be no results joined in from this dataset, and hence (due
            to the inner join that would normally be present) the full query
            will return no results.
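
        Examples
        --------
        A hedged sketch; ``builder`` is a `QueryBuilder` whose summary
        already includes the dimensions of the hypothetical dataset types
        ``rawType`` and ``flatType``, and the collection names are
        illustrative::

            # Search two collections in order, keeping only the first match
            # for each data ID; a False return means the query is doomed.
            ok = builder.joinDataset(rawType, ["run/a", "run/b"], findFirst=True)

            # Constrain on dataset existence in any collection, without
            # returning dataset IDs.
            builder.joinDataset(flatType, ..., isResult=False)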

140 """ 

141 assert datasetType in self.summary.datasets 

142 if isResult and findFirst: 

143 collections = CollectionSearch.fromExpression(collections) 

144 else: 

145 collections = CollectionQuery.fromExpression(collections) 

146 explicitCollections = frozenset(collections.explicitNames()) 

147 # If we are searching all collections with no constraints, loop over 

148 # RUN collections only, because that will include all datasets. 

149 collectionTypes: AbstractSet[CollectionType] 

150 if collections == CollectionQuery(): 

151 collectionTypes = {CollectionType.RUN} 

152 else: 

153 collectionTypes = CollectionType.all() 

154 datasetRecordStorage = self._managers.datasets.find(datasetType.name) 

155 if datasetRecordStorage is None: 

156 # Unrecognized dataset type means no results. It might be better 

157 # to raise here, but this is consistent with previous behavior, 

158 # which is expected by QuantumGraph generation code in pipe_base. 

159 self._doomed_by.append( 

160 f"Dataset type {datasetType.name!r} is not registered, so no instances of it can exist in " 

161 "any collection." 

162 ) 

163 return False 

        subsubqueries = []
        runKeyName = self._managers.collections.getRunForeignKeyName()
        baseColumnNames = {"id", runKeyName, "ingest_date"} if isResult else set()
        baseColumnNames.update(datasetType.dimensions.required.names)
        if not findFirst:
            calibration_collections = []
            other_collections = []
        rejections: List[str] = []
        for rank, collectionRecord in enumerate(
            collections.iter(self._managers.collections, collectionTypes=collectionTypes)
        ):
            # Only include collections that (according to collection
            # summaries) might have datasets of this type and governor
            # dimensions consistent with the query's WHERE clause.
            collection_summary = self._managers.datasets.getCollectionSummary(collectionRecord)
            if not collection_summary.is_compatible_with(
                datasetType,
                self.summary.where.restriction,
                rejections=rejections,
                name=collectionRecord.name,
            ):
                continue
            if collectionRecord.type is CollectionType.CALIBRATION:
                # If the collection name was provided explicitly, raise for
                # queries we do not yet support; otherwise the collection is
                # part of a CHAINED collection and we skip it.
                if datasetType.isCalibration() and collectionRecord.name in explicitCollections:
                    if self.summary.temporal or self.summary.mustHaveKeysJoined.temporal:
                        raise NotImplementedError(
                            f"Temporal query for dataset type '{datasetType.name}' in CALIBRATION-type "
                            f"collection '{collectionRecord.name}' is not yet supported."
                        )
                    elif findFirst:
                        raise NotImplementedError(
                            f"Find-first query for dataset type '{datasetType.name}' in CALIBRATION-type "
                            f"collection '{collectionRecord.name}' is not yet supported."
                        )
                    else:
                        calibration_collections.append(collectionRecord)
                else:
                    # We can never find a non-calibration dataset in a
                    # CALIBRATION collection.
                    rejections.append(
                        f"Not searching for non-calibration dataset {datasetType.name!r} "
                        f"in CALIBRATION collection {collectionRecord.name!r}."
                    )
                    continue
            elif findFirst:
                # If findFirst=True, each collection gets its own subquery so
                # we can add a literal rank for it.
                ssq = datasetRecordStorage.select(
                    collectionRecord,
                    dataId=SimpleQuery.Select,
                    id=SimpleQuery.Select if isResult else None,
                    run=SimpleQuery.Select if isResult else None,
                    ingestDate=SimpleQuery.Select if isResult else None,
                )
                assert {c.name for c in ssq.columns} == baseColumnNames
                ssq.columns.append(sqlalchemy.sql.literal(rank).label("rank"))
                subsubqueries.append(ssq.combine())
            else:
                # If findFirst=False, we have one subquery for all
                # CALIBRATION collections and one subquery for all other
                # collections; we'll assemble those later, after grouping by
                # collection type.
                other_collections.append(collectionRecord)
        if not findFirst:
            if other_collections:
                ssq = datasetRecordStorage.select(
                    *other_collections,
                    dataId=SimpleQuery.Select,
                    id=SimpleQuery.Select if isResult else None,
                    run=SimpleQuery.Select if isResult else None,
                    ingestDate=SimpleQuery.Select if isResult else None,
                )
                subsubqueries.append(ssq.combine())
            if calibration_collections:
                ssq = datasetRecordStorage.select(
                    *calibration_collections,
                    dataId=SimpleQuery.Select,
                    id=SimpleQuery.Select if isResult else None,
                    run=SimpleQuery.Select if isResult else None,
                    ingestDate=SimpleQuery.Select if isResult else None,
                )
                subsubqueries.append(ssq.combine())
        if not subsubqueries:
            if rejections:
                self._doomed_by.extend(rejections)
            else:
                self._doomed_by.append(f"No collections to search matching expression {collections}.")
            # Make a single subquery with no collections that never yields
            # results; this should never get executed, but downstream code
            # still needs to access the SQLAlchemy column objects.
            ssq = datasetRecordStorage.select(
                dataId=SimpleQuery.Select,
                id=SimpleQuery.Select if isResult else None,
                run=SimpleQuery.Select if isResult else None,
                ingestDate=SimpleQuery.Select if isResult else None,
            )
            if findFirst:
                # Use a literal rank of 0; the loop variable ``rank`` may be
                # unbound here if no collections were iterated at all.
                ssq.columns.append(sqlalchemy.sql.literal(0).label("rank"))
            subsubqueries.append(ssq.combine())
        # Although one would expect that these subqueries could be combined
        # with UNION ALL instead of UNION because each subquery is already
        # distinct, it turns out that with many subqueries this causes
        # catastrophic performance problems with both SQLite and PostgreSQL.
        # Using UNION may require more table scans, but it yields a much
        # simpler query plan given our table structures. See DM-31429.
        subquery = sqlalchemy.sql.union(*subsubqueries)
        columns: Optional[DatasetQueryColumns] = None
        if isResult:
            if findFirst:
                # Rewrite the subquery (currently a UNION over
                # per-collection subsubqueries) to select the rows with the
                # lowest rank per data ID. The block below will set subquery
                # to something like this:
                #
                # WITH {dst}_search AS (
                #     SELECT {data-id-cols}, id, run_id, 1 AS rank
                #         FROM <collection1>
                #     UNION
                #     SELECT {data-id-cols}, id, run_id, 2 AS rank
                #         FROM <collection2>
                #     UNION
                #     ...
                # )
                # SELECT
                #     {dst}_window.{data-id-cols},
                #     {dst}_window.id,
                #     {dst}_window.run_id
                # FROM (
                #     SELECT
                #         {dst}_search.{data-id-cols},
                #         {dst}_search.id,
                #         {dst}_search.run_id,
                #         ROW_NUMBER() OVER (
                #             PARTITION BY {dst}_search.{data-id-cols}
                #             ORDER BY rank
                #         ) AS rownum
                #     ) {dst}_window
                # WHERE
                #     {dst}_window.rownum = 1;
                #
                search = subquery.cte(f"{datasetType.name}_search")
                windowDataIdCols = [
                    search.columns[name].label(name) for name in datasetType.dimensions.required.names
                ]
                windowSelectCols = [
                    search.columns["id"].label("id"),
                    search.columns[runKeyName].label(runKeyName),
                    search.columns["ingest_date"].label("ingest_date"),
                ]
                windowSelectCols += windowDataIdCols
                assert {c.name for c in windowSelectCols} == baseColumnNames
                windowSelectCols.append(
                    sqlalchemy.sql.func.row_number()
                    .over(partition_by=windowDataIdCols, order_by=search.columns["rank"])
                    .label("rownum")
                )
                window = (
                    sqlalchemy.sql.select(*windowSelectCols)
                    .select_from(search)
                    .alias(f"{datasetType.name}_window")
                )
                subquery = (
                    sqlalchemy.sql.select(*[window.columns[name].label(name) for name in baseColumnNames])
                    .select_from(window)
                    .where(window.columns["rownum"] == 1)
                    .alias(datasetType.name)
                )
            else:
                subquery = subquery.alias(datasetType.name)
            columns = DatasetQueryColumns(
                datasetType=datasetType,
                id=subquery.columns["id"],
                runKey=subquery.columns[runKeyName],
                ingestDate=subquery.columns["ingest_date"],
            )
        else:
            subquery = subquery.alias(datasetType.name)
        self.joinTable(subquery, datasetType.dimensions.required, datasets=columns)
        return not self._doomed_by

    def joinTable(
        self,
        table: sqlalchemy.sql.FromClause,
        dimensions: NamedValueAbstractSet[Dimension],
        *,
        datasets: Optional[DatasetQueryColumns] = None,
    ) -> None:
        """Join an arbitrary table to the query via dimension relationships.

        External calls to this method should only be necessary for tables
        whose records represent neither datasets nor dimension elements.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in
            the query. The table must have columns with the names of the
            dimensions.
        datasets : `DatasetQueryColumns`, optional
            Columns that identify a dataset that is part of the query
            results.
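
        Examples
        --------
        A hedged sketch; ``builder`` is a `QueryBuilder` and ``my_table`` is
        a hypothetical SQLAlchemy table whose columns are named after the
        dimensions in ``dims`` (a `NamedValueAbstractSet` of `Dimension`)::

            # Hypothetical: join a custom table on its dimension columns;
            # rows will be related to the rest of the query via those keys.
            builder.joinTable(my_table, dims)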

369 """ 

370 unexpectedDimensions = NamedValueSet(dimensions - self.summary.mustHaveKeysJoined.dimensions) 

371 unexpectedDimensions.discard(self.summary.universe.commonSkyPix) 

372 if unexpectedDimensions: 

373 raise NotImplementedError( 

374 f"QueryBuilder does not yet support joining in dimensions {unexpectedDimensions} that " 

375 f"were not provided originally to the QuerySummary object passed at construction." 

376 ) 

377 joinOn = self.startJoin(table, dimensions, dimensions.names) 

378 self.finishJoin(table, joinOn) 

379 if datasets is not None: 

380 assert ( 

381 self._columns.datasets is None 

382 ), "At most one result dataset type can be returned by a query." 

383 self._columns.datasets = datasets 

384 

    def startJoin(
        self, table: sqlalchemy.sql.FromClause, dimensions: Iterable[Dimension], columnNames: Iterable[str]
    ) -> List[sqlalchemy.sql.ColumnElement]:
        """Begin a join on dimensions.

        Must be followed by a call to `finishJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in
            the query. The table must have columns with the names of the
            dimensions.
        columnNames : iterable of `str`
            Names of the columns that correspond to dimension key values;
            must be `zip` iterable with ``dimensions``.

        Returns
        -------
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN.
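
        Examples
        --------
        A hedged sketch of the two-step join protocol; ``builder``,
        ``table``, ``dims``, and the extra ``day_obs`` ON term are all
        hypothetical::

            # Collect equality terms relating this table's dimension key
            # columns to matching columns already in the query...
            joinOn = builder.startJoin(table, dims, dims.names)
            # ...optionally append custom ON terms, then complete the join.
            joinOn.append(table.columns["day_obs"] > 20200101)
            builder.finishJoin(table, joinOn)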

410 """ 

411 joinOn = [] 

412 for dimension, columnName in zip(dimensions, columnNames): 

413 columnInTable = table.columns[columnName] 

414 columnsInQuery = self._columns.keys.setdefault(dimension, []) 

415 for columnInQuery in columnsInQuery: 

416 joinOn.append(columnInQuery == columnInTable) 

417 columnsInQuery.append(columnInTable) 

418 return joinOn 

419 

    def finishJoin(
        self, table: sqlalchemy.sql.FromClause, joinOn: List[sqlalchemy.sql.ColumnElement]
    ) -> None:
        """Complete a join on dimensions.

        Must be preceded by a call to `startJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined. Must be the same
            object passed to `startJoin`.
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN. Should include
            at least the elements of the list returned by `startJoin`.
        """
        onclause: Optional[sqlalchemy.sql.ColumnElement]
        if len(joinOn) == 0:
            onclause = None
        elif len(joinOn) == 1:
            onclause = joinOn[0]
        else:
            onclause = sqlalchemy.sql.and_(*joinOn)
        self._simpleQuery.join(table, onclause=onclause)

    def _joinMissingDimensionElements(self) -> None:
        """Join all dimension element tables that were identified as
        necessary by `QuerySummary` and have not yet been joined.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        # Join all DimensionElement tables that we need for spatial/temporal
        # joins/filters or a nontrivial WHERE expression.
        # We iterate over these in *reverse* topological order to minimize
        # the number of tables joined. For example, the "visit" table
        # provides the primary key value for the "instrument" table it
        # depends on, so we don't need to join "instrument" as well unless
        # we have a nontrivial expression on it (and hence included it
        # already above).
        for element in self.summary.universe.sorted(self.summary.mustHaveTableJoined, reverse=True):
            self.joinDimensionElement(element)
        # Join in any requested Dimension tables that don't already have
        # their primary keys identified by the query.
        for dimension in self.summary.universe.sorted(self.summary.mustHaveKeysJoined, reverse=True):
            if dimension not in self._columns.keys:
                self.joinDimensionElement(dimension)

    def _addWhereClause(self) -> None:
        """Add a WHERE clause to the query under construction, connecting all
        joined dimensions to the expression and data ID dimensions from
        `QuerySummary`.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        if self.summary.where.tree is not None:
            self._simpleQuery.where.append(
                convertExpressionToSql(
                    self.summary.where.tree,
                    self.summary.universe,
                    columns=self._columns,
                    elements=self._elements,
                    bind=self.summary.where.bind,
                    TimespanReprClass=self._managers.TimespanReprClass,
                )
            )
        for dimension, columnsInQuery in self._columns.keys.items():
            if dimension in self.summary.where.dataId.graph:
                givenKey = self.summary.where.dataId[dimension]
                # Add a WHERE term for each column that corresponds to each
                # key. This is redundant with the JOIN ON clauses that make
                # them equal to each other, but more constraints have a
                # chance of making things easier on the DB's query optimizer.
                for columnInQuery in columnsInQuery:
                    self._simpleQuery.where.append(columnInQuery == givenKey)
            else:
                # Dimension is not fully identified, but it might be a skypix
                # dimension that's constrained by a given region.
                if self.summary.where.region is not None and isinstance(dimension, SkyPixDimension):
                    # We know the region now.
                    givenSkyPixIds: List[int] = []
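                    # Illustrative note (hypothetical numbers): the envelope
                    # below yields half-open index ranges such as
                    # [(10, 12), (40, 41)], which expand to the pixel IDs
                    # [10, 11, 40] used in the IN constraint.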

                    for begin, end in dimension.pixelization.envelope(self.summary.where.region):
                        givenSkyPixIds.extend(range(begin, end))
                    for columnInQuery in columnsInQuery:
                        self._simpleQuery.where.append(columnInQuery.in_(givenSkyPixIds))
        # If we are given a data ID with a timespan, and there are one or
        # more timespans in the query that aren't given, add a WHERE
        # expression for each of them.
        if self.summary.where.dataId.graph.temporal and self.summary.temporal:
            # Timespan is known now.
            givenInterval = self.summary.where.dataId.timespan
            assert givenInterval is not None
            for element, intervalInQuery in self._columns.timespans.items():
                assert element not in self.summary.where.dataId.graph.elements
                self._simpleQuery.where.append(
                    intervalInQuery.overlaps(self._managers.TimespanReprClass.fromLiteral(givenInterval))
                )

    def finish(self, joinMissing: bool = True) -> Query:
        """Finish query construction, returning a new `Query` instance.

        Parameters
        ----------
        joinMissing : `bool`, optional
            If `True` (default), automatically join any missing dimension
            element tables (according to the categorization of the
            `QuerySummary` the builder was constructed with). `False` should
            only be passed if the caller can independently guarantee that
            all dimension relationships are already captured in
            non-dimension tables that have been manually included in the
            query.

        Returns
        -------
        query : `Query`
            A `Query` object that can be executed and used to interpret
            result rows.
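
        Examples
        --------
        A hedged sketch; ``builder`` has been populated as in the
        class-level example above::

            # Default: join any missing dimension element tables first.
            query = builder.finish()

            # Only safe if all dimension relationships are already captured
            # by manually joined tables:
            query = builder.finish(joinMissing=False)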

538 """ 

539 if joinMissing: 

540 self._joinMissingDimensionElements() 

541 self._addWhereClause() 

542 if self._columns.isEmpty(): 

543 return EmptyQuery( 

544 self.summary.requested.universe, managers=self._managers, doomed_by=self._doomed_by 

545 ) 

546 return DirectQuery( 

547 graph=self.summary.requested, 

548 uniqueness=DirectQueryUniqueness.NOT_UNIQUE, 

549 whereRegion=self.summary.where.dataId.region, 

550 simpleQuery=self._simpleQuery, 

551 columns=self._columns, 

552 order_by_columns=self._order_by_columns(), 

553 limit=self.summary.limit, 

554 managers=self._managers, 

555 doomed_by=self._doomed_by, 

556 ) 

557 

    def _order_by_columns(self) -> Iterable[OrderByColumn]:
        """Generate columns to be used for the ORDER BY clause.

        Returns
        -------
        order_by_columns : `Iterable` [ `OrderByColumn` ]
            Sequence of columns to appear in the ORDER BY clause.
        """
        order_by_columns: List[OrderByColumn] = []
        if not self.summary.order_by:
            return order_by_columns

        for order_by_column in self.summary.order_by.order_by_columns:

            column: sqlalchemy.sql.ColumnElement
            field_spec: Optional[ddl.FieldSpec]
            dimension: Optional[Dimension] = None
            if order_by_column.column is None:
                # A plain dimension name; its key column must already be in
                # the SELECT list, so we only need to add it to ORDER BY.
                assert isinstance(order_by_column.element, Dimension), "expecting full Dimension"
                column = self._columns.getKeyColumn(order_by_column.element)
                add_to_select = False
                field_spec = None
                dimension = order_by_column.element
            else:
                table = self._elements[order_by_column.element]

                if order_by_column.column in ("timespan.begin", "timespan.end"):
                    TimespanReprClass = self._managers.TimespanReprClass
                    timespan_repr = TimespanReprClass.fromSelectable(table)
                    if order_by_column.column == "timespan.begin":
                        column = timespan_repr.lower()
                        label = f"{order_by_column.element.name}_timespan_begin"
                    else:
                        column = timespan_repr.upper()
                        label = f"{order_by_column.element.name}_timespan_end"
                    field_spec = ddl.FieldSpec(label, dtype=sqlalchemy.BigInteger, nullable=True)
                else:
                    column = table.columns[order_by_column.column]
                    # Make a unique label for it.
                    label = f"{order_by_column.element.name}_{order_by_column.column}"
                    field_spec = order_by_column.element.RecordClass.fields.facts[order_by_column.column]
                    field_spec = dataclasses.replace(field_spec, name=label)

                column = column.label(label)
                add_to_select = True

            order_by_columns.append(
                OrderByColumn(
                    column=column,
                    ordering=order_by_column.ordering,
                    add_to_select=add_to_select,
                    field_spec=field_spec,
                    dimension=dimension,
                )
            )

        return order_by_columns