Coverage for python/lsst/daf/butler/registry/queries/_builder.py: 9%

189 statements

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("QueryBuilder",)

import dataclasses
from typing import AbstractSet, Any, Iterable, List, Optional

import sqlalchemy.sql

from ...core import (
    DimensionElement,
    SkyPixDimension,
    Dimension,
    DatasetType,
    SimpleQuery,
)
from ...core.named import NamedKeyDict, NamedValueAbstractSet, NamedValueSet
from ...core import ddl

from .._collectionType import CollectionType
from ._structs import QuerySummary, QueryColumns, DatasetQueryColumns, RegistryManagers
from .expressions import convertExpressionToSql
from ._query import DirectQuery, DirectQueryUniqueness, EmptyQuery, OrderByColumn, Query
from ..wildcards import CollectionSearch, CollectionQuery


class QueryBuilder:
    """A builder for potentially complex queries that join tables based
    on dimension relationships.

    Parameters
    ----------
    summary : `QuerySummary`
        Struct organizing the dimensions involved in the query.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.
    doomed_by : `Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed. Queries with a non-empty list will never be executed.
    """

    def __init__(self, summary: QuerySummary, managers: RegistryManagers, doomed_by: Iterable[str] = ()):
        self.summary = summary
        self._simpleQuery = SimpleQuery()
        self._elements: NamedKeyDict[DimensionElement, sqlalchemy.sql.FromClause] = NamedKeyDict()
        self._columns = QueryColumns()
        self._managers = managers
        self._doomed_by = list(doomed_by)
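
    # A minimal usage sketch (hypothetical ``summary``, ``managers``, and
    # ``datasetType``; constructing those structs is out of scope here):
    #
    #     builder = QueryBuilder(summary, managers)
    #     builder.joinDataset(datasetType, collections=["run/a", "run/b"], findFirst=True)
    #     query = builder.finish()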

    def hasDimensionKey(self, dimension: Dimension) -> bool:
        """Return `True` if the given dimension's primary key column has
        been included in the query (possibly via a foreign key column on some
        other table).
        """
        return dimension in self._columns.keys

    def joinDimensionElement(self, element: DimensionElement) -> None:
        """Add the table for a `DimensionElement` to the query.

        This automatically joins the element table to all other tables in the
        query with which it is related, via both dimension keys and spatial
        and temporal relationships.

        External calls to this method should rarely be necessary; `finish`
        will automatically call it if the `DimensionElement` has been
        identified as one that must be included.

        Parameters
        ----------
        element : `DimensionElement`
            Element for which a table should be added. The element must be
            associated with a database table (see `DimensionElement.hasTable`).
        """
        assert element not in self._elements, "Element already included in query."
        storage = self._managers.dimensions[element]
        fromClause = storage.join(
            self,
            regions=self._columns.regions if element in self.summary.spatial else None,
            timespans=self._columns.timespans if element in self.summary.temporal else None,
        )
        self._elements[element] = fromClause
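
    # Hypothetical example of an explicit call (``finish`` normally does this
    # automatically for every element that must be joined):
    #
    #     builder.joinDimensionElement(builder.summary.universe["visit"])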


    def joinDataset(self, datasetType: DatasetType, collections: Any, *,
                    isResult: bool = True, findFirst: bool = False) -> bool:
        """Add a dataset search or constraint to the query.

        Unlike other `QueryBuilder` join methods, this *must* be called
        directly to search for datasets of a particular type or to constrain
        the query results based on the existence of datasets. However, all
        dimensions used to identify the dataset type must have already been
        included in `QuerySummary.requested` when initializing the
        `QueryBuilder`.

        Parameters
        ----------
        datasetType : `DatasetType`
            The type of datasets to search for.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for datasets, such as a `str`, `re.Pattern`, or iterable
            thereof. `...` can be used to search all collections. See
            :ref:`daf_butler_collection_expressions` for more information.
        isResult : `bool`, optional
            If `True` (default), include the dataset ID column in the
            result columns of the query, allowing complete `DatasetRef`
            instances to be produced from the query results for this dataset
            type. If `False`, the existence of datasets of this type is used
            only to constrain the data IDs returned by the query.
            `joinDataset` may be called with ``isResult=True`` at most once
            on a particular `QueryBuilder` instance.
        findFirst : `bool`, optional
            If `True` (`False` is default), only include the first match for
            each data ID, searching the given collections in order. Requires
            that all entries in ``collections`` be regular strings, so there
            is a clear search order. Ignored if ``isResult`` is `False`.

        Returns
        -------
        anyRecords : `bool`
            If `True`, joining the dataset table was successful and the query
            should proceed. If `False`, we were able to determine (from the
            combination of ``datasetType`` and ``collections``) that there
            would be no results joined in from this dataset, and hence (due
            to the inner join that would normally be present) the full query
            will return no results.
        """

        assert datasetType in self.summary.datasets
        if isResult and findFirst:
            collections = CollectionSearch.fromExpression(collections)
        else:
            collections = CollectionQuery.fromExpression(collections)
        explicitCollections = frozenset(collections.explicitNames())
        # If we are searching all collections with no constraints, loop over
        # RUN collections only, because that will include all datasets.
        collectionTypes: AbstractSet[CollectionType]
        if collections == CollectionQuery():
            collectionTypes = {CollectionType.RUN}
        else:
            collectionTypes = CollectionType.all()
        datasetRecordStorage = self._managers.datasets.find(datasetType.name)
        if datasetRecordStorage is None:
            # Unrecognized dataset type means no results. It might be better
            # to raise here, but this is consistent with previous behavior,
            # which is expected by QuantumGraph generation code in pipe_base.
            self._doomed_by.append(
                f"Dataset type {datasetType.name!r} is not registered, so no instances of it can exist in "
                "any collection."
            )
            return False

        subsubqueries = []
        runKeyName = self._managers.collections.getRunForeignKeyName()
        baseColumnNames = {"id", runKeyName, "ingest_date"} if isResult else set()
        baseColumnNames.update(datasetType.dimensions.required.names)
        if not findFirst:
            calibration_collections = []
            other_collections = []
        rejections: List[str] = []
        for rank, collectionRecord in enumerate(collections.iter(self._managers.collections,
                                                                 collectionTypes=collectionTypes)):
            # Only include collections that (according to collection
            # summaries) might have datasets of this type and governor
            # dimensions consistent with the query's WHERE clause.
            collection_summary = self._managers.datasets.getCollectionSummary(collectionRecord)
            if not collection_summary.is_compatible_with(
                datasetType,
                self.summary.where.restriction,
                rejections=rejections,
                name=collectionRecord.name,
            ):
                continue
            if collectionRecord.type is CollectionType.CALIBRATION:
                # If the collection was named explicitly, complain; otherwise
                # it came in via a CHAINED collection and we just skip it.
                if datasetType.isCalibration() and collectionRecord.name in explicitCollections:
                    if self.summary.temporal or self.summary.mustHaveKeysJoined.temporal:
                        raise NotImplementedError(
                            f"Temporal query for dataset type '{datasetType.name}' in CALIBRATION-type "
                            f"collection '{collectionRecord.name}' is not yet supported."
                        )
                    elif findFirst:
                        raise NotImplementedError(
                            f"Find-first query for dataset type '{datasetType.name}' in CALIBRATION-type "
                            f"collection '{collectionRecord.name}' is not yet supported."
                        )
                    else:
                        calibration_collections.append(collectionRecord)
                else:
                    # We can never find a non-calibration dataset in a
                    # CALIBRATION collection.
                    rejections.append(
                        f"Not searching for non-calibration dataset {datasetType.name!r} "
                        f"in CALIBRATION collection {collectionRecord.name!r}."
                    )
                    continue
            elif findFirst:
                # If findFirst=True, each collection gets its own subquery so
                # we can add a literal rank for it.
                ssq = datasetRecordStorage.select(
                    collectionRecord,
                    dataId=SimpleQuery.Select,
                    id=SimpleQuery.Select if isResult else None,
                    run=SimpleQuery.Select if isResult else None,
                    ingestDate=SimpleQuery.Select if isResult else None,
                )
                assert {c.name for c in ssq.columns} == baseColumnNames
                ssq.columns.append(sqlalchemy.sql.literal(rank).label("rank"))
                subsubqueries.append(ssq.combine())
            else:
                # If findFirst=False, we build one subquery for all
                # CALIBRATION collections and one for all other collections;
                # those are assembled after this loop, once the collections
                # have been grouped by type.
                other_collections.append(collectionRecord)

        if not findFirst:
            if other_collections:
                ssq = datasetRecordStorage.select(
                    *other_collections,
                    dataId=SimpleQuery.Select,
                    id=SimpleQuery.Select if isResult else None,
                    run=SimpleQuery.Select if isResult else None,
                    ingestDate=SimpleQuery.Select if isResult else None,
                )
                subsubqueries.append(ssq.combine())
            if calibration_collections:
                ssq = datasetRecordStorage.select(
                    *calibration_collections,
                    dataId=SimpleQuery.Select,
                    id=SimpleQuery.Select if isResult else None,
                    run=SimpleQuery.Select if isResult else None,
                    ingestDate=SimpleQuery.Select if isResult else None,
                )
                subsubqueries.append(ssq.combine())

        if not subsubqueries:
            if rejections:
                self._doomed_by.extend(rejections)
            else:
                self._doomed_by.append(f"No collections to search matching expression {collections}.")
            # Make a single subquery with no collections that never yields
            # results; this should never get executed, but downstream code
            # still needs to access the SQLAlchemy column objects.
            ssq = datasetRecordStorage.select(
                dataId=SimpleQuery.Select,
                id=SimpleQuery.Select if isResult else None,
                run=SimpleQuery.Select if isResult else None,
                ingestDate=SimpleQuery.Select if isResult else None,
            )
            if findFirst:
                # ``rank`` may be undefined here (the loop above may never
                # have run), so use a literal 0 for this never-executed query.
                ssq.columns.append(sqlalchemy.sql.literal(0).label("rank"))
            subsubqueries.append(ssq.combine())
        # Although one would expect that these subqueries could be combined
        # with UNION ALL instead of UNION (each subquery is already distinct),
        # it turns out that with many subqueries UNION ALL causes catastrophic
        # performance problems with both sqlite and postgres. Using UNION may
        # require more table scans, but it yields a much simpler query plan
        # given our table structures. See DM-31429.
        subquery = sqlalchemy.sql.union(*subsubqueries)

        columns: Optional[DatasetQueryColumns] = None
        if isResult:
            if findFirst:
                # Rewrite the subquery (currently a UNION over per-collection
                # subsubqueries) to select the rows with the lowest rank per
                # data ID. The block below will set subquery to something
                # like this:
                #
                # WITH {dst}_search AS (
                #     SELECT {data-id-cols}, id, run_id, 1 AS rank
                #         FROM <collection1>
                #     UNION
                #     SELECT {data-id-cols}, id, run_id, 2 AS rank
                #         FROM <collection2>
                #     UNION
                #     ...
                # )
                # SELECT
                #     {dst}_window.{data-id-cols},
                #     {dst}_window.id,
                #     {dst}_window.run_id
                # FROM (
                #     SELECT
                #         {dst}_search.{data-id-cols},
                #         {dst}_search.id,
                #         {dst}_search.run_id,
                #         ROW_NUMBER() OVER (
                #             PARTITION BY {dst}_search.{data-id-cols}
                #             ORDER BY rank
                #         ) AS rownum
                #     FROM {dst}_search
                # ) {dst}_window
                # WHERE
                #     {dst}_window.rownum = 1;
                #
                search = subquery.cte(f"{datasetType.name}_search")
                windowDataIdCols = [
                    search.columns[name].label(name) for name in datasetType.dimensions.required.names
                ]
                windowSelectCols = [
                    search.columns["id"].label("id"),
                    search.columns[runKeyName].label(runKeyName),
                    search.columns["ingest_date"].label("ingest_date"),
                ]
                windowSelectCols += windowDataIdCols
                assert {c.name for c in windowSelectCols} == baseColumnNames
                windowSelectCols.append(
                    sqlalchemy.sql.func.row_number().over(
                        partition_by=windowDataIdCols,
                        order_by=search.columns["rank"]
                    ).label("rownum")
                )
                window = sqlalchemy.sql.select(
                    *windowSelectCols
                ).select_from(search).alias(
                    f"{datasetType.name}_window"
                )
                subquery = sqlalchemy.sql.select(
                    *[window.columns[name].label(name) for name in baseColumnNames]
                ).select_from(
                    window
                ).where(
                    window.columns["rownum"] == 1
                ).alias(datasetType.name)
            else:
                subquery = subquery.alias(datasetType.name)
            columns = DatasetQueryColumns(
                datasetType=datasetType,
                id=subquery.columns["id"],
                runKey=subquery.columns[runKeyName],
                ingestDate=subquery.columns["ingest_date"],
            )
        else:
            subquery = subquery.alias(datasetType.name)
        self.joinTable(subquery, datasetType.dimensions.required, datasets=columns)
        return not self._doomed_by
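
    # A hedged usage sketch (hypothetical dataset type and collection name;
    # ``findFirst=True`` requires explicit collection name strings so the
    # search order is unambiguous):
    #
    #     if builder.joinDataset(rawType, ["HSC/raw/all"], findFirst=True):
    #         query = builder.finish()
    #     else:
    #         ...  # doomed: executing the query would return no rows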

    def joinTable(self, table: sqlalchemy.sql.FromClause, dimensions: NamedValueAbstractSet[Dimension], *,
                  datasets: Optional[DatasetQueryColumns] = None) -> None:
        """Join an arbitrary table to the query via dimension relationships.

        External calls to this method should only be necessary for tables
        whose records represent neither datasets nor dimension elements.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in
            the query. The table must have columns with the names of the
            dimensions.
        datasets : `DatasetQueryColumns`, optional
            Columns that identify a dataset that is part of the query
            results.
        """
        unexpectedDimensions = NamedValueSet(dimensions - self.summary.mustHaveKeysJoined.dimensions)
        unexpectedDimensions.discard(self.summary.universe.commonSkyPix)
        if unexpectedDimensions:
            raise NotImplementedError(
                f"QueryBuilder does not yet support joining in dimensions {unexpectedDimensions} that "
                f"were not provided originally to the QuerySummary object passed at construction."
            )
        joinOn = self.startJoin(table, dimensions, dimensions.names)
        self.finishJoin(table, joinOn)
        if datasets is not None:
            assert self._columns.datasets is None, \
                "At most one result dataset type can be returned by a query."
            self._columns.datasets = datasets

    def startJoin(self, table: sqlalchemy.sql.FromClause, dimensions: Iterable[Dimension],
                  columnNames: Iterable[str]
                  ) -> List[sqlalchemy.sql.ColumnElement]:
        """Begin a join on dimensions.

        Must be followed by a call to `finishJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in
            the query. The table must have columns with the names of the
            dimensions.
        columnNames : iterable of `str`
            Names of the columns that correspond to dimension key values;
            must be `zip` iterable with ``dimensions``.

        Returns
        -------
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN.
        """
        joinOn = []
        for dimension, columnName in zip(dimensions, columnNames):
            columnInTable = table.columns[columnName]
            columnsInQuery = self._columns.keys.setdefault(dimension, [])
            for columnInQuery in columnsInQuery:
                joinOn.append(columnInQuery == columnInTable)
            columnsInQuery.append(columnInTable)
        return joinOn

    def finishJoin(self, table: sqlalchemy.sql.FromClause, joinOn: List[sqlalchemy.sql.ColumnElement]
                   ) -> None:
        """Complete a join on dimensions.

        Must be preceded by a call to `startJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined. Must be the same
            object passed to `startJoin`.
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN. Should include
            at least the elements of the list returned by `startJoin`.
        """
        onclause: Optional[sqlalchemy.sql.ColumnElement]
        if len(joinOn) == 0:
            onclause = None
        elif len(joinOn) == 1:
            onclause = joinOn[0]
        else:
            onclause = sqlalchemy.sql.and_(*joinOn)
        self._simpleQuery.join(table, onclause=onclause)
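
    # Sketch of the two-step join protocol above (hypothetical ``table`` and
    # ``dims``; extra ON terms may be appended between the two calls):
    #
    #     joinOn = builder.startJoin(table, dims, dims.names)
    #     joinOn.append(table.columns["flag"] == True)  # hypothetical column
    #     builder.finishJoin(table, joinOn)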

    def _joinMissingDimensionElements(self) -> None:
        """Join all dimension element tables that were identified as
        necessary by `QuerySummary` and have not yet been joined.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        # Join all DimensionElement tables that we need for spatial/temporal
        # joins/filters or a nontrivial WHERE expression.
        # We iterate over these in *reverse* topological order to minimize
        # the number of tables joined. For example, the "visit" table
        # provides the primary key value for the "instrument" table it
        # depends on, so we don't need to join "instrument" as well unless we
        # had a nontrivial expression on it (and hence included it already
        # above).
        for element in self.summary.universe.sorted(self.summary.mustHaveTableJoined, reverse=True):
            self.joinDimensionElement(element)
        # Join in any requested Dimension tables that don't already have
        # their primary keys identified by the query.
        for dimension in self.summary.universe.sorted(self.summary.mustHaveKeysJoined, reverse=True):
            if dimension not in self._columns.keys:
                self.joinDimensionElement(dimension)

    def _addWhereClause(self) -> None:
        """Add a WHERE clause to the query under construction, connecting all
        joined dimensions to the expression and data ID dimensions from
        `QuerySummary`.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        if self.summary.where.tree is not None:
            self._simpleQuery.where.append(
                convertExpressionToSql(
                    self.summary.where.tree,
                    self.summary.universe,
                    columns=self._columns,
                    elements=self._elements,
                    bind=self.summary.where.bind,
                    TimespanReprClass=self._managers.TimespanReprClass,
                )
            )
        for dimension, columnsInQuery in self._columns.keys.items():
            if dimension in self.summary.where.dataId.graph:
                givenKey = self.summary.where.dataId[dimension]
                # Add a WHERE term for each column that corresponds to each
                # key. This is redundant with the JOIN ON clauses that make
                # them equal to each other, but more constraints have a
                # chance of making things easier on the DB's query optimizer.
                for columnInQuery in columnsInQuery:
                    self._simpleQuery.where.append(columnInQuery == givenKey)
            else:
                # Dimension is not fully identified, but it might be a skypix
                # dimension that's constrained by a given region.
                if self.summary.where.region is not None and isinstance(dimension, SkyPixDimension):
                    # We know the region now.
                    givenSkyPixIds: List[int] = []
                    for begin, end in dimension.pixelization.envelope(self.summary.where.region):
                        givenSkyPixIds.extend(range(begin, end))
                    for columnInQuery in columnsInQuery:
                        self._simpleQuery.where.append(columnInQuery.in_(givenSkyPixIds))
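
        # Worked example of the envelope expansion above (hypothetical
        # values): a region whose envelope is [(1024, 1027), (2048, 2050)]
        # yields skypix IDs [1024, 1025, 1026, 2048, 2049], since each
        # ``(begin, end)`` range is half-open; these become an ``IN``
        # constraint on every matching column.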

        # If we are given a dataId with a timespan, and there are one or more
        # timespans in the query that aren't given, add a WHERE expression
        # for each of them.
        if self.summary.where.dataId.graph.temporal and self.summary.temporal:
            # Timespan is known now.
            givenInterval = self.summary.where.dataId.timespan
            assert givenInterval is not None
            for element, intervalInQuery in self._columns.timespans.items():
                assert element not in self.summary.where.dataId.graph.elements
                self._simpleQuery.where.append(
                    intervalInQuery.overlaps(self._managers.TimespanReprClass.fromLiteral(givenInterval))
                )

    def finish(self, joinMissing: bool = True) -> Query:
        """Finish constructing the query, returning a new `Query` instance.

        Parameters
        ----------
        joinMissing : `bool`, optional
            If `True` (default), automatically join any missing dimension
            element tables (according to the categorization of the
            `QuerySummary` the builder was constructed with). `False` should
            only be passed if the caller can independently guarantee that all
            dimension relationships are already captured in non-dimension
            tables that have been manually included in the query.

        Returns
        -------
        query : `Query`
            A `Query` object that can be executed and used to interpret
            result rows.
        """
        if joinMissing:
            self._joinMissingDimensionElements()
        self._addWhereClause()
        if self._columns.isEmpty():
            return EmptyQuery(self.summary.requested.universe, managers=self._managers,
                              doomed_by=self._doomed_by)
        return DirectQuery(graph=self.summary.requested,
                           uniqueness=DirectQueryUniqueness.NOT_UNIQUE,
                           whereRegion=self.summary.where.dataId.region,
                           simpleQuery=self._simpleQuery,
                           columns=self._columns,
                           order_by_columns=self._order_by_columns(),
                           limit=self.summary.limit,
                           managers=self._managers,
                           doomed_by=self._doomed_by)

    def _order_by_columns(self) -> Iterable[OrderByColumn]:
        """Generate columns to be used for the ORDER BY clause.

        Returns
        -------
        order_by_columns : `Iterable` [ `OrderByColumn` ]
            Sequence of columns to appear in the ORDER BY clause.
        """
        order_by_columns: List[OrderByColumn] = []
        if not self.summary.order_by:
            return order_by_columns

        for order_by_column in self.summary.order_by.order_by_columns:

            column: sqlalchemy.sql.ColumnElement
            field_spec: Optional[ddl.FieldSpec]
            dimension: Optional[Dimension] = None
            if order_by_column.column is None:
                # A dimension name; its key column must already be in the
                # SELECT list, so we only add it to ORDER BY.
                assert isinstance(order_by_column.element, Dimension), "expecting full Dimension"
                column = self._columns.getKeyColumn(order_by_column.element)
                add_to_select = False
                field_spec = None
                dimension = order_by_column.element
            else:
                table = self._elements[order_by_column.element]

                if order_by_column.column in ("timespan.begin", "timespan.end"):
                    TimespanReprClass = self._managers.TimespanReprClass
                    timespan_repr = TimespanReprClass.fromSelectable(table)
                    if order_by_column.column == "timespan.begin":
                        column = timespan_repr.lower()
                        label = f"{order_by_column.element.name}_timespan_begin"
                    else:
                        column = timespan_repr.upper()
                        label = f"{order_by_column.element.name}_timespan_end"
                    field_spec = ddl.FieldSpec(label, dtype=sqlalchemy.BigInteger, nullable=True)
                else:
                    column = table.columns[order_by_column.column]
                    # Make a unique label for it.
                    label = f"{order_by_column.element.name}_{order_by_column.column}"
                    field_spec = order_by_column.element.RecordClass.fields.facts[order_by_column.column]
                    field_spec = dataclasses.replace(field_spec, name=label)

                column = column.label(label)
                add_to_select = True

            order_by_columns.append(
                OrderByColumn(column=column, ordering=order_by_column.ordering,
                              add_to_select=add_to_select, field_spec=field_spec,
                              dimension=dimension)
            )

        return order_by_columns