Coverage for python/lsst/daf/butler/registry/queries/_builder.py: 12%
184 statements
coverage.py v6.4.4, created at 2022-09-27 08:58 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("QueryBuilder",)

from collections.abc import Iterable, Set
from typing import Any

import sqlalchemy.sql

from ...core import DatasetType, Dimension, DimensionElement, SimpleQuery, SkyPixDimension
from ...core.named import NamedKeyDict, NamedValueAbstractSet, NamedValueSet
from .._collectionType import CollectionType
from .._exceptions import DataIdValueError
from ..interfaces import CollectionRecord, DatasetRecordStorage, GovernorDimensionRecordStorage
from ..wildcards import CollectionQuery, CollectionSearch
from ._query import DirectQuery, DirectQueryUniqueness, EmptyQuery, OrderByColumn, Query
from ._query_backend import QueryBackend
from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary
from .expressions import convertExpressionToSql


class QueryBuilder:
    """A builder for potentially complex queries that join tables based
    on dimension relationships.

    Parameters
    ----------
    summary : `QuerySummary`
        Struct organizing the dimensions involved in the query.
    backend : `QueryBackend`
        Backend object that represents the `Registry` implementation.
    doomed_by : `Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed. Queries with a non-empty list will never be executed.
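
    Notes
    -----
    A minimal usage sketch, assuming ``summary`` and ``backend`` objects have
    already been obtained from a `Registry` implementation; the dataset type
    and collection name below are illustrative placeholders, not part of this
    module::

        builder = QueryBuilder(summary, backend)
        builder.joinDataset(raw_dataset_type, ["some/collection"], isResult=True)
        query = builder.finish()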
    """

    def __init__(
        self,
        summary: QuerySummary,
        backend: QueryBackend,
        doomed_by: Iterable[str] = (),
    ):
        self.summary = summary
        self._backend = backend
        self._simpleQuery = SimpleQuery()
        self._elements: NamedKeyDict[DimensionElement, sqlalchemy.sql.FromClause] = NamedKeyDict()
        self._columns = QueryColumns()
        self._doomed_by = list(doomed_by)

        self._validateGovernors()

    def _validateGovernors(self) -> None:
        """Check that the governor dimension values specified by the query
        actually exist.

        This helps to avoid mistakes in governor values. It also implements
        consistent failure behavior for cases when governor dimensions are
        specified in either the data ID or the WHERE clause.

        Raises
        ------
        DataIdValueError
            Raised when governor dimension values are not found.
        """
        for dimension, bounds in self.summary.where.governor_constraints.items():
            storage = self._backend.managers.dimensions[self._backend.universe[dimension]]
            if isinstance(storage, GovernorDimensionRecordStorage):
                if not (storage.values >= bounds):
                    raise DataIdValueError(
                        f"Unknown values specified for governor dimension {dimension}: "
                        f"{set(bounds - storage.values)}."
                    )

    def hasDimensionKey(self, dimension: Dimension) -> bool:
        """Return `True` if the given dimension's primary key column has
        been included in the query (possibly via a foreign key column on some
        other table).
        """
        return dimension in self._columns.keys

    def joinDimensionElement(self, element: DimensionElement) -> None:
        """Add the table for a `DimensionElement` to the query.

        This automatically joins the element table to all other tables in the
        query with which it is related, via both dimension keys and spatial
        and temporal relationships.

        External calls to this method should rarely be necessary; `finish`
        will automatically call it if the `DimensionElement` has been
        identified as one that must be included.

        Parameters
        ----------
        element : `DimensionElement`
            Element for which a table should be added. The element must be
            associated with a database table (see `DimensionElement.hasTable`).
        """
        assert element not in self._elements, "Element already included in query."
        storage = self._backend.managers.dimensions[element]
        fromClause = storage.join(
            self,
            regions=self._columns.regions if element in self.summary.spatial else None,
            timespans=self._columns.timespans if element in self.summary.temporal else None,
        )
        self._elements[element] = fromClause

    def joinDataset(
        self, datasetType: DatasetType, collections: Any, *, isResult: bool = True, findFirst: bool = False
    ) -> bool:
        """Add a dataset search or constraint to the query.

        Unlike other `QueryBuilder` join methods, this *must* be called
        directly to search for datasets of a particular type or to constrain
        the query results based on the existence of datasets. However, all
        dimensions used to identify the dataset type must have already been
        included in `QuerySummary.requested` when initializing the
        `QueryBuilder`.

        Parameters
        ----------
        datasetType : `DatasetType`
            The type of datasets to search for.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for datasets, such as a `str`, `re.Pattern`, or iterable
            thereof. `...` can be used to return all collections. See
            :ref:`daf_butler_collection_expressions` for more information.
        isResult : `bool`, optional
            If `True` (default), include the dataset ID column in the
            result columns of the query, allowing complete `DatasetRef`
            instances to be produced from the query results for this dataset
            type. If `False`, the existence of datasets of this type is used
            only to constrain the data IDs returned by the query.
            `joinDataset` may be called with ``isResult=True`` at most one
            time on a particular `QueryBuilder` instance.
        findFirst : `bool`, optional
            If `True` (`False` is default), only include the first match for
            each data ID, searching the given collections in order. Requires
            that all entries in ``collections`` be regular strings, so there
            is a clear search order. Ignored if ``isResult`` is `False`.

        Returns
        -------
        anyRecords : `bool`
            If `True`, joining the dataset table was successful and the query
            should proceed. If `False`, we were able to determine (from the
            combination of ``datasetType`` and ``collections``) that there
            would be no results joined in from this dataset, and hence (due to
            the inner join that would normally be present), the full query
            will return no results.
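
        Examples
        --------
        An illustrative sketch, assuming the builder and the dataset types
        already exist; the collection name is a placeholder, not part of this
        module::

            # Return matching datasets as query results.
            builder.joinDataset(raw_dataset_type, ["some/collection"], isResult=True)
            # Only constrain data IDs on dataset existence, searching all
            # collections; no dataset columns are returned.
            builder.joinDataset(bias_dataset_type, ..., isResult=False)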
        """
        assert datasetType in self.summary.datasets
        if isResult and findFirst:
            collections = CollectionSearch.fromExpression(collections)
        else:
            collections = CollectionQuery.fromExpression(collections)
        explicitCollections = frozenset(collections.explicitNames())
        # If we are searching all collections with no constraints, loop over
        # RUN collections only, because that will include all datasets.
        collectionTypes: Set[CollectionType]
        if collections == CollectionQuery():
            collectionTypes = {CollectionType.RUN}
        else:
            collectionTypes = CollectionType.all()
        datasetRecordStorage = self._backend.managers.datasets.find(datasetType.name)
        if datasetRecordStorage is None:
            # Unrecognized dataset type means no results. It might be better
            # to raise here, but this is consistent with previous behavior,
            # which is expected by QuantumGraph generation code in pipe_base.
            self._doomed_by.append(
                f"Dataset type {datasetType.name!r} is not registered, so no instances of it can exist in "
                "any collection."
            )
            return False
        collectionRecords: list[CollectionRecord] = []
        rejections: list[str] = []
        for collectionRecord in collections.iter(
            self._backend.managers.collections, collectionTypes=collectionTypes
        ):
            # Only include collections that (according to collection
            # summaries) might have datasets of this type and governor
            # dimensions consistent with the query's WHERE clause.
            collection_summary = self._backend.managers.datasets.getCollectionSummary(collectionRecord)
            if not collection_summary.is_compatible_with(
                datasetType,
                self.summary.where.governor_constraints,
                rejections=rejections,
                name=collectionRecord.name,
            ):
                continue
            if collectionRecord.type is CollectionType.CALIBRATION:
                # If the collection was named explicitly, raise for query
                # types we do not support yet; otherwise the collection came
                # from a CHAINED collection or a regex match, and we skip it
                # so we do not break queries of the other included
                # collections.
                if datasetType.isCalibration():
                    if self.summary.temporal or self.summary.mustHaveKeysJoined.temporal:
                        if collectionRecord.name in explicitCollections:
                            raise NotImplementedError(
                                f"Temporal query for dataset type '{datasetType.name}' in CALIBRATION-type "
                                f"collection '{collectionRecord.name}' is not yet supported."
                            )
                        else:
                            rejections.append(
                                f"Not searching for dataset {datasetType.name!r} in CALIBRATION collection "
                                f"{collectionRecord.name!r} because temporal calibration queries aren't "
                                "implemented; this is not an error only because the query structure implies "
                                "that searching this collection may be incidental."
                            )
                            continue
                    elif findFirst:
                        if collectionRecord.name in explicitCollections:
                            raise NotImplementedError(
                                f"Find-first query for dataset type '{datasetType.name}' in "
                                f"CALIBRATION-type collection '{collectionRecord.name}' is not yet "
                                "supported."
                            )
                        else:
                            rejections.append(
                                f"Not searching for dataset {datasetType.name!r} in CALIBRATION collection "
                                f"{collectionRecord.name!r} because find-first calibration queries aren't "
                                "implemented; this is not an error only because the query structure implies "
                                "that searching this collection may be incidental."
                            )
                            continue
                    else:
                        collectionRecords.append(collectionRecord)
                else:
                    # We can never find a non-calibration dataset in a
                    # CALIBRATION collection.
                    rejections.append(
                        f"Not searching for non-calibration dataset {datasetType.name!r} "
                        f"in CALIBRATION collection {collectionRecord.name!r}."
                    )
                    continue
            else:
                collectionRecords.append(collectionRecord)
        if isResult:
            if findFirst:
                subquery = self._build_dataset_search_subquery(
                    datasetRecordStorage,
                    collectionRecords,
                )
            else:
                subquery = self._build_dataset_query_subquery(
                    datasetRecordStorage,
                    collectionRecords,
                )
            columns = DatasetQueryColumns(
                datasetType=datasetType,
                id=subquery.columns["id"],
                runKey=subquery.columns[self._backend.managers.collections.getRunForeignKeyName()],
                ingestDate=subquery.columns["ingest_date"],
            )
        else:
            subquery = self._build_dataset_constraint_subquery(datasetRecordStorage, collectionRecords)
            columns = None
        self.joinTable(subquery, datasetType.dimensions.required, datasets=columns)
        if not collectionRecords:
            if rejections:
                self._doomed_by.extend(rejections)
            else:
                self._doomed_by.append(f"No collections to search matching expression {collections}.")
            return False
        return not self._doomed_by

    def _build_dataset_constraint_subquery(
        self, storage: DatasetRecordStorage, collections: list[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that does not return dataset results.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched. Collections with no
            datasets of this type or with governor dimensions incompatible
            with the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should also be filtered
            out if this is a temporal query.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object. Has columns for each
            dataset type dimension, or an unspecified column (just to prevent
            SQL syntax errors) where there is no data ID.
        """
        return storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            # If this dataset type has no dimensions, we're in danger of
            # generating an invalid subquery that has no columns in the
            # SELECT clause. An easy fix is to just select some arbitrary
            # column that goes unused, like the dataset ID.
            id=None if storage.datasetType.dimensions else SimpleQuery.Select,
            run=None,
            ingestDate=None,
            timespan=None,
        ).alias(storage.datasetType.name)

    def _build_dataset_query_subquery(
        self, storage: DatasetRecordStorage, collections: list[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that returns all matching dataset results.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched. Collections with no
            datasets of this type or with governor dimensions incompatible
            with the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should also be filtered
            out if this is a temporal query.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object. Has columns for each dataset
            type dimension, the dataset ID, the `~CollectionType.RUN`
            collection key, and the ingest date.
        """
        sql = storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            id=SimpleQuery.Select,
            run=SimpleQuery.Select,
            ingestDate=SimpleQuery.Select,
            timespan=None,
        ).alias(storage.datasetType.name)
        return sql

    def _build_dataset_search_subquery(
        self, storage: DatasetRecordStorage, collections: list[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that returns the first matching dataset for each data ID and
        dataset type name from an ordered list of collections.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched. Collections with no
            datasets of this type or with governor dimensions incompatible
            with the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should be filtered out
            as well.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object. Has columns for each dataset
            type dimension, the dataset ID, the `~CollectionType.RUN`
            collection key, and the ingest date.
        """
        # Query-simplification shortcut: if there is only one collection, a
        # find-first search is just a regular result subquery. Same is true
        # if this is a doomed query with no collections to search.
        if len(collections) <= 1:
            return self._build_dataset_query_subquery(storage, collections)
        # In the more general case, we build a subquery of the form below to
        # search the collections in order.
        #
        #     WITH {dst}_search AS (
        #         SELECT {data-id-cols}, id, run_id, 1 AS rank
        #             FROM <collection1>
        #         UNION ALL
        #         SELECT {data-id-cols}, id, run_id, 2 AS rank
        #             FROM <collection2>
        #         UNION ALL
        #         ...
        #     )
        #     SELECT
        #         {dst}_window.{data-id-cols},
        #         {dst}_window.id,
        #         {dst}_window.run_id
        #     FROM (
        #         SELECT
        #             {dst}_search.{data-id-cols},
        #             {dst}_search.id,
        #             {dst}_search.run_id,
        #             ROW_NUMBER() OVER (
        #                 PARTITION BY {dst}_search.{data-id-cols}
        #                 ORDER BY rank
        #             ) AS rownum
        #         FROM {dst}_search
        #     ) {dst}_window
        #     WHERE
        #         {dst}_window.rownum = 1;
        #
        # We'll start with the Common Table Expression (CTE) at the top.
        search = storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            id=SimpleQuery.Select,
            run=SimpleQuery.Select,
            ingestDate=SimpleQuery.Select,
            timespan=None,
            rank=SimpleQuery.Select,
        ).cte(f"{storage.datasetType.name}_search")
        # Now we fill out the SELECT from the CTE, and the subquery it
        # contains (at the same time, since they have the same columns, aside
        # from the OVER clause).
        run_key_name = self._backend.managers.collections.getRunForeignKeyName()
        window_data_id_cols = [
            search.columns[name].label(name) for name in storage.datasetType.dimensions.required.names
        ]
        window_select_cols = [
            search.columns["id"].label("id"),
            search.columns[run_key_name].label(run_key_name),
            search.columns["ingest_date"].label("ingest_date"),
        ]
        window_select_cols += window_data_id_cols
        window_select_cols.append(
            sqlalchemy.sql.func.row_number()
            .over(partition_by=window_data_id_cols, order_by=search.columns["rank"])
            .label("rownum")
        )
        window = (
            sqlalchemy.sql.select(*window_select_cols)
            .select_from(search)
            .alias(f"{storage.datasetType.name}_window")
        )
        sql = (
            sqlalchemy.sql.select(*[window.columns[col.name].label(col.name) for col in window_select_cols])
            .select_from(window)
            .where(window.columns["rownum"] == 1)
            .alias(storage.datasetType.name)
        )
        return sql

    def joinTable(
        self,
        table: sqlalchemy.sql.FromClause,
        dimensions: NamedValueAbstractSet[Dimension],
        *,
        datasets: DatasetQueryColumns | None = None,
    ) -> None:
        """Join an arbitrary table to the query via dimension relationships.

        External calls to this method should only be necessary for tables
        whose records represent neither datasets nor dimension elements.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in the
            query. The table must have columns with the names of the
            dimensions.
        datasets : `DatasetQueryColumns`, optional
            Columns that identify a dataset that is part of the query results.
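
        Examples
        --------
        A hypothetical sketch; ``my_table`` and ``dims`` are placeholders for
        a caller-provided table and the dimensions whose key columns it
        carries::

            # ``my_table`` must have a column named after each dimension.
            builder.joinTable(my_table, dims)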
        """
        unexpectedDimensions = NamedValueSet(dimensions - self.summary.mustHaveKeysJoined.dimensions)
        unexpectedDimensions.discard(self._backend.universe.commonSkyPix)
        if unexpectedDimensions:
            raise NotImplementedError(
                f"QueryBuilder does not yet support joining in dimensions {unexpectedDimensions} that "
                f"were not provided originally to the QuerySummary object passed at construction."
            )
        joinOn = self.startJoin(table, dimensions, dimensions.names)
        self.finishJoin(table, joinOn)
        if datasets is not None:
            assert (
                self._columns.datasets is None
            ), "At most one result dataset type can be returned by a query."
            self._columns.datasets = datasets

    def startJoin(
        self, table: sqlalchemy.sql.FromClause, dimensions: Iterable[Dimension], columnNames: Iterable[str]
    ) -> list[sqlalchemy.sql.ColumnElement]:
        """Begin a join on dimensions.

        Must be followed by a call to `finishJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in the
            query. The table must have columns with the names of the
            dimensions.
        columnNames : iterable of `str`
            Names of the columns that correspond to dimension key values; must
            be `zip` iterable with ``dimensions``.

        Returns
        -------
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN.
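
        Examples
        --------
        `startJoin` and `finishJoin` are meant to be used as a pair; a minimal
        sketch, with ``table`` and ``dims`` assumed to be provided by the
        caller::

            joinOn = builder.startJoin(table, dims, dims.names)
            builder.finishJoin(table, joinOn)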
        """
        joinOn = []
        for dimension, columnName in zip(dimensions, columnNames):
            columnInTable = table.columns[columnName]
            columnsInQuery = self._columns.keys.setdefault(dimension, [])
            for columnInQuery in columnsInQuery:
                joinOn.append(columnInQuery == columnInTable)
            columnsInQuery.append(columnInTable)
        return joinOn

    def finishJoin(
        self, table: sqlalchemy.sql.FromClause, joinOn: list[sqlalchemy.sql.ColumnElement]
    ) -> None:
        """Complete a join on dimensions.

        Must be preceded by a call to `startJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined. Must be the same object
            passed to `startJoin`.
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN. Should include
            at least the elements of the list returned by `startJoin`.
        """
        onclause: sqlalchemy.sql.ColumnElement | None
        if len(joinOn) == 0:
            onclause = None
        elif len(joinOn) == 1:
            onclause = joinOn[0]
        else:
            onclause = sqlalchemy.sql.and_(*joinOn)
        self._simpleQuery.join(table, onclause=onclause)

    def _joinMissingDimensionElements(self) -> None:
        """Join all dimension element tables that were identified as necessary
        by `QuerySummary` and have not yet been joined.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        # Join all DimensionElement tables that we need for spatial/temporal
        # joins/filters or a nontrivial WHERE expression.
        # We iterate over these in *reverse* topological order to minimize the
        # number of tables joined. For example, the "visit" table provides
        # the primary key value for the "instrument" table it depends on, so
        # we don't need to join "instrument" as well unless we had a
        # nontrivial expression on it (and hence included it already above).
        for element in self._backend.universe.sorted(self.summary.mustHaveTableJoined, reverse=True):
            self.joinDimensionElement(element)
        # Join in any requested Dimension tables that don't already have their
        # primary keys identified by the query.
        for dimension in self._backend.universe.sorted(self.summary.mustHaveKeysJoined, reverse=True):
            if dimension not in self._columns.keys:
                self.joinDimensionElement(dimension)

    def _addWhereClause(self) -> None:
        """Add a WHERE clause to the query under construction, connecting all
        joined dimensions to the expression and data ID dimensions from
        `QuerySummary`.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        if self.summary.where.tree is not None:
            self._simpleQuery.where.append(
                convertExpressionToSql(
                    self.summary.where.tree,
                    self._backend.universe,
                    columns=self._columns,
                    elements=self._elements,
                    bind=self.summary.where.bind,
                    TimespanReprClass=self._backend.managers.column_types.timespan_cls,
                )
            )
        for dimension, columnsInQuery in self._columns.keys.items():
            if dimension in self.summary.where.dataId.graph:
                givenKey = self.summary.where.dataId[dimension]
                # Add a WHERE term for each column that corresponds to each
                # key. This is redundant with the JOIN ON clauses that make
                # them equal to each other, but more constraints have a chance
                # of making things easier on the DB's query optimizer.
                for columnInQuery in columnsInQuery:
                    self._simpleQuery.where.append(columnInQuery == givenKey)
            else:
                # Dimension is not fully identified, but it might be a skypix
                # dimension that's constrained by a given region.
                if self.summary.where.region is not None and isinstance(dimension, SkyPixDimension):
                    # We know the region now.
                    givenSkyPixIds: list[int] = []
                    for begin, end in dimension.pixelization.envelope(self.summary.where.region):
                        givenSkyPixIds.extend(range(begin, end))
                    for columnInQuery in columnsInQuery:
                        self._simpleQuery.where.append(columnInQuery.in_(givenSkyPixIds))
        # If we are given a data ID with a timespan, and there are one or more
        # timespans in the query that aren't given, add a WHERE expression for
        # each of them.
        if self.summary.where.dataId.graph.temporal and self.summary.temporal:
            # Timespan is known now.
            givenInterval = self.summary.where.dataId.timespan
            assert givenInterval is not None
            for element, intervalInQuery in self._columns.timespans.items():
                assert element not in self.summary.where.dataId.graph.elements
                self._simpleQuery.where.append(
                    intervalInQuery.overlaps(
                        self._backend.managers.column_types.timespan_cls.fromLiteral(givenInterval)
                    )
                )

    def finish(self, joinMissing: bool = True) -> Query:
        """Finish constructing the query, returning a new `Query` instance.

        Parameters
        ----------
        joinMissing : `bool`, optional
            If `True` (default), automatically join any missing dimension
            element tables (according to the categorization of the
            `QuerySummary` the builder was constructed with). `False` should
            only be passed if the caller can independently guarantee that all
            dimension relationships are already captured in non-dimension
            tables that have been manually included in the query.

        Returns
        -------
        query : `Query`
            A `Query` object that can be executed and used to interpret result
            rows.
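
        Examples
        --------
        A hedged sketch of typical use, assuming the builder has already been
        populated via `joinDataset` and/or `joinTable`::

            query = builder.finish()
            # If the query is known to be doomed, the messages passed as
            # ``doomed_by`` (or accumulated while joining datasets) explain
            # why it can return no results.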
        """
        if joinMissing:
            self._joinMissingDimensionElements()
        self._addWhereClause()
        if self._columns.isEmpty():
            return EmptyQuery(
                self._backend.universe,
                backend=self._backend,
                doomed_by=self._doomed_by,
            )
        return DirectQuery(
            graph=self.summary.requested,
            uniqueness=DirectQueryUniqueness.NOT_UNIQUE,
            whereRegion=self.summary.where.region,
            simpleQuery=self._simpleQuery,
            columns=self._columns,
            order_by_columns=self._order_by_columns(),
            limit=self.summary.limit,
            backend=self._backend,
            doomed_by=self._doomed_by,
        )

    def _order_by_columns(self) -> Iterable[OrderByColumn]:
        """Generate columns to be used for the ORDER BY clause.

        Returns
        -------
        order_by_columns : `Iterable` [ `OrderByColumn` ]
            Sequence of columns to appear in the ORDER BY clause.
        """
        order_by_columns: list[OrderByColumn] = []
        if not self.summary.order_by:
            return order_by_columns

        for order_by_column in self.summary.order_by.order_by_columns:

            column: sqlalchemy.sql.ColumnElement
            if order_by_column.column is None:
                # A plain dimension name; its key column is already in the
                # SELECT list, so we only need to add it to ORDER BY.
                assert isinstance(order_by_column.element, Dimension), "expecting full Dimension"
                column = self._columns.getKeyColumn(order_by_column.element)
            else:
                table = self._elements[order_by_column.element]

                if order_by_column.column in ("timespan.begin", "timespan.end"):
                    TimespanReprClass = self._backend.managers.column_types.timespan_cls
                    timespan_repr = TimespanReprClass.from_columns(table.columns)
                    if order_by_column.column == "timespan.begin":
                        column = timespan_repr.lower()
                        label = f"{order_by_column.element.name}_timespan_begin"
                    else:
                        column = timespan_repr.upper()
                        label = f"{order_by_column.element.name}_timespan_end"
                else:
                    column = table.columns[order_by_column.column]
                    # Make a unique label for it.
                    label = f"{order_by_column.element.name}_{order_by_column.column}"

                column = column.label(label)

            order_by_columns.append(OrderByColumn(column=column, ordering=order_by_column.ordering))

        return order_by_columns