Coverage for python/lsst/daf/butler/registry/queries/_builder.py: 11%
183 statements
coverage.py v6.5.0, created at 2022-10-26 02:02 -0700
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("QueryBuilder",)

from collections.abc import Iterable, Set
from typing import Any

import sqlalchemy.sql

from ...core import DatasetType, Dimension, DimensionElement, SimpleQuery, SkyPixDimension
from ...core.named import NamedKeyDict, NamedValueAbstractSet, NamedValueSet
from .._collectionType import CollectionType
from .._exceptions import DataIdValueError
from ..interfaces import CollectionRecord, DatasetRecordStorage, GovernorDimensionRecordStorage
from ..wildcards import CollectionWildcard
from ._query import DirectQuery, DirectQueryUniqueness, EmptyQuery, OrderByColumn, Query
from ._query_backend import QueryBackend
from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary
from .expressions import convertExpressionToSql


class QueryBuilder:
    """A builder for potentially complex queries that join tables based
    on dimension relationships.

    Parameters
    ----------
    summary : `QuerySummary`
        Struct organizing the dimensions involved in the query.
    backend : `QueryBackend`
        Backend object that represents the `Registry` implementation.
    doomed_by : `Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed.  Queries with a non-empty list will never be executed.
    """

    def __init__(
        self,
        summary: QuerySummary,
        backend: QueryBackend,
        doomed_by: Iterable[str] = (),
    ):
        self.summary = summary
        self._backend = backend
        self._simpleQuery = SimpleQuery()
        self._elements: NamedKeyDict[DimensionElement, sqlalchemy.sql.FromClause] = NamedKeyDict()
        self._columns = QueryColumns()
        self._doomed_by = list(doomed_by)

        self._validateGovernors()

    def _validateGovernors(self) -> None:
        """Check that the governor dimension values specified by the query
        actually exist.

        This helps to avoid mistakes in governor values.  It also implements
        consistent failure behavior for cases when governor dimensions are
        specified in either a DataId or a WHERE clause.

        Raises
        ------
        DataIdValueError
            Raised when governor dimension values are not found.
        """
        for dimension, bounds in self.summary.where.governor_constraints.items():
            storage = self._backend.managers.dimensions[self._backend.universe[dimension]]
            if isinstance(storage, GovernorDimensionRecordStorage):
                if not (storage.values >= bounds):
                    raise DataIdValueError(
                        f"Unknown values specified for governor dimension {dimension}: "
                        f"{set(bounds - storage.values)}."
                    )
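
    # Illustrative sketch (not from this file) of how the check above fails:
    # assuming a repository whose "instrument" governor storage knows only
    # the value "HSC", a WHERE clause that constrains instrument = 'DECam'
    # makes ``storage.values >= bounds`` false, so construction raises:
    #
    #     QueryBuilder(summary, backend)
    #     # -> DataIdValueError: Unknown values specified for governor
    #     #    dimension instrument: {'DECam'}.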

    def hasDimensionKey(self, dimension: Dimension) -> bool:
        """Return `True` if the given dimension's primary key column has
        been included in the query (possibly via a foreign key column on
        some other table).
        """
        return dimension in self._columns.keys

    def joinDimensionElement(self, element: DimensionElement) -> None:
        """Add the table for a `DimensionElement` to the query.

        This automatically joins the element table to all other tables in
        the query with which it is related, via both dimension keys and
        spatial and temporal relationships.

        External calls to this method should rarely be necessary; `finish`
        will automatically call it if the `DimensionElement` has been
        identified as one that must be included.

        Parameters
        ----------
        element : `DimensionElement`
            Element for which a table should be added.  The element must be
            associated with a database table (see
            `DimensionElement.hasTable`).
        """
        assert element not in self._elements, "Element already included in query."
        storage = self._backend.managers.dimensions[element]
        fromClause = storage.join(
            self,
            regions=self._columns.regions if element in self.summary.spatial else None,
            timespans=self._columns.timespans if element in self.summary.temporal else None,
        )
        self._elements[element] = fromClause

    def joinDataset(
        self, datasetType: DatasetType, collections: Any, *, isResult: bool = True, findFirst: bool = False
    ) -> bool:
        """Add a dataset search or constraint to the query.

        Unlike other `QueryBuilder` join methods, this *must* be called
        directly to search for datasets of a particular type or constrain the
        query results based on the existence of datasets.  However, all
        dimensions used to identify the dataset type must have already been
        included in `QuerySummary.requested` when initializing the
        `QueryBuilder`.

        Parameters
        ----------
        datasetType : `DatasetType`
            The type of datasets to search for.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for datasets, such as a `str`, `re.Pattern`, or
            iterable thereof.  `...` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more
            information.
        isResult : `bool`, optional
            If `True` (default), include the dataset ID column in the
            result columns of the query, allowing complete `DatasetRef`
            instances to be produced from the query results for this dataset
            type.  If `False`, the existence of datasets of this type is used
            only to constrain the data IDs returned by the query.
            `joinDataset` may be called with ``isResult=True`` at most one
            time on a particular `QueryBuilder` instance.
        findFirst : `bool`, optional
            If `True` (`False` is default), only include the first match for
            each data ID, searching the given collections in order.  Requires
            that all entries in ``collections`` be regular strings, so there
            is a clear search order.  Ignored if ``isResult`` is `False`.

        Returns
        -------
        anyRecords : `bool`
            If `True`, joining the dataset table was successful and the query
            should proceed.  If `False`, we were able to determine (from the
            combination of ``datasetType`` and ``collections``) that there
            would be no results joined in from this dataset, and hence (due
            to the inner join that would normally be present), the full query
            will return no results.
        """
        assert datasetType in self.summary.datasets
        collections = CollectionWildcard.from_expression(collections)
        if isResult and findFirst:
            collections.require_ordered()
        # If we are searching all collections with no constraints, loop over
        # RUN collections only, because that will include all datasets.
        collectionTypes: Set[CollectionType]
        if collections == CollectionWildcard():
            collectionTypes = {CollectionType.RUN}
        else:
            collectionTypes = CollectionType.all()
        datasetRecordStorage = self._backend.managers.datasets.find(datasetType.name)
        if datasetRecordStorage is None:
            # Unrecognized dataset type means no results.  It might be better
            # to raise here, but this is consistent with previous behavior,
            # which is expected by QuantumGraph generation code in pipe_base.
            self._doomed_by.append(
                f"Dataset type {datasetType.name!r} is not registered, so no instances of it can exist in "
                "any collection."
            )
            return False
        collectionRecords: list[CollectionRecord] = []
        rejections: list[str] = []
        for collectionRecord in self._backend.resolve_collection_wildcard(
            collections, collection_types=collectionTypes
        ):
            # Only include collections that (according to collection
            # summaries) might have datasets of this type and governor
            # dimensions consistent with the query's WHERE clause.
            collection_summary = self._backend.managers.datasets.getCollectionSummary(collectionRecord)
            if not collection_summary.is_compatible_with(
                datasetType,
                self.summary.where.governor_constraints,
                rejections=rejections,
                name=collectionRecord.name,
            ):
                continue
            if collectionRecord.type is CollectionType.CALIBRATION:
                # If the collection name was provided explicitly, raise if
                # this is a kind of query we don't support yet; otherwise the
                # collection is part of a chained collection or a regex
                # match, and we skip it so as not to break queries of the
                # other included collections.
                if datasetType.isCalibration():
                    if self.summary.temporal or self.summary.mustHaveKeysJoined.temporal:
                        if collectionRecord.name in collections.strings:
                            raise NotImplementedError(
                                f"Temporal query for dataset type '{datasetType.name}' in CALIBRATION-type "
                                f"collection '{collectionRecord.name}' is not yet supported."
                            )
                        else:
                            rejections.append(
                                f"Not searching for dataset {datasetType.name!r} in CALIBRATION collection "
                                f"{collectionRecord.name!r} because temporal calibration queries aren't "
                                "implemented; this is not an error only because the query structure implies "
                                "that searching this collection may be incidental."
                            )
                            continue
                    elif findFirst:
                        if collectionRecord.name in collections.strings:
                            raise NotImplementedError(
                                f"Find-first query for dataset type '{datasetType.name}' in "
                                f"CALIBRATION-type collection '{collectionRecord.name}' is not yet "
                                "supported."
                            )
                        else:
                            rejections.append(
                                f"Not searching for dataset {datasetType.name!r} in CALIBRATION collection "
                                f"{collectionRecord.name!r} because find-first calibration queries aren't "
                                "implemented; this is not an error only because the query structure implies "
                                "that searching this collection may be incidental."
                            )
                            continue
                    else:
                        collectionRecords.append(collectionRecord)
                else:
                    # We can never find a non-calibration dataset in a
                    # CALIBRATION collection.
                    rejections.append(
                        f"Not searching for non-calibration dataset {datasetType.name!r} "
                        f"in CALIBRATION collection {collectionRecord.name!r}."
                    )
                    continue
            else:
                collectionRecords.append(collectionRecord)
        if isResult:
            if findFirst:
                subquery = self._build_dataset_search_subquery(
                    datasetRecordStorage,
                    collectionRecords,
                )
            else:
                subquery = self._build_dataset_query_subquery(
                    datasetRecordStorage,
                    collectionRecords,
                )
            columns = DatasetQueryColumns(
                datasetType=datasetType,
                id=subquery.columns["id"],
                runKey=subquery.columns[self._backend.managers.collections.getRunForeignKeyName()],
                ingestDate=subquery.columns["ingest_date"],
            )
        else:
            subquery = self._build_dataset_constraint_subquery(datasetRecordStorage, collectionRecords)
            columns = None
        self.joinTable(subquery, datasetType.dimensions.required, datasets=columns)
        if not collectionRecords:
            if rejections:
                self._doomed_by.extend(rejections)
            else:
                self._doomed_by.append(f"No collections to search matching expression {collections}.")
            return False
        return not self._doomed_by
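
    # Hypothetical usage sketch (collection and dataset type names are
    # illustrative, not from this file): search two collections in order for
    # raw datasets, returning the first match per data ID, and separately
    # constrain the data IDs on the existence of flats without returning
    # them:
    #
    #     builder.joinDataset(rawType, ["HSC/run1", "HSC/run2"],
    #                         isResult=True, findFirst=True)
    #     builder.joinDataset(flatType, ..., isResult=False)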

    def _build_dataset_constraint_subquery(
        self, storage: DatasetRecordStorage, collections: list[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that does not return dataset results.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched.  Collections with no
            datasets of this type or with governor dimensions incompatible
            with the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should also be filtered
            out if this is a temporal query.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object.  Has columns for each
            dataset type dimension, or an unspecified column (just to prevent
            SQL syntax errors) where there is no data ID.
        """
        return storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            # If this dataset type has no dimensions, we're in danger of
            # generating an invalid subquery that has no columns in the
            # SELECT clause.  An easy fix is to just select some arbitrary
            # column that goes unused, like the dataset ID.
            id=None if storage.datasetType.dimensions else SimpleQuery.Select,
            run=None,
            ingestDate=None,
            timespan=None,
        ).alias(storage.datasetType.name)
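
    # Sketch of the shape of the subquery above (placeholders as in the
    # comment in _build_dataset_search_subquery; not the exact SQL that
    # SQLAlchemy emits):
    #
    #     SELECT {data-id-cols} FROM <dataset-tables-for-collections>
    #
    # with the dataset ID selected instead when {data-id-cols} is empty.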

    def _build_dataset_query_subquery(
        self, storage: DatasetRecordStorage, collections: list[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that returns all matching dataset results.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched.  Collections with no
            datasets of this type or with governor dimensions incompatible
            with the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should also be filtered
            out if this is a temporal query.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object.  Has columns for each
            dataset type dimension, the dataset ID, the
            `~CollectionType.RUN` collection key, and the ingest date.
        """
        sql = storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            id=SimpleQuery.Select,
            run=SimpleQuery.Select,
            ingestDate=SimpleQuery.Select,
            timespan=None,
        ).alias(storage.datasetType.name)
        return sql
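
    # Sketch of the shape of the subquery above (same placeholder
    # conventions; not the exact emitted SQL):
    #
    #     SELECT {data-id-cols}, id, run_id, ingest_date
    #     FROM <dataset-tables-for-collections>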

    def _build_dataset_search_subquery(
        self, storage: DatasetRecordStorage, collections: list[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that returns the first matching dataset for each data ID and
        dataset type name from an ordered list of collections.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched.  Collections with no
            datasets of this type or with governor dimensions incompatible
            with the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should be filtered out
            as well.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object.  Has columns for each
            dataset type dimension, the dataset ID, the
            `~CollectionType.RUN` collection key, and the ingest date.
        """
        # Query-simplification shortcut: if there is only one collection, a
        # find-first search is just a regular result subquery.  Same is true
        # if this is a doomed query with no collections to search.
        if len(collections) <= 1:
            return self._build_dataset_query_subquery(storage, collections)
        # In the more general case, we build a subquery of the form below to
        # search the collections in order.
        #
        # WITH {dst}_search AS (
        #     SELECT {data-id-cols}, id, run_id, 1 AS rank
        #     FROM <collection1>
        #     UNION ALL
        #     SELECT {data-id-cols}, id, run_id, 2 AS rank
        #     FROM <collection2>
        #     UNION ALL
        #     ...
        # )
        # SELECT
        #     {dst}_window.{data-id-cols},
        #     {dst}_window.id,
        #     {dst}_window.run_id
        # FROM (
        #     SELECT
        #         {dst}_search.{data-id-cols},
        #         {dst}_search.id,
        #         {dst}_search.run_id,
        #         ROW_NUMBER() OVER (
        #             PARTITION BY {dst}_search.{data-id-cols}
        #             ORDER BY rank
        #         ) AS rownum
        #     FROM {dst}_search
        #     ) {dst}_window
        # WHERE
        #     {dst}_window.rownum = 1;
        #
        # We'll start with the Common Table Expression (CTE) at the top.
        search = storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            id=SimpleQuery.Select,
            run=SimpleQuery.Select,
            ingestDate=SimpleQuery.Select,
            timespan=None,
            rank=SimpleQuery.Select,
        ).cte(f"{storage.datasetType.name}_search")
        # Now we fill out the SELECT from the CTE, and the subquery it
        # contains (at the same time, since they have the same columns, aside
        # from the OVER clause).
        run_key_name = self._backend.managers.collections.getRunForeignKeyName()
        window_data_id_cols = [
            search.columns[name].label(name) for name in storage.datasetType.dimensions.required.names
        ]
        window_select_cols = [
            search.columns["id"].label("id"),
            search.columns[run_key_name].label(run_key_name),
            search.columns["ingest_date"].label("ingest_date"),
        ]
        window_select_cols += window_data_id_cols
        window_select_cols.append(
            sqlalchemy.sql.func.row_number()
            .over(partition_by=window_data_id_cols, order_by=search.columns["rank"])
            .label("rownum")
        )
        window = (
            sqlalchemy.sql.select(*window_select_cols)
            .select_from(search)
            .alias(f"{storage.datasetType.name}_window")
        )
        sql = (
            sqlalchemy.sql.select(*[window.columns[col.name].label(col.name) for col in window_select_cols])
            .select_from(window)
            .where(window.columns["rownum"] == 1)
            .alias(storage.datasetType.name)
        )
        return sql
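
    # Worked example of the find-first logic above: with collections
    # searched in order [c1, c2], a data ID with a dataset in both
    # contributes two CTE rows, with rank 1 (from c1) and rank 2 (from c2).
    # ROW_NUMBER() numbers the rows within that data ID's partition in rank
    # order, so the c1 row gets rownum = 1 and the outer WHERE keeps only
    # that row.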

    def joinTable(
        self,
        table: sqlalchemy.sql.FromClause,
        dimensions: NamedValueAbstractSet[Dimension],
        *,
        datasets: DatasetQueryColumns | None = None,
    ) -> None:
        """Join an arbitrary table to the query via dimension relationships.

        External calls to this method should only be necessary for tables
        whose records represent neither datasets nor dimension elements.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in
            the query.  The table must have columns with the names of the
            dimensions.
        datasets : `DatasetQueryColumns`, optional
            Columns that identify a dataset that is part of the query
            results.
        """
        unexpectedDimensions = NamedValueSet(dimensions - self.summary.mustHaveKeysJoined.dimensions)
        unexpectedDimensions.discard(self._backend.universe.commonSkyPix)
        if unexpectedDimensions:
            raise NotImplementedError(
                f"QueryBuilder does not yet support joining in dimensions {unexpectedDimensions} that "
                f"were not provided originally to the QuerySummary object passed at construction."
            )
        joinOn = self.startJoin(table, dimensions, dimensions.names)
        self.finishJoin(table, joinOn)
        if datasets is not None:
            assert (
                self._columns.datasets is None
            ), "At most one result dataset type can be returned by a query."
            self._columns.datasets = datasets
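
    # Hypothetical external use (table and dimension names are illustrative,
    # not from this file): join a custom table that has "visit" and
    # "detector" columns into the query so its rows constrain, and are
    # constrained by, those dimensions:
    #
    #     builder.joinTable(my_table, my_dimensions)
    #
    # where ``my_dimensions`` is a `NamedValueAbstractSet` holding the visit
    # and detector `Dimension` instances, both already present in the
    # `QuerySummary` the builder was constructed with.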

    def startJoin(
        self, table: sqlalchemy.sql.FromClause, dimensions: Iterable[Dimension], columnNames: Iterable[str]
    ) -> list[sqlalchemy.sql.ColumnElement]:
        """Begin a join on dimensions.

        Must be followed by a call to `finishJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in
            the query.  The table must have columns with the names of the
            dimensions.
        columnNames : iterable of `str`
            Names of the columns that correspond to dimension key values;
            must be `zip` iterable with ``dimensions``.

        Returns
        -------
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN.
        """
        joinOn = []
        for dimension, columnName in zip(dimensions, columnNames):
            columnInTable = table.columns[columnName]
            columnsInQuery = self._columns.keys.setdefault(dimension, [])
            for columnInQuery in columnsInQuery:
                joinOn.append(columnInQuery == columnInTable)
            columnsInQuery.append(columnInTable)
        return joinOn

    def finishJoin(
        self, table: sqlalchemy.sql.FromClause, joinOn: list[sqlalchemy.sql.ColumnElement]
    ) -> None:
        """Complete a join on dimensions.

        Must be preceded by a call to `startJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.  Must be the same
            object passed to `startJoin`.
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN.  Should
            include at least the elements of the list returned by
            `startJoin`.
        """
        onclause: sqlalchemy.sql.ColumnElement | None
        if len(joinOn) == 0:
            onclause = None
        elif len(joinOn) == 1:
            onclause = joinOn[0]
        else:
            onclause = sqlalchemy.sql.and_(*joinOn)
        self._simpleQuery.join(table, onclause=onclause)
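
    # The two methods above are intended to be used as a pair (a sketch,
    # with ``extra_condition`` illustrative):
    #
    #     joinOn = builder.startJoin(table, dims, dims.names)
    #     joinOn.append(extra_condition)  # any additional ON terms
    #     builder.finishJoin(table, joinOn)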

    def _joinMissingDimensionElements(self) -> None:
        """Join all dimension element tables that were identified as
        necessary by `QuerySummary` and have not yet been joined.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        # Join all DimensionElement tables that we need for spatial/temporal
        # joins/filters or a nontrivial WHERE expression.
        # We iterate over these in *reverse* topological order to minimize
        # the number of tables joined.  For example, the "visit" table
        # provides the primary key value for the "instrument" table it
        # depends on, so we don't need to join "instrument" as well unless we
        # had a nontrivial expression on it (and hence included it already
        # above).
        for element in self._backend.universe.sorted(self.summary.mustHaveTableJoined, reverse=True):
            self.joinDimensionElement(element)
        # Join in any requested Dimension tables that don't already have
        # their primary keys identified by the query.
        for dimension in self._backend.universe.sorted(self.summary.mustHaveKeysJoined, reverse=True):
            if dimension not in self._columns.keys:
                self.joinDimensionElement(dimension)

    def _addWhereClause(self) -> None:
        """Add a WHERE clause to the query under construction, connecting all
        joined dimensions to the expression and data ID dimensions from
        `QuerySummary`.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        if self.summary.where.tree is not None:
            self._simpleQuery.where.append(
                convertExpressionToSql(
                    self.summary.where.tree,
                    self._backend.universe,
                    columns=self._columns,
                    elements=self._elements,
                    bind=self.summary.where.bind,
                    TimespanReprClass=self._backend.managers.column_types.timespan_cls,
                )
            )
        for dimension, columnsInQuery in self._columns.keys.items():
            if dimension in self.summary.where.dataId.graph:
                givenKey = self.summary.where.dataId[dimension]
                # Add a WHERE term for each column that corresponds to each
                # key.  This is redundant with the JOIN ON clauses that make
                # them equal to each other, but more constraints have a
                # chance of making things easier on the DB's query optimizer.
                for columnInQuery in columnsInQuery:
                    self._simpleQuery.where.append(columnInQuery == givenKey)
            else:
                # Dimension is not fully identified, but it might be a skypix
                # dimension that's constrained by a given region.
                if self.summary.where.region is not None and isinstance(dimension, SkyPixDimension):
                    # We know the region now.
                    givenSkyPixIds: list[int] = []
                    for begin, end in dimension.pixelization.envelope(self.summary.where.region):
                        givenSkyPixIds.extend(range(begin, end))
                    for columnInQuery in columnsInQuery:
                        self._simpleQuery.where.append(columnInQuery.in_(givenSkyPixIds))
        # If we are given a dataId with a timespan, and there are one or more
        # timespans in the query that aren't given, add a WHERE expression
        # for each of them.
        if self.summary.where.dataId.graph.temporal and self.summary.temporal:
            # Timespan is known now.
            givenInterval = self.summary.where.dataId.timespan
            assert givenInterval is not None
            for element, intervalInQuery in self._columns.timespans.items():
                assert element not in self.summary.where.dataId.graph.elements
                self._simpleQuery.where.append(
                    intervalInQuery.overlaps(
                        self._backend.managers.column_types.timespan_cls.fromLiteral(givenInterval)
                    )
                )
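
    # Illustrative sketch of the skypix constraint above: for a skypix
    # dimension, ``pixelization.envelope(region)`` yields (begin, end) pixel
    # index ranges covering the region, and every pixel ID in those ranges
    # is allowed via an IN clause, e.g. (dimension and IDs illustrative):
    #
    #     WHERE htm7.id IN (231169, 231170, ...)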

    def finish(self, joinMissing: bool = True) -> Query:
        """Finish constructing the query, returning a new `Query` instance.

        Parameters
        ----------
        joinMissing : `bool`, optional
            If `True` (default), automatically join any missing dimension
            element tables (according to the categorization of the
            `QuerySummary` the builder was constructed with).  `False` should
            only be passed if the caller can independently guarantee that all
            dimension relationships are already captured in non-dimension
            tables that have been manually included in the query.

        Returns
        -------
        query : `Query`
            A `Query` object that can be executed and used to interpret
            result rows.
        """
        if joinMissing:
            self._joinMissingDimensionElements()
        self._addWhereClause()
        if self._columns.isEmpty():
            return EmptyQuery(
                self._backend.universe,
                backend=self._backend,
                doomed_by=self._doomed_by,
            )
        return DirectQuery(
            graph=self.summary.requested,
            uniqueness=DirectQueryUniqueness.NOT_UNIQUE,
            whereRegion=self.summary.where.region,
            simpleQuery=self._simpleQuery,
            columns=self._columns,
            order_by_columns=self._order_by_columns(),
            limit=self.summary.limit,
            backend=self._backend,
            doomed_by=self._doomed_by,
        )
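
    # Typical end-to-end flow (illustrative):
    #
    #     builder = QueryBuilder(summary, backend)
    #     builder.joinDataset(datasetType, collections, isResult=True)
    #     query = builder.finish()  # DirectQuery, or EmptyQuery if there
    #     # are no columns at all.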

    def _order_by_columns(self) -> Iterable[OrderByColumn]:
        """Generate columns to be used for ORDER BY clause.

        Returns
        -------
        order_by_columns : `Iterable` [ `OrderByColumn` ]
            Sequence of columns to appear in ORDER BY clause.
        """
        order_by_columns: list[OrderByColumn] = []
        if not self.summary.order_by:
            return order_by_columns

        for order_by_column in self.summary.order_by.order_by_columns:

            column: sqlalchemy.sql.ColumnElement
            if order_by_column.column is None:
                # This is a dimension name; its key column is already in the
                # SELECT list, so we only need to add it to ORDER BY.
                assert isinstance(order_by_column.element, Dimension), "expecting full Dimension"
                column = self._columns.getKeyColumn(order_by_column.element)
            else:
                table = self._elements[order_by_column.element]

                if order_by_column.column in ("timespan.begin", "timespan.end"):
                    TimespanReprClass = self._backend.managers.column_types.timespan_cls
                    timespan_repr = TimespanReprClass.from_columns(table.columns)
                    if order_by_column.column == "timespan.begin":
                        column = timespan_repr.lower()
                        label = f"{order_by_column.element.name}_timespan_begin"
                    else:
                        column = timespan_repr.upper()
                        label = f"{order_by_column.element.name}_timespan_end"
                else:
                    column = table.columns[order_by_column.column]
                    # Make a unique label for it.
                    label = f"{order_by_column.element.name}_{order_by_column.column}"

                column = column.label(label)

            order_by_columns.append(OrderByColumn(column=column, ordering=order_by_column.ordering))

        return order_by_columns
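
    # Example (illustrative): an ORDER BY term like "visit.timespan.begin"
    # resolves above to the lower bound of the visit table's timespan
    # representation, labeled "visit_timespan_begin" so that the label is
    # unique in the SELECT list.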