Coverage for python/lsst/daf/butler/registry/queries/_builder.py: 11%
182 statements
coverage.py v6.4.4, created at 2022-09-22 02:05 -0700
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("QueryBuilder",)

from typing import AbstractSet, Any, Iterable, List, Optional

import sqlalchemy.sql

from ...core import DatasetType, Dimension, DimensionElement, SimpleQuery, SkyPixDimension
from ...core.named import NamedKeyDict, NamedValueAbstractSet, NamedValueSet
from .._collectionType import CollectionType
from .._exceptions import DataIdValueError
from ..interfaces import CollectionRecord, DatasetRecordStorage, GovernorDimensionRecordStorage
from ..wildcards import CollectionQuery, CollectionSearch
from ._query import DirectQuery, DirectQueryUniqueness, EmptyQuery, OrderByColumn, Query
from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary, RegistryManagers
from .expressions import convertExpressionToSql


class QueryBuilder:
    """A builder for potentially complex queries that join tables based
    on dimension relationships.

    Parameters
    ----------
    summary : `QuerySummary`
        Struct organizing the dimensions involved in the query.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.
    doomed_by : `Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed.  Queries with a non-empty list will never be executed.
    """

    def __init__(self, summary: QuerySummary, managers: RegistryManagers, doomed_by: Iterable[str] = ()):
        self.summary = summary
        self._simpleQuery = SimpleQuery()
        self._elements: NamedKeyDict[DimensionElement, sqlalchemy.sql.FromClause] = NamedKeyDict()
        self._columns = QueryColumns()
        self._managers = managers
        self._doomed_by = list(doomed_by)

        self._validateGovernors()
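        # Typical lifecycle (an illustrative sketch; ``summary``,
        # ``managers``, ``dataset_type``, and ``collections`` are assumed to
        # exist):
        #
        #     builder = QueryBuilder(summary, managers)
        #     builder.joinDataset(dataset_type, collections, isResult=True)
        #     query = builder.finish()
        #
        # `joinDataset` must be called explicitly for each dataset type of
        # interest; `finish` joins any missing dimension tables itself.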

    def _validateGovernors(self) -> None:
        """Check that the governor dimension values specified in the query
        actually exist.

        This helps to avoid mistakes in governor values.  It also implements
        consistent failure behavior for cases when governor dimensions are
        specified in either the data ID or the WHERE clause.

        Raises
        ------
        DataIdValueError
            Raised when governor dimension values are not found.
        """
        for dimension, bounds in self.summary.where.governor_constraints.items():
            storage = self._managers.dimensions[self.summary.requested.universe[dimension]]
            if isinstance(storage, GovernorDimensionRecordStorage):
                if not (storage.values >= bounds):
                    raise DataIdValueError(
                        f"Unknown values specified for governor dimension {dimension}: "
                        f"{set(bounds - storage.values)}."
                    )

    def hasDimensionKey(self, dimension: Dimension) -> bool:
        """Return `True` if the given dimension's primary key column has
        been included in the query (possibly via a foreign key column on some
        other table).
        """
        return dimension in self._columns.keys

    def joinDimensionElement(self, element: DimensionElement) -> None:
        """Add the table for a `DimensionElement` to the query.

        This automatically joins the element table to all other tables in the
        query with which it is related, via both dimension keys and spatial
        and temporal relationships.

        External calls to this method should rarely be necessary; `finish`
        will automatically call it if the `DimensionElement` has been
        identified as one that must be included.

        Parameters
        ----------
        element : `DimensionElement`
            Element for which a table should be added.  The element must be
            associated with a database table (see `DimensionElement.hasTable`).
        """
        assert element not in self._elements, "Element already included in query."
        storage = self._managers.dimensions[element]
        fromClause = storage.join(
            self,
            regions=self._columns.regions if element in self.summary.spatial else None,
            timespans=self._columns.timespans if element in self.summary.temporal else None,
        )
        self._elements[element] = fromClause

    def joinDataset(
        self, datasetType: DatasetType, collections: Any, *, isResult: bool = True, findFirst: bool = False
    ) -> bool:
        """Add a dataset search or constraint to the query.

        Unlike other `QueryBuilder` join methods, this *must* be called
        directly to search for datasets of a particular type or constrain the
        query results based on the existence of datasets.  However, all
        dimensions used to identify the dataset type must have already been
        included in `QuerySummary.requested` when initializing the
        `QueryBuilder`.

        Parameters
        ----------
        datasetType : `DatasetType`
            The type of datasets to search for.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for datasets, such as a `str`, `re.Pattern`, or iterable
            thereof.  `...` can be used to return all collections.  See
            :ref:`daf_butler_collection_expressions` for more information.
        isResult : `bool`, optional
            If `True` (default), include the dataset ID column in the
            result columns of the query, allowing complete `DatasetRef`
            instances to be produced from the query results for this dataset
            type.  If `False`, the existence of datasets of this type is used
            only to constrain the data IDs returned by the query.
            `joinDataset` may be called with ``isResult=True`` at most once
            on a particular `QueryBuilder` instance.
        findFirst : `bool`, optional
            If `True` (`False` is default), only include the first match for
            each data ID, searching the given collections in order.  Requires
            that all entries in ``collections`` be regular strings, so there
            is a clear search order.  Ignored if ``isResult`` is `False`.

        Returns
        -------
        anyRecords : `bool`
            If `True`, joining the dataset table was successful and the query
            should proceed.  If `False`, we were able to determine (from the
            combination of ``datasetType`` and ``collections``) that there
            would be no results joined in from this dataset, and hence (due
            to the inner join that would normally be present), the full query
            will return no results.
        """
        assert datasetType in self.summary.datasets
        if isResult and findFirst:
            collections = CollectionSearch.fromExpression(collections)
        else:
            collections = CollectionQuery.fromExpression(collections)
        explicitCollections = frozenset(collections.explicitNames())
        # If we are searching all collections with no constraints, loop over
        # RUN collections only, because that will include all datasets.
        collectionTypes: AbstractSet[CollectionType]
        if collections == CollectionQuery():
            collectionTypes = {CollectionType.RUN}
        else:
            collectionTypes = CollectionType.all()
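        # (Rationale, assuming the standard daf_butler collection model in
        # which every dataset belongs to exactly one RUN collection:
        # iterating over RUN collections alone still reaches every dataset,
        # while skipping TAGGED/CHAINED/CALIBRATION collections that could
        # only repeat them.)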
        datasetRecordStorage = self._managers.datasets.find(datasetType.name)
        if datasetRecordStorage is None:
            # Unrecognized dataset type means no results.  It might be better
            # to raise here, but this is consistent with previous behavior,
            # which is expected by QuantumGraph generation code in pipe_base.
            self._doomed_by.append(
                f"Dataset type {datasetType.name!r} is not registered, so no instances of it can exist in "
                "any collection."
            )
            return False
        collectionRecords: List[CollectionRecord] = []
        rejections: List[str] = []
        for collectionRecord in collections.iter(self._managers.collections, collectionTypes=collectionTypes):
            # Only include collections that (according to collection
            # summaries) might have datasets of this type and governor
            # dimensions consistent with the query's WHERE clause.
            collection_summary = self._managers.datasets.getCollectionSummary(collectionRecord)
            if not collection_summary.is_compatible_with(
                datasetType,
                self.summary.where.governor_constraints,
                rejections=rejections,
                name=collectionRecord.name,
            ):
                continue
            if collectionRecord.type is CollectionType.CALIBRATION:
                # If the collection name was provided explicitly, raise if
                # this is a kind of query we don't support yet; otherwise the
                # collection is part of a CHAINED collection or a regex
                # match, and we skip it so as not to break queries of the
                # other included collections.
                if datasetType.isCalibration():
                    if self.summary.temporal or self.summary.mustHaveKeysJoined.temporal:
                        if collectionRecord.name in explicitCollections:
                            raise NotImplementedError(
                                f"Temporal query for dataset type '{datasetType.name}' in CALIBRATION-type "
                                f"collection '{collectionRecord.name}' is not yet supported."
                            )
                        else:
                            rejections.append(
                                f"Not searching for dataset {datasetType.name!r} in CALIBRATION collection "
                                f"{collectionRecord.name!r} because temporal calibration queries aren't "
                                "implemented; this is not an error only because the query structure implies "
                                "that searching this collection may be incidental."
                            )
                            continue
                    elif findFirst:
                        if collectionRecord.name in explicitCollections:
                            raise NotImplementedError(
                                f"Find-first query for dataset type '{datasetType.name}' in "
                                f"CALIBRATION-type collection '{collectionRecord.name}' is not yet "
                                "supported."
                            )
                        else:
                            rejections.append(
                                f"Not searching for dataset {datasetType.name!r} in CALIBRATION collection "
                                f"{collectionRecord.name!r} because find-first calibration queries aren't "
                                "implemented; this is not an error only because the query structure implies "
                                "that searching this collection may be incidental."
                            )
                            continue
                    else:
                        collectionRecords.append(collectionRecord)
                else:
                    # We can never find a non-calibration dataset in a
                    # CALIBRATION collection.
                    rejections.append(
                        f"Not searching for non-calibration dataset {datasetType.name!r} "
                        f"in CALIBRATION collection {collectionRecord.name!r}."
                    )
                    continue
            else:
                collectionRecords.append(collectionRecord)
        if isResult:
            if findFirst:
                subquery = self._build_dataset_search_subquery(
                    datasetRecordStorage,
                    collectionRecords,
                )
            else:
                subquery = self._build_dataset_query_subquery(
                    datasetRecordStorage,
                    collectionRecords,
                )
            columns = DatasetQueryColumns(
                datasetType=datasetType,
                id=subquery.columns["id"],
                runKey=subquery.columns[self._managers.collections.getRunForeignKeyName()],
                ingestDate=subquery.columns["ingest_date"],
            )
        else:
            subquery = self._build_dataset_constraint_subquery(datasetRecordStorage, collectionRecords)
            columns = None
        self.joinTable(subquery, datasetType.dimensions.required, datasets=columns)
        if not collectionRecords:
            if rejections:
                self._doomed_by.extend(rejections)
            else:
                self._doomed_by.append(f"No collections to search matching expression {collections}.")
            return False
        return not self._doomed_by
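        # Note: a `False` return means the query is already known to be
        # empty; the messages accumulated in ``self._doomed_by`` are passed
        # through to the `Query` constructed by `finish`, which can then
        # report why there are no results instead of executing SQL.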

    def _build_dataset_constraint_subquery(
        self, storage: DatasetRecordStorage, collections: List[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that does not return dataset results.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched.  Collections with no
            datasets of this type or with governor dimensions incompatible
            with the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should also be filtered
            out if this is a temporal query.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object.  Has columns for each
            dataset type dimension, or an unspecified column (just to prevent
            SQL syntax errors) where there is no data ID.
        """
        return storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            # If this dataset type has no dimensions, we're in danger of
            # generating an invalid subquery that has no columns in the
            # SELECT clause.  An easy fix is to just select some arbitrary
            # column that goes unused, like the dataset ID.
            id=None if storage.datasetType.dimensions else SimpleQuery.Select,
            run=None,
            ingestDate=None,
            timespan=None,
        ).alias(storage.datasetType.name)

    def _build_dataset_query_subquery(
        self, storage: DatasetRecordStorage, collections: List[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that returns all matching dataset results.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched.  Collections with no
            datasets of this type or with governor dimensions incompatible
            with the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should also be filtered
            out if this is a temporal query.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object.  Has columns for each
            dataset type dimension, the dataset ID, the
            `~CollectionType.RUN` collection key, and the ingest date.
        """
        sql = storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            id=SimpleQuery.Select,
            run=SimpleQuery.Select,
            ingestDate=SimpleQuery.Select,
            timespan=None,
        ).alias(storage.datasetType.name)
        return sql

    def _build_dataset_search_subquery(
        self, storage: DatasetRecordStorage, collections: List[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that returns the first matching dataset for each data ID and
        dataset type name from an ordered list of collections.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched.  Collections with no
            datasets of this type or with governor dimensions incompatible
            with the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should be filtered out
            as well.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object.  Has columns for each
            dataset type dimension, the dataset ID, the
            `~CollectionType.RUN` collection key, and the ingest date.
        """
        # Query-simplification shortcut: if there is only one collection, a
        # find-first search is just a regular result subquery.  Same is true
        # if this is a doomed query with no collections to search.
        if len(collections) <= 1:
            return self._build_dataset_query_subquery(storage, collections)
        # In the more general case, we build a subquery of the form below to
        # search the collections in order.
        #
        #     WITH {dst}_search AS (
        #         SELECT {data-id-cols}, id, run_id, 1 AS rank
        #             FROM <collection1>
        #         UNION ALL
        #         SELECT {data-id-cols}, id, run_id, 2 AS rank
        #             FROM <collection2>
        #         UNION ALL
        #         ...
        #     )
        #     SELECT
        #         {dst}_window.{data-id-cols},
        #         {dst}_window.id,
        #         {dst}_window.run_id
        #     FROM (
        #         SELECT
        #             {dst}_search.{data-id-cols},
        #             {dst}_search.id,
        #             {dst}_search.run_id,
        #             ROW_NUMBER() OVER (
        #                 PARTITION BY {dst}_search.{data-id-cols}
        #                 ORDER BY rank
        #             ) AS rownum
        #         FROM {dst}_search
        #     ) {dst}_window
        #     WHERE
        #         {dst}_window.rownum = 1;
        #
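        # Worked example (illustrative): if data ID {visit: 42} has a
        # dataset in both <collection1> (rank 1) and <collection2> (rank 2),
        # the CTE yields two rows for that data ID; ROW_NUMBER() numbers
        # them 1 and 2 in rank order within the partition, and the outer
        # WHERE keeps only rownum = 1, i.e. the dataset from the first
        # collection searched.
        #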
        # We'll start with the Common Table Expression (CTE) at the top.
        search = storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            id=SimpleQuery.Select,
            run=SimpleQuery.Select,
            ingestDate=SimpleQuery.Select,
            timespan=None,
            rank=SimpleQuery.Select,
        ).cte(f"{storage.datasetType.name}_search")
        # Now we fill out the SELECT from the CTE, and the subquery it
        # contains (at the same time, since they have the same columns,
        # aside from the OVER clause).
        run_key_name = self._managers.collections.getRunForeignKeyName()
        window_data_id_cols = [
            search.columns[name].label(name) for name in storage.datasetType.dimensions.required.names
        ]
        window_select_cols = [
            search.columns["id"].label("id"),
            search.columns[run_key_name].label(run_key_name),
            search.columns["ingest_date"].label("ingest_date"),
        ]
        window_select_cols += window_data_id_cols
        window_select_cols.append(
            sqlalchemy.sql.func.row_number()
            .over(partition_by=window_data_id_cols, order_by=search.columns["rank"])
            .label("rownum")
        )
        window = (
            sqlalchemy.sql.select(*window_select_cols)
            .select_from(search)
            .alias(f"{storage.datasetType.name}_window")
        )
        sql = (
            sqlalchemy.sql.select(*[window.columns[col.name].label(col.name) for col in window_select_cols])
            .select_from(window)
            .where(window.columns["rownum"] == 1)
            .alias(storage.datasetType.name)
        )
        return sql

    def joinTable(
        self,
        table: sqlalchemy.sql.FromClause,
        dimensions: NamedValueAbstractSet[Dimension],
        *,
        datasets: Optional[DatasetQueryColumns] = None,
    ) -> None:
        """Join an arbitrary table to the query via dimension relationships.

        External calls to this method should only be necessary for tables
        whose records represent neither datasets nor dimension elements.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in
            the query.  The table must have columns with the names of the
            dimensions.
        datasets : `DatasetQueryColumns`, optional
            Columns that identify a dataset that is part of the query
            results.
        """
        unexpectedDimensions = NamedValueSet(dimensions - self.summary.mustHaveKeysJoined.dimensions)
        unexpectedDimensions.discard(self.summary.universe.commonSkyPix)
        if unexpectedDimensions:
            raise NotImplementedError(
                f"QueryBuilder does not yet support joining in dimensions {unexpectedDimensions} that "
                f"were not provided originally to the QuerySummary object passed at construction."
            )
        joinOn = self.startJoin(table, dimensions, dimensions.names)
        self.finishJoin(table, joinOn)
        if datasets is not None:
            assert (
                self._columns.datasets is None
            ), "At most one result dataset type can be returned by a query."
            self._columns.datasets = datasets

    def startJoin(
        self, table: sqlalchemy.sql.FromClause, dimensions: Iterable[Dimension], columnNames: Iterable[str]
    ) -> List[sqlalchemy.sql.ColumnElement]:
        """Begin a join on dimensions.

        Must be followed by call to `finishJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in
            the query.  The table must have columns with the names of the
            dimensions.
        columnNames : iterable of `str`
            Names of the columns that correspond to dimension key values;
            must be `zip` iterable with ``dimensions``.

        Returns
        -------
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN.
        """
        joinOn = []
        for dimension, columnName in zip(dimensions, columnNames):
            columnInTable = table.columns[columnName]
            columnsInQuery = self._columns.keys.setdefault(dimension, [])
            for columnInQuery in columnsInQuery:
                joinOn.append(columnInQuery == columnInTable)
            columnsInQuery.append(columnInTable)
        return joinOn
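        # Illustrative pairing (``table`` and ``dims`` are hypothetical
        # names); callers may append extra ON terms between the two calls,
        # as `finishJoin` allows:
        #
        #     joinOn = builder.startJoin(table, dims, dims.names)
        #     joinOn.append(table.columns["observation_reason"] == "science")
        #     builder.finishJoin(table, joinOn)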

    def finishJoin(
        self, table: sqlalchemy.sql.FromClause, joinOn: List[sqlalchemy.sql.ColumnElement]
    ) -> None:
        """Complete a join on dimensions.

        Must be preceded by call to `startJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.  Must be the same
            object passed to `startJoin`.
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN.  Should
            include at least the elements of the list returned by
            `startJoin`.
        """
        onclause: Optional[sqlalchemy.sql.ColumnElement]
        if len(joinOn) == 0:
            onclause = None
        elif len(joinOn) == 1:
            onclause = joinOn[0]
        else:
            onclause = sqlalchemy.sql.and_(*joinOn)
        self._simpleQuery.join(table, onclause=onclause)

    def _joinMissingDimensionElements(self) -> None:
        """Join all dimension element tables that were identified as
        necessary by `QuerySummary` and have not yet been joined.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        # Join all DimensionElement tables that we need for spatial/temporal
        # joins/filters or a nontrivial WHERE expression.
        # We iterate over these in *reverse* topological order to minimize
        # the number of tables joined.  For example, the "visit" table
        # provides the primary key value for the "instrument" table it
        # depends on, so we don't need to join "instrument" as well unless
        # we had a nontrivial expression on it (and hence included it
        # already above).
        for element in self.summary.universe.sorted(self.summary.mustHaveTableJoined, reverse=True):
            self.joinDimensionElement(element)
        # Join in any requested Dimension tables that don't already have
        # their primary keys identified by the query.
        for dimension in self.summary.universe.sorted(self.summary.mustHaveKeysJoined, reverse=True):
            if dimension not in self._columns.keys:
                self.joinDimensionElement(dimension)

    def _addWhereClause(self) -> None:
        """Add a WHERE clause to the query under construction, connecting all
        joined dimensions to the expression and data ID dimensions from
        `QuerySummary`.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        if self.summary.where.tree is not None:
            self._simpleQuery.where.append(
                convertExpressionToSql(
                    self.summary.where.tree,
                    self.summary.universe,
                    columns=self._columns,
                    elements=self._elements,
                    bind=self.summary.where.bind,
                    TimespanReprClass=self._managers.TimespanReprClass,
                )
            )
        for dimension, columnsInQuery in self._columns.keys.items():
            if dimension in self.summary.where.dataId.graph:
                givenKey = self.summary.where.dataId[dimension]
                # Add a WHERE term for each column that corresponds to each
                # key.  This is redundant with the JOIN ON clauses that make
                # them equal to each other, but more constraints have a
                # chance of making things easier on the DB's query optimizer.
                for columnInQuery in columnsInQuery:
                    self._simpleQuery.where.append(columnInQuery == givenKey)
            else:
                # Dimension is not fully identified, but it might be a skypix
                # dimension that's constrained by a given region.
                if self.summary.where.region is not None and isinstance(dimension, SkyPixDimension):
                    # We know the region now.
                    givenSkyPixIds: List[int] = []
                    for begin, end in dimension.pixelization.envelope(self.summary.where.region):
                        givenSkyPixIds.extend(range(begin, end))
                    for columnInQuery in columnsInQuery:
                        self._simpleQuery.where.append(columnInQuery.in_(givenSkyPixIds))
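                    # Worked example (illustrative numbers): if the envelope
                    # above yields index ranges (10, 12) and (40, 41),
                    # ``givenSkyPixIds`` is [10, 11, 40], and each skypix key
                    # column is constrained to those pixel IDs via IN.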
        # If we are given a dataId with a timespan, and there are one or
        # more timespans in the query that aren't given, add a WHERE
        # expression for each of them.
        if self.summary.where.dataId.graph.temporal and self.summary.temporal:
            # Timespan is known now.
            givenInterval = self.summary.where.dataId.timespan
            assert givenInterval is not None
            for element, intervalInQuery in self._columns.timespans.items():
                assert element not in self.summary.where.dataId.graph.elements
                self._simpleQuery.where.append(
                    intervalInQuery.overlaps(self._managers.TimespanReprClass.fromLiteral(givenInterval))
                )

    def finish(self, joinMissing: bool = True) -> Query:
        """Finish constructing the query, returning a new `Query` instance.

        Parameters
        ----------
        joinMissing : `bool`, optional
            If `True` (default), automatically join any missing dimension
            element tables (according to the categorization of the
            `QuerySummary` the builder was constructed with).  `False` should
            only be passed if the caller can independently guarantee that all
            dimension relationships are already captured in non-dimension
            tables that have been manually included in the query.

        Returns
        -------
        query : `Query`
            A `Query` object that can be executed and used to interpret
            result rows.
        """
        if joinMissing:
            self._joinMissingDimensionElements()
        self._addWhereClause()
        if self._columns.isEmpty():
            return EmptyQuery(
                self.summary.requested.universe, managers=self._managers, doomed_by=self._doomed_by
            )
        return DirectQuery(
            graph=self.summary.requested,
            uniqueness=DirectQueryUniqueness.NOT_UNIQUE,
            whereRegion=self.summary.where.region,
            simpleQuery=self._simpleQuery,
            columns=self._columns,
            order_by_columns=self._order_by_columns(),
            limit=self.summary.limit,
            managers=self._managers,
            doomed_by=self._doomed_by,
        )

    def _order_by_columns(self) -> Iterable[OrderByColumn]:
        """Generate columns to be used for ORDER BY clause.

        Returns
        -------
        order_by_columns : `Iterable` [ `OrderByColumn` ]
            Sequence of columns to appear in ORDER BY clause.
        """
        order_by_columns: List[OrderByColumn] = []
        if not self.summary.order_by:
            return order_by_columns

        for order_by_column in self.summary.order_by.order_by_columns:

            column: sqlalchemy.sql.ColumnElement
            if order_by_column.column is None:
                # Dimension name; its key column is already in the SELECT
                # list, so we only need to add it to ORDER BY.
                assert isinstance(order_by_column.element, Dimension), "expecting full Dimension"
                column = self._columns.getKeyColumn(order_by_column.element)
            else:
                table = self._elements[order_by_column.element]

                if order_by_column.column in ("timespan.begin", "timespan.end"):
                    TimespanReprClass = self._managers.TimespanReprClass
                    timespan_repr = TimespanReprClass.from_columns(table.columns)
                    if order_by_column.column == "timespan.begin":
                        column = timespan_repr.lower()
                        label = f"{order_by_column.element.name}_timespan_begin"
                    else:
                        column = timespan_repr.upper()
                        label = f"{order_by_column.element.name}_timespan_end"
                else:
                    column = table.columns[order_by_column.column]
                    # Make a unique label for it.
                    label = f"{order_by_column.element.name}_{order_by_column.column}"

                column = column.label(label)

            order_by_columns.append(OrderByColumn(column=column, ordering=order_by_column.ordering))

        return order_by_columns
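        # Example shape (illustrative): an ORDER BY spec for
        # ``visit.timespan.begin`` would produce a column labeled
        # "visit_timespan_begin" from the visit table's timespan
        # representation, while a bare dimension such as "visit" reuses the
        # key column already present in the SELECT list.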