Coverage for python/lsst/daf/butler/registry/queries/_builder.py: 11%
187 statements
coverage.py v6.4.1, created at 2022-06-09 09:43 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("QueryBuilder",)

from typing import AbstractSet, Any, Iterable, List, Optional

import sqlalchemy.sql

from ...core import DatasetType, Dimension, DimensionElement, SimpleQuery, SkyPixDimension
from ...core.named import NamedKeyDict, NamedValueAbstractSet, NamedValueSet
from .._collectionType import CollectionType
from .._exceptions import DataIdValueError
from ..interfaces import CollectionRecord, DatasetRecordStorage, GovernorDimensionRecordStorage
from ..wildcards import CollectionQuery, CollectionSearch
from ._query import DirectQuery, DirectQueryUniqueness, EmptyQuery, OrderByColumn, Query
from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary, RegistryManagers
from .expressions import convertExpressionToSql


class QueryBuilder:
    """A builder for potentially complex queries that join tables based
    on dimension relationships.

    Parameters
    ----------
    summary : `QuerySummary`
        Struct organizing the dimensions involved in the query.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.
    doomed_by : `Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed. Queries with a non-empty list will never be executed.
    """

    def __init__(self, summary: QuerySummary, managers: RegistryManagers, doomed_by: Iterable[str] = ()):
        self.summary = summary
        self._simpleQuery = SimpleQuery()
        self._elements: NamedKeyDict[DimensionElement, sqlalchemy.sql.FromClause] = NamedKeyDict()
        self._columns = QueryColumns()
        self._managers = managers
        self._doomed_by = list(doomed_by)

        self._validateGovernors()

    def _validateGovernors(self) -> None:
        """Check that the governor dimension values specified in the query
        actually exist.

        This helps to avoid mistakes in governor values. It also implements
        consistent failure behavior for cases when governor dimensions are
        specified in either the data ID or the WHERE clause.

        Raises
        ------
        LookupError
            Raised when governor dimension values are not found.
        """
        for governor, values in self.summary.where.restriction.items():
            storage = self._managers.dimensions[governor]
            assert isinstance(
                storage, GovernorDimensionRecordStorage
            ), f"Unexpected type of the governor dimension record storage {type(storage)}"
            if not values <= storage.values:
                unknown = values - storage.values
                raise DataIdValueError(
                    f"Unknown values specified for governor dimension {governor}: {unknown}"
                )
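
    # For illustration (a hypothetical sketch, not behavior verified here):
    # if the query's WHERE clause pins ``instrument = 'HSC'`` but the
    # registry only has records for ``instrument = 'DECam'``, the check above
    # raises ``DataIdValueError`` naming the unknown value instead of
    # silently producing an empty result.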

    def hasDimensionKey(self, dimension: Dimension) -> bool:
        """Return `True` if the given dimension's primary key column has
        been included in the query (possibly via a foreign key column on some
        other table).
        """
        return dimension in self._columns.keys

    def joinDimensionElement(self, element: DimensionElement) -> None:
        """Add the table for a `DimensionElement` to the query.

        This automatically joins the element table to all other tables in the
        query with which it is related, via both dimension keys and spatial
        and temporal relationships.

        External calls to this method should rarely be necessary; `finish`
        will automatically call it if the `DimensionElement` has been
        identified as one that must be included.

        Parameters
        ----------
        element : `DimensionElement`
            Element for which a table should be added. The element must be
            associated with a database table (see `DimensionElement.hasTable`).
        """
        assert element not in self._elements, "Element already included in query."
        storage = self._managers.dimensions[element]
        fromClause = storage.join(
            self,
            regions=self._columns.regions if element in self.summary.spatial else None,
            timespans=self._columns.timespans if element in self.summary.temporal else None,
        )
        self._elements[element] = fromClause

    def joinDataset(
        self, datasetType: DatasetType, collections: Any, *, isResult: bool = True, findFirst: bool = False
    ) -> bool:
        """Add a dataset search or constraint to the query.

        Unlike other `QueryBuilder` join methods, this *must* be called
        directly to search for datasets of a particular type or constrain the
        query results based on the existence of datasets. However, all
        dimensions used to identify the dataset type must have already been
        included in `QuerySummary.requested` when initializing the
        `QueryBuilder`.

        Parameters
        ----------
        datasetType : `DatasetType`
            The type of datasets to search for.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for datasets, such as a `str`, `re.Pattern`, or iterable
            thereof. `...` can be used to return all collections. See
            :ref:`daf_butler_collection_expressions` for more information.
        isResult : `bool`, optional
            If `True` (default), include the dataset ID column in the
            result columns of the query, allowing complete `DatasetRef`
            instances to be produced from the query results for this dataset
            type. If `False`, the existence of datasets of this type is used
            only to constrain the data IDs returned by the query.
            `joinDataset` may be called with ``isResult=True`` at most once
            on a particular `QueryBuilder` instance.
        findFirst : `bool`, optional
            If `True` (default is `False`), only include the first match for
            each data ID, searching the given collections in order. Requires
            that all entries in ``collections`` be regular strings, so there
            is a clear search order. Ignored if ``isResult`` is `False`.

        Returns
        -------
        anyRecords : `bool`
            If `True`, joining the dataset table was successful and the query
            should proceed. If `False`, we were able to determine (from the
            combination of ``datasetType`` and ``collections``) that there
            would be no results joined in from this dataset, and hence (due to
            the inner join that would normally be present), the full query
            will return no results.
        """
        assert datasetType in self.summary.datasets
        if isResult and findFirst:
            collections = CollectionSearch.fromExpression(collections)
        else:
            collections = CollectionQuery.fromExpression(collections)
        explicitCollections = frozenset(collections.explicitNames())
        # If we are searching all collections with no constraints, loop over
        # RUN collections only, because that will include all datasets.
        collectionTypes: AbstractSet[CollectionType]
        if collections == CollectionQuery():
            collectionTypes = {CollectionType.RUN}
        else:
            collectionTypes = CollectionType.all()
        datasetRecordStorage = self._managers.datasets.find(datasetType.name)
        if datasetRecordStorage is None:
            # Unrecognized dataset type means no results. It might be better
            # to raise here, but this is consistent with previous behavior,
            # which is expected by QuantumGraph generation code in pipe_base.
            self._doomed_by.append(
                f"Dataset type {datasetType.name!r} is not registered, so no instances of it can exist in "
                "any collection."
            )
            return False
        collectionRecords: List[CollectionRecord] = []
        rejections: List[str] = []
        for collectionRecord in collections.iter(self._managers.collections, collectionTypes=collectionTypes):
            # Only include collections that (according to collection summaries)
            # might have datasets of this type and governor dimensions
            # consistent with the query's WHERE clause.
            collection_summary = self._managers.datasets.getCollectionSummary(collectionRecord)
            if not collection_summary.is_compatible_with(
                datasetType,
                self.summary.where.restriction,
                rejections=rejections,
                name=collectionRecord.name,
            ):
                continue
            if collectionRecord.type is CollectionType.CALIBRATION:
                # If the collection name was provided explicitly, raise for
                # the kinds of queries we don't support yet; otherwise the
                # collection is part of a chained collection or a regex
                # match, and we skip it so as not to break queries of the
                # other included collections.
                if datasetType.isCalibration():
                    if self.summary.temporal or self.summary.mustHaveKeysJoined.temporal:
                        if collectionRecord.name in explicitCollections:
                            raise NotImplementedError(
                                f"Temporal query for dataset type '{datasetType.name}' in CALIBRATION-type "
                                f"collection '{collectionRecord.name}' is not yet supported."
                            )
                        else:
                            rejections.append(
                                f"Not searching for dataset {datasetType.name!r} in CALIBRATION collection "
                                f"{collectionRecord.name!r} because temporal calibration queries aren't "
                                "implemented; this is not an error only because the query structure implies "
                                "that searching this collection may be incidental."
                            )
                            continue
                    elif findFirst:
                        if collectionRecord.name in explicitCollections:
                            raise NotImplementedError(
                                f"Find-first query for dataset type '{datasetType.name}' in "
                                f"CALIBRATION-type collection '{collectionRecord.name}' is not yet "
                                "supported."
                            )
                        else:
                            rejections.append(
                                f"Not searching for dataset {datasetType.name!r} in CALIBRATION collection "
                                f"{collectionRecord.name!r} because find-first calibration queries aren't "
                                "implemented; this is not an error only because the query structure implies "
                                "that searching this collection may be incidental."
                            )
                            continue
                    else:
                        collectionRecords.append(collectionRecord)
                else:
                    # We can never find a non-calibration dataset in a
                    # CALIBRATION collection.
                    rejections.append(
                        f"Not searching for non-calibration dataset {datasetType.name!r} "
                        f"in CALIBRATION collection {collectionRecord.name!r}."
                    )
                    continue
            else:
                collectionRecords.append(collectionRecord)
        if isResult:
            if findFirst:
                subquery = self._build_dataset_search_subquery(
                    datasetRecordStorage,
                    collectionRecords,
                )
            else:
                subquery = self._build_dataset_query_subquery(
                    datasetRecordStorage,
                    collectionRecords,
                )
            columns = DatasetQueryColumns(
                datasetType=datasetType,
                id=subquery.columns["id"],
                runKey=subquery.columns[self._managers.collections.getRunForeignKeyName()],
                ingestDate=subquery.columns["ingest_date"],
            )
        else:
            subquery = self._build_dataset_constraint_subquery(datasetRecordStorage, collectionRecords)
            columns = None
        self.joinTable(subquery, datasetType.dimensions.required, datasets=columns)
        if not collectionRecords:
            if rejections:
                self._doomed_by.extend(rejections)
            else:
                self._doomed_by.append(f"No collections to search matching expression {collections}.")
            return False
        return not self._doomed_by
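
    # A minimal usage sketch (hypothetical names; assumes ``builder`` was
    # constructed with a QuerySummary that already includes this dataset
    # type's dimensions):
    #
    #     if builder.joinDataset(rawType, collections=["HSC/raw/all"], isResult=True, findFirst=True):
    #         query = builder.finish()
    #     else:
    #         ...  # doomed: executing the query would return no rows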

    def _build_dataset_constraint_subquery(
        self, storage: DatasetRecordStorage, collections: List[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that does not return dataset results.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched. Collections with no
            datasets of this type or with governor dimensions incompatible with
            the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should also be filtered
            out if this is a temporal query.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object. Has columns for each
            dataset type dimension, or an unspecified column (just to prevent
            SQL syntax errors) where there is no data ID.
        """
        return storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            # If this dataset type has no dimensions, we're in danger of
            # generating an invalid subquery that has no columns in the
            # SELECT clause. An easy fix is to just select some arbitrary
            # column that goes unused, like the dataset ID.
            id=None if storage.datasetType.dimensions else SimpleQuery.Select,
            run=None,
            ingestDate=None,
            timespan=None,
        ).alias(storage.datasetType.name)
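
    # The subquery returned above is roughly of this shape (a sketch only;
    # the actual table and column names come from the registry schema via
    # ``DatasetRecordStorage.select``):
    #
    #     SELECT {data-id-cols} FROM <dataset-collection table>
    #     WHERE <collection key> IN (<given collections>)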

    def _build_dataset_query_subquery(
        self, storage: DatasetRecordStorage, collections: List[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that returns all matching dataset results.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched. Collections with no
            datasets of this type or with governor dimensions incompatible with
            the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should also be filtered
            out if this is a temporal query.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object. Has columns for each dataset
            type dimension, the dataset ID, the `~CollectionType.RUN`
            collection key, and the ingest date.
        """
        sql = storage.select(
            *collections,
            dataId=SimpleQuery.Select,
            id=SimpleQuery.Select,
            run=SimpleQuery.Select,
            ingestDate=SimpleQuery.Select,
            timespan=None,
        ).alias(storage.datasetType.name)
        return sql

    def _build_dataset_search_subquery(
        self, storage: DatasetRecordStorage, collections: List[CollectionRecord]
    ) -> sqlalchemy.sql.FromClause:
        """Internal helper method to build a dataset subquery for a parent
        query that returns the first matching dataset for each data ID and
        dataset type name from an ordered list of collections.

        Parameters
        ----------
        storage : `DatasetRecordStorage`
            Storage object for the dataset type the subquery is for.
        collections : `list` [ `CollectionRecord` ]
            Records for the collections to be searched. Collections with no
            datasets of this type or with governor dimensions incompatible with
            the rest of the query should already have been filtered out.
            `~CollectionType.CALIBRATION` collections should be filtered out as
            well.

        Returns
        -------
        sql : `sqlalchemy.sql.FromClause`
            A SQLAlchemy aliased subquery object. Has columns for each dataset
            type dimension, the dataset ID, the `~CollectionType.RUN`
            collection key, and the ingest date.
        """
        # Query-simplification shortcut: if there is only one collection, a
        # find-first search is just a regular result subquery. The same is
        # true if this is a doomed query with no collections to search.
        if len(collections) <= 1:
            return self._build_dataset_query_subquery(storage, collections)
        # In the more general case, we build a subquery of the form below to
        # search the collections in order.
        #
        # WITH {dst}_search AS (
        #     SELECT {data-id-cols}, id, run_id, 1 AS rank
        #     FROM <collection1>
        #     UNION ALL
        #     SELECT {data-id-cols}, id, run_id, 2 AS rank
        #     FROM <collection2>
        #     UNION ALL
        #     ...
        # )
        # SELECT
        #     {dst}_window.{data-id-cols},
        #     {dst}_window.id,
        #     {dst}_window.run_id
        # FROM (
        #     SELECT
        #         {dst}_search.{data-id-cols},
        #         {dst}_search.id,
        #         {dst}_search.run_id,
        #         ROW_NUMBER() OVER (
        #             PARTITION BY {dst}_search.{data-id-cols}
        #             ORDER BY rank
        #         ) AS rownum
        #     FROM {dst}_search
        # ) {dst}_window
        # WHERE
        #     {dst}_window.rownum = 1;
        #
        # We'll start with the Common Table Expression (CTE) at the top.
        subqueries = []
        for rank, collection_record in enumerate(collections):
            ssq = storage.select(
                collection_record,
                dataId=SimpleQuery.Select,
                id=SimpleQuery.Select,
                run=SimpleQuery.Select,
                ingestDate=SimpleQuery.Select,
                timespan=None,
            )
            subqueries.append(ssq.add_columns(sqlalchemy.sql.literal(rank).label("rank")))
        # Although one would expect that these subqueries could be combined
        # with UNION ALL instead of UNION because each subquery is already
        # distinct, it turns out that with many subqueries this causes
        # catastrophic performance problems with both sqlite and postgres.
        # Using UNION may require more table scans, but it yields a much
        # simpler query plan given our table structures. See DM-31429.
        search = sqlalchemy.sql.union(*subqueries).cte(f"{storage.datasetType.name}_search")
        # Now we fill out the SELECT over the CTE and the subquery it
        # contains (at the same time, since they have the same columns,
        # aside from the OVER clause).
        run_key_name = self._managers.collections.getRunForeignKeyName()
        window_data_id_cols = [
            search.columns[name].label(name) for name in storage.datasetType.dimensions.required.names
        ]
        window_select_cols = [
            search.columns["id"].label("id"),
            search.columns[run_key_name].label(run_key_name),
            search.columns["ingest_date"].label("ingest_date"),
        ]
        window_select_cols += window_data_id_cols
        window_select_cols.append(
            sqlalchemy.sql.func.row_number()
            .over(partition_by=window_data_id_cols, order_by=search.columns["rank"])
            .label("rownum")
        )
        window = (
            sqlalchemy.sql.select(*window_select_cols)
            .select_from(search)
            .alias(f"{storage.datasetType.name}_window")
        )
        sql = (
            sqlalchemy.sql.select(*[window.columns[col.name].label(col.name) for col in window_select_cols])
            .select_from(window)
            .where(window.columns["rownum"] == 1)
            .alias(storage.datasetType.name)
        )
        return sql
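
    # Worked example of the find-first semantics above (hypothetical data):
    # with collections [A, B] both holding a dataset for data ID
    # {visit: 42}, the copy in A gets rank 0 and the copy in B rank 1.
    # ROW_NUMBER() numbers each data-ID partition in rank order, so the A
    # copy gets rownum 1 and the ``rownum = 1`` filter keeps it while
    # dropping the B copy.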

    def joinTable(
        self,
        table: sqlalchemy.sql.FromClause,
        dimensions: NamedValueAbstractSet[Dimension],
        *,
        datasets: Optional[DatasetQueryColumns] = None,
    ) -> None:
        """Join an arbitrary table to the query via dimension relationships.

        External calls to this method should only be necessary for tables
        whose records represent neither datasets nor dimension elements.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in the
            query. The table must have columns with the names of the
            dimensions.
        datasets : `DatasetQueryColumns`, optional
            Columns that identify a dataset that is part of the query results.
        """
        unexpectedDimensions = NamedValueSet(dimensions - self.summary.mustHaveKeysJoined.dimensions)
        unexpectedDimensions.discard(self.summary.universe.commonSkyPix)
        if unexpectedDimensions:
            raise NotImplementedError(
                f"QueryBuilder does not yet support joining in dimensions {unexpectedDimensions} that "
                f"were not provided originally to the QuerySummary object passed at construction."
            )
        joinOn = self.startJoin(table, dimensions, dimensions.names)
        self.finishJoin(table, joinOn)
        if datasets is not None:
            assert (
                self._columns.datasets is None
            ), "At most one result dataset type can be returned by a query."
            self._columns.datasets = datasets

    def startJoin(
        self, table: sqlalchemy.sql.FromClause, dimensions: Iterable[Dimension], columnNames: Iterable[str]
    ) -> List[sqlalchemy.sql.ColumnElement]:
        """Begin a join on dimensions.

        Must be followed by a call to `finishJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined.
        dimensions : iterable of `Dimension`
            The dimensions that relate this table to others that may be in the
            query. The table must have columns with the names of the
            dimensions.
        columnNames : iterable of `str`
            Names of the columns that correspond to dimension key values; must
            be `zip` iterable with ``dimensions``.

        Returns
        -------
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN.
        """
        joinOn = []
        for dimension, columnName in zip(dimensions, columnNames):
            columnInTable = table.columns[columnName]
            columnsInQuery = self._columns.keys.setdefault(dimension, [])
            for columnInQuery in columnsInQuery:
                joinOn.append(columnInQuery == columnInTable)
            columnsInQuery.append(columnInTable)
        return joinOn

    def finishJoin(
        self, table: sqlalchemy.sql.FromClause, joinOn: List[sqlalchemy.sql.ColumnElement]
    ) -> None:
        """Complete a join on dimensions.

        Must be preceded by a call to `startJoin`.

        Parameters
        ----------
        table : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing the logical table (which may be a
            join or subquery expression) to be joined. Must be the same object
            passed to `startJoin`.
        joinOn : `list` of `sqlalchemy.sql.ColumnElement`
            Sequence of boolean expressions that should be combined with AND
            to form (part of) the ON expression for this JOIN. Should include
            at least the elements of the list returned by `startJoin`.
        """
        onclause: Optional[sqlalchemy.sql.ColumnElement]
        if len(joinOn) == 0:
            onclause = None
        elif len(joinOn) == 1:
            onclause = joinOn[0]
        else:
            onclause = sqlalchemy.sql.and_(*joinOn)
        self._simpleQuery.join(table, onclause=onclause)
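
    # Sketch of the two-phase join protocol (hypothetical ``table`` and
    # ``dims``; the extra ON term is optional and only illustrates why the
    # protocol is split in two):
    #
    #     joinOn = builder.startJoin(table, dims, [d.name for d in dims])
    #     joinOn.append(table.columns["extra_flag"])  # hypothetical column
    #     builder.finishJoin(table, joinOn)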

    def _joinMissingDimensionElements(self) -> None:
        """Join all dimension element tables that were identified as necessary
        by `QuerySummary` and have not yet been joined.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        # Join all DimensionElement tables that we need for spatial/temporal
        # joins/filters or a nontrivial WHERE expression.
        # We iterate over these in *reverse* topological order to minimize the
        # number of tables joined. For example, the "visit" table provides
        # the primary key value for the "instrument" table it depends on, so we
        # don't need to join "instrument" as well unless we had a nontrivial
        # expression on it (and hence included it already above).
        for element in self.summary.universe.sorted(self.summary.mustHaveTableJoined, reverse=True):
            self.joinDimensionElement(element)
        # Join in any requested Dimension tables that don't already have their
        # primary keys identified by the query.
        for dimension in self.summary.universe.sorted(self.summary.mustHaveKeysJoined, reverse=True):
            if dimension not in self._columns.keys:
                self.joinDimensionElement(dimension)

    def _addWhereClause(self) -> None:
        """Add a WHERE clause to the query under construction, connecting all
        joined dimensions to the expression and data ID dimensions from
        `QuerySummary`.

        For internal use by `QueryBuilder` only; will be called (and should
        only be called) by `finish`.
        """
        if self.summary.where.tree is not None:
            self._simpleQuery.where.append(
                convertExpressionToSql(
                    self.summary.where.tree,
                    self.summary.universe,
                    columns=self._columns,
                    elements=self._elements,
                    bind=self.summary.where.bind,
                    TimespanReprClass=self._managers.TimespanReprClass,
                )
            )
        for dimension, columnsInQuery in self._columns.keys.items():
            if dimension in self.summary.where.dataId.graph:
                givenKey = self.summary.where.dataId[dimension]
                # Add a WHERE term for each column that corresponds to each
                # key. This is redundant with the JOIN ON clauses that make
                # them equal to each other, but more constraints have a chance
                # of making things easier on the DB's query optimizer.
                for columnInQuery in columnsInQuery:
                    self._simpleQuery.where.append(columnInQuery == givenKey)
            else:
                # Dimension is not fully identified, but it might be a skypix
                # dimension that's constrained by a given region.
                if self.summary.where.region is not None and isinstance(dimension, SkyPixDimension):
                    # We know the region now.
                    givenSkyPixIds: List[int] = []
                    for begin, end in dimension.pixelization.envelope(self.summary.where.region):
                        givenSkyPixIds.extend(range(begin, end))
                    for columnInQuery in columnsInQuery:
                        self._simpleQuery.where.append(columnInQuery.in_(givenSkyPixIds))
        # If we are given a data ID with a timespan, and there are one or more
        # timespans in the query that aren't given, add a WHERE expression for
        # each of them.
        if self.summary.where.dataId.graph.temporal and self.summary.temporal:
            # Timespan is known now.
            givenInterval = self.summary.where.dataId.timespan
            assert givenInterval is not None
            for element, intervalInQuery in self._columns.timespans.items():
                assert element not in self.summary.where.dataId.graph.elements
                self._simpleQuery.where.append(
                    intervalInQuery.overlaps(self._managers.TimespanReprClass.fromLiteral(givenInterval))
                )

    def finish(self, joinMissing: bool = True) -> Query:
        """Finish constructing the query, returning a new `Query` instance.

        Parameters
        ----------
        joinMissing : `bool`, optional
            If `True` (default), automatically join any missing dimension
            element tables (according to the categorization of the
            `QuerySummary` the builder was constructed with). `False` should
            only be passed if the caller can independently guarantee that all
            dimension relationships are already captured in non-dimension
            tables that have been manually included in the query.

        Returns
        -------
        query : `Query`
            A `Query` object that can be executed and used to interpret result
            rows.
        """
        if joinMissing:
            self._joinMissingDimensionElements()
        self._addWhereClause()
        if self._columns.isEmpty():
            return EmptyQuery(
                self.summary.requested.universe, managers=self._managers, doomed_by=self._doomed_by
            )
        return DirectQuery(
            graph=self.summary.requested,
            uniqueness=DirectQueryUniqueness.NOT_UNIQUE,
            whereRegion=self.summary.where.dataId.region,
            simpleQuery=self._simpleQuery,
            columns=self._columns,
            order_by_columns=self._order_by_columns(),
            limit=self.summary.limit,
            managers=self._managers,
            doomed_by=self._doomed_by,
        )

    def _order_by_columns(self) -> Iterable[OrderByColumn]:
        """Generate columns to be used for the ORDER BY clause.

        Returns
        -------
        order_by_columns : `Iterable` [ `OrderByColumn` ]
            Sequence of columns to appear in the ORDER BY clause.
        """
        order_by_columns: List[OrderByColumn] = []
        if not self.summary.order_by:
            return order_by_columns

        for order_by_column in self.summary.order_by.order_by_columns:

            column: sqlalchemy.sql.ColumnElement
            if order_by_column.column is None:
                # A dimension name; it has to be in the SELECT list already,
                # so only add it to ORDER BY.
                assert isinstance(order_by_column.element, Dimension), "expecting full Dimension"
                column = self._columns.getKeyColumn(order_by_column.element)
            else:
                table = self._elements[order_by_column.element]

                if order_by_column.column in ("timespan.begin", "timespan.end"):
                    TimespanReprClass = self._managers.TimespanReprClass
                    timespan_repr = TimespanReprClass.fromSelectable(table)
                    if order_by_column.column == "timespan.begin":
                        column = timespan_repr.lower()
                        label = f"{order_by_column.element.name}_timespan_begin"
                    else:
                        column = timespan_repr.upper()
                        label = f"{order_by_column.element.name}_timespan_end"
                else:
                    column = table.columns[order_by_column.column]
                    # Make a unique label for it.
                    label = f"{order_by_column.element.name}_{order_by_column.column}"

                column = column.label(label)

            order_by_columns.append(OrderByColumn(column=column, ordering=order_by_column.ordering))

        return order_by_columns
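

# End-to-end lifecycle sketch (hypothetical ``summary``, ``managers``, and
# ``datasetType``; how result rows are iterated depends on the `Query` API
# defined in ``_query.py``):
#
#     builder = QueryBuilder(summary, managers)
#     builder.joinDataset(datasetType, collections=..., isResult=True)
#     query = builder.finish()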