Coverage for python/lsst/daf/butler/registry/queries/_query.py: 22%
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("Query",)
25from abc import ABC, abstractmethod
26from contextlib import contextmanager
27import enum
28import itertools
29from typing import (
30 Dict,
31 Iterable,
32 Iterator,
33 Mapping,
34 Optional,
35 Tuple,
36 TYPE_CHECKING,
37)
39import sqlalchemy
41from lsst.sphgeom import Region
43from ...core import (
44 addDimensionForeignKey,
45 DataCoordinate,
46 DatasetRef,
47 DatasetType,
48 ddl,
49 Dimension,
50 DimensionElement,
51 DimensionGraph,
52 DimensionRecord,
53 DimensionUniverse,
54 SpatialRegionDatabaseRepresentation,
55 SimpleQuery,
56)
57from ..interfaces import Database
58from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary, RegistryManagers
60 if TYPE_CHECKING:
61 from ._builder import QueryBuilder
64class Query(ABC):
65 """An abstract base class for queries that return some combination of
66 `DatasetRef` and `DataCoordinate` objects.
68 Parameters
69 ----------
70 graph : `DimensionGraph`
71 Object describing the dimensions included in the query.
72 whereRegion : `lsst.sphgeom.Region`, optional
73 Region that all region columns in all returned rows must overlap.
74 managers : `RegistryManagers`
75 A struct containing the registry manager instances used by the query
76 system.
77 doomed_by : `Iterable` [ `str` ], optional
78 A list of messages (appropriate for e.g. logging or exceptions) that
79 explain why the query is known to return no results even before it is
80 executed. Queries with a non-empty list will never be executed.
82 Notes
83 -----
84 The `Query` hierarchy abstracts over the database/SQL representation of a
85 particular set of data IDs or datasets. It is expected to be used as a
86 backend for other objects that provide more natural interfaces for one or
87 both of these, not as part of a public interface to query results.
88 """
89 def __init__(self, *,
90 graph: DimensionGraph,
91 whereRegion: Optional[Region],
92 managers: RegistryManagers,
93 doomed_by: Iterable[str] = (),
94 ):
95 self.graph = graph
96 self.whereRegion = whereRegion
97 self.managers = managers
98 self._doomed_by = tuple(doomed_by)
99 self._filtered_by_join: Optional[int] = None
100 self._filtered_by_where: Optional[int] = None
102 @abstractmethod
103 def isUnique(self) -> bool:
104 """Return `True` if this query's rows are guaranteed to be unique, and
105 `False` otherwise.
107 If this query has dataset results (`datasetType` is not `None`),
108 uniqueness applies to the `DatasetRef` instances returned by
109 `extractDatasetRef` from the result of `rows`. If it does not have
110 dataset results, uniqueness applies to the `DataCoordinate` instances
111 returned by `extractDataId`.
112 """
113 raise NotImplementedError()
115 @abstractmethod
116 def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
117 """Return the query column that contains the primary key value for
118 the dimension with the given name.
120 Parameters
121 ----------
122 name : `str`
123 Name of the dimension.
125 Returns
126 -------
127 column : `sqlalchemy.sql.ColumnElement`.
128 SQLAlchemy object representing a column in the query.
130 Notes
131 -----
132 This method is intended primarily as a hook for subclasses to implement
133 and the ABC to call in order to provide higher-level functionality;
134 code that uses `Query` objects (but does not implement one) should
135 usually not have to call this method.
136 """
137 raise NotImplementedError()
139 @property
140 @abstractmethod
141 def spatial(self) -> Iterator[DimensionElement]:
142 """An iterator over the dimension element columns used in post-query
143 filtering of spatial overlaps (`Iterator` [ `DimensionElement` ]).
145 Notes
146 -----
147 This property is intended primarily as a hook for subclasses to
148 implement and the ABC to call in order to provide higher-level
149 functionality; code that uses `Query` objects (but does not implement
150 one) should usually not have to access this property.
151 """
152 raise NotImplementedError()
154 @abstractmethod
155 def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
156 """Return a region column for one of the dimension elements iterated
157 over by `spatial`.
159 Parameters
160 ----------
161 name : `str`
162 Name of the element.
164 Returns
165 -------
166 column : `sqlalchemy.sql.ColumnElement`
 167 SQLAlchemy object representing a result column in the query.
169 Notes
170 -----
171 This method is intended primarily as a hook for subclasses to implement
172 and the ABC to call in order to provide higher-level functionality;
173 code that uses `Query` objects (but does not implement one) should
174 usually not have to call this method.
175 """
176 raise NotImplementedError()
178 @property
179 def datasetType(self) -> Optional[DatasetType]:
180 """The `DatasetType` of datasets returned by this query, or `None`
181 if there are no dataset results (`DatasetType` or `None`).
182 """
183 cols = self.getDatasetColumns()
184 if cols is None:
185 return None
186 return cols.datasetType
188 def count(self, db: Database, *, region: Optional[Region] = None, exact: bool = True) -> int:
189 """Count the number of rows this query would return.
191 Parameters
192 ----------
193 db : `Database`
194 Object managing the database connection.
195 region : `sphgeom.Region`, optional
196 A region that any result-row regions must overlap in order to be
197 yielded. If not provided, this will be ``self.whereRegion``, if
198 that exists.
199 exact : `bool`, optional
200 If `True`, run the full query and perform post-query filtering if
201 needed to account for that filtering in the count. If `False`, the
202 result may be an upper bound.
204 Returns
205 -------
206 count : `int`
207 The number of rows the query would return, or an upper bound if
208 ``exact=False``.
210 Notes
211 -----
212 This counts the number of rows returned, not the number of unique rows
213 returned, so even with ``exact=True`` it may provide only an upper
214 bound on the number of *deduplicated* result rows.
215 """
216 if self._doomed_by:
217 return 0
218 sql = self.sql
219 if sql is None:
220 return 1
221 if exact and self.spatial:
222 filtered_count = 0
223 for _ in self.rows(db, region=region):
224 filtered_count += 1
225 return filtered_count
226 else:
227 return db.query(
228 sql.with_only_columns([sqlalchemy.sql.func.count()]).order_by(None)
229 ).scalar()
231 def any(
232 self,
233 db: Database, *,
234 region: Optional[Region] = None,
235 execute: bool = True,
236 exact: bool = True,
237 ) -> bool:
238 """Test whether this query returns any results.
240 Parameters
241 ----------
242 db : `Database`
243 Object managing the database connection.
244 region : `sphgeom.Region`, optional
245 A region that any result-row regions must overlap in order to be
246 yielded. If not provided, this will be ``self.whereRegion``, if
247 that exists.
248 execute : `bool`, optional
249 If `True`, execute at least a ``LIMIT 1`` query if it cannot be
250 determined prior to execution that the query would return no rows.
251 exact : `bool`, optional
252 If `True`, run the full query and perform post-query filtering if
253 needed, until at least one result row is found. If `False`, the
254 returned result does not account for post-query filtering, and
255 hence may be `True` even when all result rows would be filtered
256 out.
258 Returns
259 -------
260 any : `bool`
261 `True` if the query would (or might, depending on arguments) yield
262 result rows. `False` if it definitely would not.
263 """
264 if self._doomed_by:
265 return False
266 sql = self.sql
267 if sql is None:
268 return True
269 if exact and not execute:
270 raise TypeError("Cannot obtain exact results without executing the query.")
271 if exact and self.spatial:
272 for _ in self.rows(db, region=region):
273 return True
274 return False
275 elif execute:
276 return db.query(sql.limit(1)).one_or_none() is not None
277 else:
278 return True
280 def explain_no_results(
281 self,
282 db: Database, *,
283 region: Optional[Region] = None,
284 ) -> Iterator[str]:
285 """Return human-readable messages that may help explain why the query
286 yields no results.
288 Parameters
289 ----------
290 db : `Database`
291 Object managing the database connection.
292 region : `sphgeom.Region`, optional
293 A region that any result-row regions must overlap in order to be
294 yielded. If not provided, this will be ``self.whereRegion``, if
295 that exists.
297 Returns
298 -------
299 messages : `Iterator` [ `str` ]
300 String messages that describe reasons the query might not yield any
301 results.
303 Notes
304 -----
305 Messages related to post-query filtering are only available if `rows`,
306 `any`, or `count` was already called with the same region (with
307 ``exact=True`` for the latter two).
309 At present, this method only returns messages that are generated while
310 the query is being built or filtered. In the future, it may perform
311 its own new follow-up queries, which users may wish to short-circuit
312 simply by not continuing to iterate over its results.
313 """
314 yield from self._doomed_by
315 if self._filtered_by_where:
316 yield (
317 f"{self._filtered_by_where} result rows were filtered out because "
318 "one or more region did not overlap the WHERE-clause region."
319 )
320 if self._filtered_by_join:
321 yield (
322 f"{self._filtered_by_join} result rows were filtered out because "
323 "one or more regions did not overlap."
324 )
326 @abstractmethod
327 def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
328 """Return the columns for the datasets returned by this query.
330 Returns
331 -------
332 columns : `DatasetQueryColumns` or `None`
333 Struct containing SQLAlchemy representations of the result columns
334 for a dataset.
336 Notes
337 -----
338 This method is intended primarily as a hook for subclasses to implement
339 and the ABC to call in order to provide higher-level functionality;
340 code that uses `Query` objects (but does not implement one) should
341 usually not have to call this method.
342 """
343 raise NotImplementedError()
345 @property
346 @abstractmethod
347 def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
348 """A SQLAlchemy object representing the full query
349 (`sqlalchemy.sql.FromClause` or `None`).
351 This is `None` in the special case where the query has no columns, and
352 only one logical row.
353 """
354 raise NotImplementedError()
356 def rows(self, db: Database, *, region: Optional[Region] = None
357 ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
358 """Execute the query and yield result rows, applying `predicate`.
360 Parameters
361 ----------
362 db : `Database`
363 Object managing the database connection.
364 region : `sphgeom.Region`, optional
365 A region that any result-row regions must overlap in order to be
366 yielded. If not provided, this will be ``self.whereRegion``, if
367 that exists.
369 Yields
370 ------
371 row : `sqlalchemy.engine.RowProxy` or `None`
 372 Result row from the query. `None` may be yielded exactly once instead
373 of any real rows to indicate an empty query (see `EmptyQuery`).
374 """
375 if self._doomed_by:
376 return
377 whereRegion = region if region is not None else self.whereRegion
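        # Reset the post-query filter counters that feed explain_no_results().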
378 self._filtered_by_where = 0
379 self._filtered_by_join = 0
380 for row in db.query(self.sql):
381 rowRegions = [row._mapping[self.getRegionColumn(element.name)] for element in self.spatial]
382 if whereRegion and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
383 self._filtered_by_where += 1
384 continue
 385 if any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2)):
386 self._filtered_by_join += 1
387 continue
388 yield row
390 def extractDimensionsTuple(self, row: Optional[sqlalchemy.engine.RowProxy],
391 dimensions: Iterable[Dimension]) -> tuple:
392 """Extract a tuple of data ID values from a result row.
394 Parameters
395 ----------
396 row : `sqlalchemy.engine.RowProxy` or `None`
397 A result row from a SQLAlchemy SELECT query, or `None` to indicate
398 the row from an `EmptyQuery`.
399 dimensions : `Iterable` [ `Dimension` ]
400 The dimensions to include in the returned tuple, in order.
402 Returns
403 -------
404 values : `tuple`
405 A tuple of dimension primary key values.
406 """
407 if row is None:
408 assert not tuple(dimensions), "Can only utilize empty query row when there are no dimensions."
409 return ()
410 return tuple(row._mapping[self.getDimensionColumn(dimension.name)] for dimension in dimensions)
412 def extractDataId(self, row: Optional[sqlalchemy.engine.RowProxy], *,
413 graph: Optional[DimensionGraph] = None,
414 records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
415 ) -> DataCoordinate:
416 """Extract a data ID from a result row.
418 Parameters
419 ----------
420 row : `sqlalchemy.engine.RowProxy` or `None`
421 A result row from a SQLAlchemy SELECT query, or `None` to indicate
422 the row from an `EmptyQuery`.
423 graph : `DimensionGraph`, optional
424 The dimensions the returned data ID should identify. If not
425 provided, this will be all dimensions in `QuerySummary.requested`.
426 records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
427 Nested mapping containing records to attach to the returned
428 `DataCoordinate`, for which `~DataCoordinate.hasRecords` will
429 return `True`. If provided, outer keys must include all dimension
430 element names in ``graph``, and inner keys should be tuples of
431 dimension primary key values in the same order as
432 ``element.graph.required``. If not provided,
433 `DataCoordinate.hasRecords` will return `False` on the returned
434 object.
436 Returns
437 -------
438 dataId : `DataCoordinate`
439 A data ID that identifies all required and implied dimensions. If
 440 ``records is not None``, this will have
441 `~DataCoordinate.hasRecords()` return `True`.
442 """
443 if graph is None:
444 graph = self.graph
445 if not graph:
446 return DataCoordinate.makeEmpty(self.graph.universe)
447 dataId = DataCoordinate.fromFullValues(
448 graph,
449 self.extractDimensionsTuple(row, itertools.chain(graph.required, graph.implied))
450 )
451 if records is not None:
452 recordsForRow = {}
453 for element in graph.elements:
454 key = tuple(dataId.subset(element.graph).values())
455 recordsForRow[element.name] = records[element.name].get(key)
456 return dataId.expanded(recordsForRow)
457 else:
458 return dataId
460 def extractDatasetRef(self, row: sqlalchemy.engine.RowProxy,
461 dataId: Optional[DataCoordinate] = None,
462 records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
463 ) -> DatasetRef:
464 """Extract a `DatasetRef` from a result row.
466 Parameters
467 ----------
468 row : `sqlalchemy.engine.RowProxy`
469 A result row from a SQLAlchemy SELECT query.
470 dataId : `DataCoordinate`
471 Data ID to attach to the `DatasetRef`. A minimal (i.e. base class)
472 `DataCoordinate` is constructed from ``row`` if `None`.
473 records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
474 Records to use to return an `ExpandedDataCoordinate`. If provided,
475 outer keys must include all dimension element names in ``graph``,
476 and inner keys should be tuples of dimension primary key values
477 in the same order as ``element.graph.required``.
479 Returns
480 -------
481 ref : `DatasetRef`
482 Reference to the dataset; guaranteed to have `DatasetRef.id` not
483 `None`.
484 """
485 datasetColumns = self.getDatasetColumns()
486 assert datasetColumns is not None
487 if dataId is None:
488 dataId = self.extractDataId(row, graph=datasetColumns.datasetType.dimensions, records=records)
489 runRecord = self.managers.collections[row._mapping[datasetColumns.runKey]]
490 return DatasetRef(datasetColumns.datasetType, dataId, id=row._mapping[datasetColumns.id],
491 run=runRecord.name)
493 def _makeTableSpec(self, constraints: bool = False) -> ddl.TableSpec:
494 """Helper method for subclass implementations of `materialize`.
496 Parameters
497 ----------
498 constraints : `bool`, optional
499 If `True` (`False` is default), define a specification that
500 includes actual foreign key constraints for logical foreign keys.
501 Some database engines do not permit temporary tables to reference
502 normal tables, so this should be `False` when generating a spec
503 for a temporary table unless the database engine is known to
504 support them.
506 Returns
507 -------
508 spec : `ddl.TableSpec`
509 Specification for a table that could hold this query's result rows.
510 """
511 unique = self.isUnique()
512 spec = ddl.TableSpec(fields=())
513 for dimension in self.graph:
514 addDimensionForeignKey(spec, dimension, primaryKey=unique, constraint=constraints)
515 for element in self.spatial:
516 spec.fields.update(
517 SpatialRegionDatabaseRepresentation.makeFieldSpecs(
518 nullable=True,
519 name=f"{element.name}_region",
520 )
521 )
522 datasetColumns = self.getDatasetColumns()
523 if datasetColumns is not None:
524 self.managers.datasets.addDatasetForeignKey(spec, primaryKey=unique, constraint=constraints)
525 self.managers.collections.addRunForeignKey(spec, nullable=False, constraint=constraints)
526 return spec
528 def _makeSubsetQueryColumns(self, *, graph: Optional[DimensionGraph] = None,
529 datasets: bool = True,
530 unique: bool = False) -> Tuple[DimensionGraph, Optional[QueryColumns]]:
531 """Helper method for subclass implementations of `subset`.
533 Parameters
534 ----------
535 graph : `DimensionGraph`, optional
536 Dimensions to include in the new `Query` being constructed.
537 ``subset`` implementations should generally just forward their
538 own ``graph`` argument here.
539 datasets : `bool`, optional
540 Whether the new `Query` should include dataset results. Defaults
541 to `True`, but is ignored if ``self`` does not include dataset
542 results.
543 unique : `bool`, optional
544 Whether the new `Query` should guarantee unique results (this may
545 come with a performance penalty).
547 Returns
548 -------
549 graph : `DimensionGraph`
550 The dimensions of the new `Query`. This is exactly the same as
551 the argument of the same name, with ``self.graph`` used if that
552 argument is `None`.
553 columns : `QueryColumns` or `None`
554 A struct containing the SQLAlchemy column objects to use in the
 555 new query, constructed by delegating to other (mostly abstract)
556 methods on ``self``. If `None`, `subset` may return ``self``.
557 """
558 if graph is None:
559 graph = self.graph
560 if (graph == self.graph and (self.getDatasetColumns() is None or datasets)
561 and (self.isUnique() or not unique)):
562 return graph, None
563 columns = QueryColumns()
564 for dimension in graph.dimensions:
565 col = self.getDimensionColumn(dimension.name)
566 columns.keys[dimension] = [col]
567 if not unique:
568 for element in self.spatial:
569 col = self.getRegionColumn(element.name)
570 columns.regions[element] = col
571 if datasets and self.getDatasetColumns() is not None:
572 columns.datasets = self.getDatasetColumns()
573 return graph, columns
575 @contextmanager
576 def materialize(self, db: Database) -> Iterator[Query]:
577 """Execute this query and insert its results into a temporary table.
579 Parameters
580 ----------
581 db : `Database`
582 Database engine to execute the query against.
584 Returns
585 -------
586 context : `typing.ContextManager` [ `MaterializedQuery` ]
587 A context manager that ensures the temporary table is created and
588 populated in ``__enter__`` (returning a `MaterializedQuery` object
589 backed by that table), and dropped in ``__exit__``. If ``self``
590 is already a `MaterializedQuery`, ``__enter__`` may just return
591 ``self`` and ``__exit__`` may do nothing (reflecting the fact that
592 an outer context manager should already take care of everything
593 else).
594 """
595 spec = self._makeTableSpec()
596 with db.session() as session:
597 table = session.makeTemporaryTable(spec)
598 if not self._doomed_by:
599 db.insert(table, select=self.sql, names=spec.fields.names)
600 yield MaterializedQuery(table=table,
601 spatial=self.spatial,
602 datasetType=self.datasetType,
603 isUnique=self.isUnique(),
604 graph=self.graph,
605 whereRegion=self.whereRegion,
606 managers=self.managers,
607 doomed_by=self._doomed_by)
608 session.dropTemporaryTable(table)
610 @abstractmethod
611 def subset(self, *, graph: Optional[DimensionGraph] = None,
612 datasets: bool = True,
613 unique: bool = False) -> Query:
614 """Return a new `Query` whose columns and/or rows are (mostly) subset
615 of this one's.
617 Parameters
618 ----------
619 graph : `DimensionGraph`, optional
620 Dimensions to include in the new `Query` being constructed.
621 If `None` (default), ``self.graph`` is used.
622 datasets : `bool`, optional
623 Whether the new `Query` should include dataset results. Defaults
624 to `True`, but is ignored if ``self`` does not include dataset
625 results.
626 unique : `bool`, optional
627 Whether the new `Query` should guarantee unique results (this may
628 come with a performance penalty).
630 Returns
631 -------
632 query : `Query`
633 A query object corresponding to the given inputs. May be ``self``
634 if no changes were requested.
636 Notes
637 -----
638 The way spatial overlaps are handled at present makes it impossible to
639 fully guarantee in general that the new query's rows are a subset of
640 this one's while also returning unique rows. That's because the
641 database is only capable of performing approximate, conservative
642 overlaps via the common skypix system; we defer actual region overlap
643 operations to per-result-row Python logic. But including the region
644 columns necessary to do that postprocessing in the query makes it
645 impossible to do a SELECT DISTINCT on the user-visible dimensions of
646 the query. For example, consider starting with a query with dimensions
647 (instrument, skymap, visit, tract). That involves a spatial join
648 between visit and tract, and we include the region columns from both
649 tables in the results in order to only actually yield result rows
650 (see `predicate` and `rows`) where the regions in those two columns
651 overlap. If the user then wants to subset to just (skymap, tract) with
652 unique results, we have two unpalatable options:
654 - we can do a SELECT DISTINCT with just the skymap and tract columns
655 in the SELECT clause, dropping all detailed overlap information and
656 including some tracts that did not actually overlap any of the
657 visits in the original query (but were regarded as _possibly_
658 overlapping via the coarser, common-skypix relationships);
660 - we can include the tract and visit region columns in the query, and
661 continue to filter out the non-overlapping pairs, but completely
662 disregard the user's request for unique tracts.
664 This interface specifies that implementations must do the former, as
665 that's what makes things efficient in our most important use case
666 (``QuantumGraph`` generation in ``pipe_base``). We may be able to
667 improve this situation in the future by putting exact overlap
668 information in the database, either by using built-in (but
669 engine-specific) spatial database functionality or (more likely)
670 switching to a scheme in which pairwise dimension spatial relationships
671 are explicitly precomputed (for e.g. combinations of instruments and
672 skymaps).
673 """
674 raise NotImplementedError()
676 @abstractmethod
677 def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
678 """Return a `QueryBuilder` that can be used to construct a new `Query`
679 that is joined to (and hence constrained by) this one.
681 Parameters
682 ----------
683 summary : `QuerySummary`, optional
684 A `QuerySummary` instance that specifies the dimensions and any
685 additional constraints to include in the new query being
686 constructed, or `None` to use the dimensions of ``self`` with no
687 additional constraints.
688 """
689 raise NotImplementedError()
691 graph: DimensionGraph
692 """The dimensions identified by this query and included in any data IDs
693 created from its result rows (`DimensionGraph`).
694 """
696 whereRegion: Optional[Region]
697 """A spatial region that all regions in all rows returned by this query
698 must overlap (`lsst.sphgeom.Region` or `None`).
699 """
701 managers: RegistryManagers
702 """A struct containing `Registry` helper object (`RegistryManagers`).
703 """
706class DirectQueryUniqueness(enum.Enum):
707 """An enum representing the ways in which a query can have unique rows (or
708 not).
709 """
711 NOT_UNIQUE = enum.auto()
712 """The query is not expected to have unique rows.
713 """
715 NATURALLY_UNIQUE = enum.auto()
716 """The construction of the query guarantees that it will have unique
717 result rows, even without SELECT DISTINCT or a GROUP BY clause.
718 """
720 NEEDS_DISTINCT = enum.auto()
721 """The query is expected to yield unique result rows, and needs to use
722 SELECT DISTINCT or an equivalent GROUP BY clause to achieve this.
723 """
726class DirectQuery(Query):
727 """A `Query` implementation that represents a direct SELECT query that
728 usually joins many tables.
730 `DirectQuery` objects should generally only be constructed by
731 `QueryBuilder` or the methods of other `Query` objects.
733 Parameters
734 ----------
735 simpleQuery : `SimpleQuery`
736 Struct representing the actual SELECT, FROM, and WHERE clauses.
737 columns : `QueryColumns`
738 Columns that are referenced in the query in any clause.
739 uniqueness : `DirectQueryUniqueness`
740 Enum value indicating whether the query should yield unique result
741 rows, and if so whether that needs to be explicitly requested of the
742 database.
743 graph : `DimensionGraph`
744 Object describing the dimensions included in the query.
745 whereRegion : `lsst.sphgeom.Region`, optional
746 Region that all region columns in all returned rows must overlap.
747 managers : `RegistryManagers`
748 Struct containing the `Registry` manager helper objects, to be
749 forwarded to the `Query` constructor.
750 doomed_by : `Iterable` [ `str` ], optional
751 A list of messages (appropriate for e.g. logging or exceptions) that
752 explain why the query is known to return no results even before it is
753 executed. Queries with a non-empty list will never be executed.
754 """
755 def __init__(self, *,
756 simpleQuery: SimpleQuery,
757 columns: QueryColumns,
758 uniqueness: DirectQueryUniqueness,
759 graph: DimensionGraph,
760 whereRegion: Optional[Region],
761 managers: RegistryManagers,
762 doomed_by: Iterable[str] = ()):
763 super().__init__(graph=graph, whereRegion=whereRegion, managers=managers, doomed_by=doomed_by)
764 assert not simpleQuery.columns, "Columns should always be set on a copy in .sql"
765 assert not columns.isEmpty(), "EmptyQuery must be used when a query would have no columns."
766 self._simpleQuery = simpleQuery
767 self._columns = columns
768 self._uniqueness = uniqueness
769 self._datasetQueryColumns: Optional[DatasetQueryColumns] = None
770 self._dimensionColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {}
771 self._regionColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {}
773 def isUnique(self) -> bool:
774 # Docstring inherited from Query.
775 return self._uniqueness is not DirectQueryUniqueness.NOT_UNIQUE
777 def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
778 # Docstring inherited from Query.
779 column = self._dimensionColumns.get(name)
780 if column is None:
781 column = self._columns.getKeyColumn(name).label(name)
782 self._dimensionColumns[name] = column
783 return column
785 @property
786 def spatial(self) -> Iterator[DimensionElement]:
787 # Docstring inherited from Query.
788 return iter(self._columns.regions)
790 def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
791 # Docstring inherited from Query.
792 column = self._regionColumns.get(name)
793 if column is None:
794 column = self._columns.regions[name].column.label(f"{name}_region")
795 self._regionColumns[name] = column
796 return column
798 def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
799 # Docstring inherited from Query.
800 if self._datasetQueryColumns is None:
801 base = self._columns.datasets
802 if base is None:
803 return None
804 ingestDate = base.ingestDate
805 if ingestDate is not None:
806 ingestDate = ingestDate.label("ingest_date")
807 self._datasetQueryColumns = DatasetQueryColumns(
808 datasetType=base.datasetType,
809 id=base.id.label("dataset_id"),
810 runKey=base.runKey.label(self.managers.collections.getRunForeignKeyName()),
811 ingestDate=ingestDate,
812 )
813 return self._datasetQueryColumns
815 @property
816 def sql(self) -> sqlalchemy.sql.FromClause:
817 # Docstring inherited from Query.
818 simpleQuery = self._simpleQuery.copy()
819 for dimension in self.graph:
820 simpleQuery.columns.append(self.getDimensionColumn(dimension.name))
821 for element in self.spatial:
822 simpleQuery.columns.append(self.getRegionColumn(element.name))
823 datasetColumns = self.getDatasetColumns()
824 if datasetColumns is not None:
825 simpleQuery.columns.extend(datasetColumns)
826 sql = simpleQuery.combine()
827 if self._uniqueness is DirectQueryUniqueness.NEEDS_DISTINCT:
828 return sql.distinct()
829 else:
830 return sql
832 def subset(self, *, graph: Optional[DimensionGraph] = None,
833 datasets: bool = True,
834 unique: bool = False) -> Query:
835 # Docstring inherited from Query.
836 graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
837 if columns is None:
838 return self
839 if columns.isEmpty():
840 return EmptyQuery(self.graph.universe, self.managers)
841 return DirectQuery(
842 simpleQuery=self._simpleQuery.copy(),
843 columns=columns,
844 uniqueness=DirectQueryUniqueness.NEEDS_DISTINCT if unique else DirectQueryUniqueness.NOT_UNIQUE,
845 graph=graph,
846 whereRegion=self.whereRegion if not unique else None,
847 managers=self.managers,
848 doomed_by=self._doomed_by,
849 )
851 def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
852 # Docstring inherited from Query.
853 from ._builder import QueryBuilder
854 if summary is None:
855 summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
856 if not summary.requested.issubset(self.graph):
857 raise NotImplementedError(
858 f"Query.makeBuilder does not yet support augmenting dimensions "
859 f"({summary.requested.dimensions}) beyond those originally included in the query "
860 f"({self.graph.dimensions})."
861 )
862 builder = QueryBuilder(summary, managers=self.managers, doomed_by=self._doomed_by)
863 builder.joinTable(self.sql.alias(), dimensions=self.graph.dimensions,
864 datasets=self.getDatasetColumns())
865 return builder
868class MaterializedQuery(Query):
869 """A `Query` implementation that represents query results saved in a
870 temporary table.
872 `MaterializedQuery` instances should not be constructed directly; use
873 `Query.materialize()` instead.
875 Parameters
876 ----------
877 table : `sqlalchemy.schema.Table`
 878 SQLAlchemy object representing the temporary table.
879 spatial : `Iterable` [ `DimensionElement` ]
880 Spatial dimension elements whose regions must overlap for each valid
881 result row (which may reject some rows that are in the table).
 882 datasetType : `DatasetType` or `None`
 883 The `DatasetType` of datasets returned by this query, or `None`
 884 if there are no dataset results.
885 isUnique : `bool`
886 If `True`, the table's rows are unique, and there is no need to
 887 add ``SELECT DISTINCT`` to guarantee this in results.
888 graph : `DimensionGraph`
889 Dimensions included in the columns of this table.
890 whereRegion : `Region` or `None`
 891 A spatial region that all result-row regions must overlap to be valid (which
892 may reject some rows that are in the table).
893 managers : `RegistryManagers`
894 A struct containing `Registry` manager helper objects, forwarded to
895 the `Query` constructor.
896 doomed_by : `Iterable` [ `str` ], optional
897 A list of messages (appropriate for e.g. logging or exceptions) that
898 explain why the query is known to return no results even before it is
899 executed. Queries with a non-empty list will never be executed.
900 """
901 def __init__(self, *,
902 table: sqlalchemy.schema.Table,
903 spatial: Iterable[DimensionElement],
904 datasetType: Optional[DatasetType],
905 isUnique: bool,
906 graph: DimensionGraph,
907 whereRegion: Optional[Region],
908 managers: RegistryManagers,
909 doomed_by: Iterable[str] = ()):
910 super().__init__(graph=graph, whereRegion=whereRegion, managers=managers, doomed_by=doomed_by)
911 self._table = table
912 self._spatial = tuple(spatial)
913 self._datasetType = datasetType
914 self._isUnique = isUnique
916 def isUnique(self) -> bool:
917 # Docstring inherited from Query.
918 return self._isUnique
920 def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
921 # Docstring inherited from Query.
922 return self._table.columns[name]
924 @property
925 def spatial(self) -> Iterator[DimensionElement]:
926 # Docstring inherited from Query.
927 return iter(self._spatial)
929 def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
930 # Docstring inherited from Query.
931 return self._table.columns[f"{name}_region"]
933 def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
934 # Docstring inherited from Query.
935 if self._datasetType is not None:
936 return DatasetQueryColumns(
937 datasetType=self._datasetType,
938 id=self._table.columns["dataset_id"],
939 runKey=self._table.columns[self.managers.collections.getRunForeignKeyName()],
940 ingestDate=None,
941 )
942 else:
943 return None
945 @property
946 def sql(self) -> sqlalchemy.sql.FromClause:
947 # Docstring inherited from Query.
948 return self._table.select()
950 @contextmanager
951 def materialize(self, db: Database) -> Iterator[Query]:
952 # Docstring inherited from Query.
953 yield self
955 def subset(self, *, graph: Optional[DimensionGraph] = None,
956 datasets: bool = True,
957 unique: bool = False) -> Query:
958 # Docstring inherited from Query.
959 graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
960 if columns is None:
961 return self
962 if columns.isEmpty():
963 return EmptyQuery(self.graph.universe, managers=self.managers)
964 simpleQuery = SimpleQuery()
965 simpleQuery.join(self._table)
966 return DirectQuery(
967 simpleQuery=simpleQuery,
968 columns=columns,
969 uniqueness=DirectQueryUniqueness.NEEDS_DISTINCT if unique else DirectQueryUniqueness.NOT_UNIQUE,
970 graph=graph,
971 whereRegion=self.whereRegion if not unique else None,
972 managers=self.managers,
973 doomed_by=self._doomed_by,
974 )
976 def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
977 # Docstring inherited from Query.
978 from ._builder import QueryBuilder
979 if summary is None:
980 summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
981 if not summary.requested.issubset(self.graph):
982 raise NotImplementedError(
983 f"Query.makeBuilder does not yet support augmenting dimensions "
984 f"({summary.requested.dimensions}) beyond those originally included in the query "
985 f"({self.graph.dimensions})."
986 )
987 builder = QueryBuilder(summary, managers=self.managers, doomed_by=self._doomed_by)
988 builder.joinTable(self._table, dimensions=self.graph.dimensions, datasets=self.getDatasetColumns())
989 return builder
992class EmptyQuery(Query):
993 """A `Query` implementation that handes the special case where the query
994 would have no columns.
996 Parameters
997 ----------
998 universe : `DimensionUniverse`
999 Set of all dimensions from which the null set is extracted.
1000 managers : `RegistryManagers`
1001 A struct containing the registry manager instances used by the query
1002 system.
1003 doomed_by : `Iterable` [ `str` ], optional
1004 A list of messages (appropriate for e.g. logging or exceptions) that
1005 explain why the query is known to return no results even before it is
1006 executed. Queries with a non-empty list will never be executed.
1007 """
1008 def __init__(
1009 self,
1010 universe: DimensionUniverse,
1011 managers: RegistryManagers,
1012 doomed_by: Iterable[str] = (),
1013 ):
1014 super().__init__(graph=universe.empty, whereRegion=None, managers=managers, doomed_by=doomed_by)
1016 def isUnique(self) -> bool:
1017 # Docstring inherited from Query.
1018 return True
1020 def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
1021 # Docstring inherited from Query.
1022 raise KeyError(f"No dimension {name} in query (no dimensions at all, actually).")
1024 @property
1025 def spatial(self) -> Iterator[DimensionElement]:
1026 # Docstring inherited from Query.
1027 return iter(())
1029 def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
1030 # Docstring inherited from Query.
1031 raise KeyError(f"No region for {name} in query (no regions at all, actually).")
1033 def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
1034 # Docstring inherited from Query.
1035 return None
1037 def rows(self, db: Database, *, region: Optional[Region] = None
1038 ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
1039 if not self._doomed_by:
1040 yield None
1042 @property
1043 def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
1044 # Docstring inherited from Query.
1045 return None
1047 @contextmanager
1048 def materialize(self, db: Database) -> Iterator[Query]:
1049 # Docstring inherited from Query.
1050 yield self
1052 def subset(self, *, graph: Optional[DimensionGraph] = None,
1053 datasets: bool = True,
1054 unique: bool = False) -> Query:
1055 # Docstring inherited from Query.
1056 assert graph is None or graph.issubset(self.graph)
1057 return self
1059 def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
1060 # Docstring inherited from Query.
1061 from ._builder import QueryBuilder
1062 if summary is None:
1063 summary = QuerySummary(self.graph)
1064 if not summary.requested.issubset(self.graph):
1065 raise NotImplementedError(
1066 f"Query.makeBuilder does not yet support augmenting dimensions "
1067 f"({summary.requested.dimensions}) beyond those originally included in the query "
1068 f"({self.graph.dimensions})."
1069 )
1070 return QueryBuilder(summary, managers=self.managers, doomed_by=self._doomed_by)