Coverage for python/lsst/daf/butler/registry/queries/_query.py: 21%
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("Query",)
25from abc import ABC, abstractmethod
26from contextlib import contextmanager
27import enum
28import itertools
29from typing import (
30 Dict,
31 Iterable,
32 Iterator,
33 Mapping,
34 Optional,
35 Tuple,
36 TYPE_CHECKING,
37)
39import sqlalchemy
41from lsst.sphgeom import Region
43from ...core import (
44 addDimensionForeignKey,
45 DataCoordinate,
46 DatasetRef,
47 DatasetType,
48 ddl,
49 Dimension,
50 DimensionElement,
51 DimensionGraph,
52 DimensionRecord,
53 DimensionUniverse,
54 SpatialRegionDatabaseRepresentation,
55 SimpleQuery,
56)
57from ..interfaces import Database
58from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary, RegistryManagers
60 if TYPE_CHECKING:
61 from ._builder import QueryBuilder
64class Query(ABC):
65 """An abstract base class for queries that return some combination of
66 `DatasetRef` and `DataCoordinate` objects.
68 Parameters
69 ----------
70 graph : `DimensionGraph`
71 Object describing the dimensions included in the query.
72 whereRegion : `lsst.sphgeom.Region`, optional
73 Region that all region columns in all returned rows must overlap.
74 managers : `RegistryManagers`
75 A struct containing the registry manager instances used by the query
76 system.
77 doomed_by : `Iterable` [ `str` ], optional
78 A list of messages (appropriate for e.g. logging or exceptions) that
79 explain why the query is known to return no results even before it is
80 executed. Queries with a non-empty list will never be executed.
82 Notes
83 -----
84 The `Query` hierarchy abstracts over the database/SQL representation of a
85 particular set of data IDs or datasets. It is expected to be used as a
86 backend for other objects that provide more natural interfaces for one or
87 both of these, not as part of a public interface to query results.
88 """
89 def __init__(self, *,
90 graph: DimensionGraph,
91 whereRegion: Optional[Region],
92 managers: RegistryManagers,
93 doomed_by: Iterable[str] = (),
94 ):
95 self.graph = graph
96 self.whereRegion = whereRegion
97 self.managers = managers
98 self._doomed_by = tuple(doomed_by)
99 self._filtered_by_join: Optional[int] = None
100 self._filtered_by_where: Optional[int] = None
102 @abstractmethod
103 def isUnique(self) -> bool:
104 """Return `True` if this query's rows are guaranteed to be unique, and
105 `False` otherwise.
107 If this query has dataset results (`datasetType` is not `None`),
108 uniqueness applies to the `DatasetRef` instances returned by
109 `extractDatasetRef` from the result of `rows`. If it does not have
110 dataset results, uniqueness applies to the `DataCoordinate` instances
111 returned by `extractDataId`.
112 """
113 raise NotImplementedError()
115 @abstractmethod
116 def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
117 """Return the query column that contains the primary key value for
118 the dimension with the given name.
120 Parameters
121 ----------
122 name : `str`
123 Name of the dimension.
125 Returns
126 -------
127 column : `sqlalchemy.sql.ColumnElement`
128 SQLAlchemy object representing a column in the query.
130 Notes
131 -----
132 This method is intended primarily as a hook for subclasses to implement
133 and the ABC to call in order to provide higher-level functionality;
134 code that uses `Query` objects (but does not implement one) should
135 usually not have to call this method.
136 """
137 raise NotImplementedError()
139 @property
140 @abstractmethod
141 def spatial(self) -> Iterator[DimensionElement]:
142 """An iterator over the dimension element columns used in post-query
143 filtering of spatial overlaps (`Iterator` [ `DimensionElement` ]).
145 Notes
146 -----
147 This property is intended primarily as a hook for subclasses to
148 implement and the ABC to call in order to provide higher-level
149 functionality; code that uses `Query` objects (but does not implement
150 one) should usually not have to access this property.
151 """
152 raise NotImplementedError()
154 @abstractmethod
155 def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
156 """Return a region column for one of the dimension elements iterated
157 over by `spatial`.
159 Parameters
160 ----------
161 name : `str`
162 Name of the element.
164 Returns
165 -------
166 column : `sqlalchemy.sql.ColumnElement`
167 SQLAlchemy object representing a result column in the query.
169 Notes
170 -----
171 This method is intended primarily as a hook for subclasses to implement
172 and the ABC to call in order to provide higher-level functionality;
173 code that uses `Query` objects (but does not implement one) should
174 usually not have to call this method.
175 """
176 raise NotImplementedError()
178 @property
179 def datasetType(self) -> Optional[DatasetType]:
180 """The `DatasetType` of datasets returned by this query, or `None`
181 if there are no dataset results (`DatasetType` or `None`).
182 """
183 cols = self.getDatasetColumns()
184 if cols is None:
185 return None
186 return cols.datasetType
188 def count(self, db: Database, *, region: Optional[Region] = None, exact: bool = True) -> int:
189 """Count the number of rows this query would return.
191 Parameters
192 ----------
193 db : `Database`
194 Object managing the database connection.
195 region : `sphgeom.Region`, optional
196 A region that any result-row regions must overlap in order to be
197 yielded. If not provided, this will be ``self.whereRegion``, if
198 that exists.
199 exact : `bool`, optional
200 If `True`, run the full query and perform post-query filtering if
201 needed to account for that filtering in the count. If `False`, the
202 result may be an upper bound.
204 Returns
205 -------
206 count : `int`
207 The number of rows the query would return, or an upper bound if
208 ``exact=False``.
210 Notes
211 -----
212 This counts the number of rows returned, not the number of unique rows
213 returned, so even with ``exact=True`` it may provide only an upper
214 bound on the number of *deduplicated* result rows.
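
        A minimal usage sketch, assuming ``db`` is an existing `Database`
        and ``query`` is a `Query` obtained elsewhere (e.g. from
        `QueryBuilder.finish`)::

            upper_bound = query.count(db, exact=False)  # cheap upper bound
            if upper_bound:
                n = query.count(db, exact=True)  # accounts for post-query filtering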
215 """
216 if self._doomed_by:
217 return 0
218 sql = self.sql
219 if sql is None:
220 return 1
221 if exact and self.spatial:
222 filtered_count = 0
223 for _ in self.rows(db, region=region):
224 filtered_count += 1
225 return filtered_count
226 else:
227 return db.query(
228 sql.with_only_columns([sqlalchemy.sql.func.count()]).order_by(None)
229 ).scalar()
231 def any(
232 self,
233 db: Database, *,
234 region: Optional[Region] = None,
235 execute: bool = True,
236 exact: bool = True,
237 ) -> bool:
238 """Test whether this query returns any results.
240 Parameters
241 ----------
242 db : `Database`
243 Object managing the database connection.
244 region : `sphgeom.Region`, optional
245 A region that any result-row regions must overlap in order to be
246 yielded. If not provided, this will be ``self.whereRegion``, if
247 that exists.
248 execute : `bool`, optional
249 If `True`, execute at least a ``LIMIT 1`` query if it cannot be
250 determined prior to execution that the query would return no rows.
251 exact : `bool`, optional
252 If `True`, run the full query and perform post-query filtering if
253 needed, until at least one result row is found. If `False`, the
254 returned result does not account for post-query filtering, and
255 hence may be `True` even when all result rows would be filtered
256 out.
258 Returns
259 -------
260 any : `bool`
261 `True` if the query would (or might, depending on arguments) yield
262 result rows. `False` if it definitely would not.
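
        A usage sketch, with ``db`` and ``query`` assumed to exist as in
        `count`::

            might_have_rows = query.any(db, execute=False, exact=False)
            if might_have_rows:
                has_rows = query.any(db, execute=True, exact=True)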
263 """
264 if self._doomed_by:
265 return False
266 sql = self.sql
267 if sql is None:
268 return True
269 if exact and not execute:
270 raise TypeError("Cannot obtain exact results without executing the query.")
271 if exact and self.spatial:
272 for _ in self.rows(db, region=region):
273 return True
274 return False
275 elif execute:
276 return db.query(sql.limit(1)).one_or_none() is not None
277 else:
278 return True
280 def explain_no_results(
281 self,
282 db: Database, *,
283 region: Optional[Region] = None,
284 followup: bool = True,
285 ) -> Iterator[str]:
286 """Return human-readable messages that may help explain why the query
287 yields no results.
289 Parameters
290 ----------
291 db : `Database`
292 Object managing the database connection.
293 region : `sphgeom.Region`, optional
294 A region that any result-row regions must overlap in order to be
295 yielded. If not provided, this will be ``self.whereRegion``, if
296 that exists.
297 followup : `bool`, optional
298 If `True` (default) perform inexpensive follow-up queries if no
299 diagnostics are available from query generation alone.
301 Returns
302 -------
303 messages : `Iterator` [ `str` ]
304 String messages that describe reasons the query might not yield any
305 results.
307 Notes
308 -----
309 Messages related to post-query filtering are only available if `rows`,
310 `any`, or `count` was already called with the same region (with
311 ``exact=True`` for the latter two).
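
        A sketch of how these diagnostics might be surfaced, assuming ``db``
        and ``query`` exist and ``log`` is a standard `logging.Logger`::

            if not query.any(db, exact=True):
                for message in query.explain_no_results(db):
                    log.debug("query returned no results: %s", message)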
312 """
313 from ._builder import QueryBuilder
314 if self._doomed_by:
315 yield from self._doomed_by
316 return
317 if self._filtered_by_where:
318 yield (
319 f"{self._filtered_by_where} result rows were filtered out because "
320 "one or more region did not overlap the WHERE-clause region."
321 )
322 if self._filtered_by_join:
323 yield (
324 f"{self._filtered_by_join} result rows were filtered out because "
325 "one or more regions did not overlap."
326 )
327 if (not followup) or self._filtered_by_join or self._filtered_by_where:
328 return
329 # Query didn't return results even before client-side filtering, and
330 # caller says we can do follow-up queries to determine why.
331 # Start by seeing if there are _any_ dimension records for each element
332 # involved.
333 for element in self.graph.elements:
334 summary = QuerySummary(element.graph)
335 builder = QueryBuilder(summary, self.managers)
336 followup_query = builder.finish()
337 if not followup_query.any(db, exact=False):
338 yield f"No dimension records for element '{element.name}' found."
339 yield from followup_query.explain_no_results(db, region=region, followup=False)
340 return
342 @abstractmethod
343 def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
344 """Return the columns for the datasets returned by this query.
346 Returns
347 -------
348 columns : `DatasetQueryColumns` or `None`
349 Struct containing SQLAlchemy representations of the result columns
350 for a dataset.
352 Notes
353 -----
354 This method is intended primarily as a hook for subclasses to implement
355 and the ABC to call in order to provide higher-level functionality;
356 code that uses `Query` objects (but does not implement one) should
357 usually not have to call this method.
358 """
359 raise NotImplementedError()
361 @property
362 @abstractmethod
363 def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
364 """A SQLAlchemy object representing the full query
365 (`sqlalchemy.sql.FromClause` or `None`).
367 This is `None` in the special case where the query has no columns, and
368 only one logical row.
369 """
370 raise NotImplementedError()
372 def rows(self, db: Database, *, region: Optional[Region] = None
373 ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
374 """Execute the query and yield result rows, applying `predicate`.
376 Parameters
377 ----------
378 db : `Database`
379 Object managing the database connection.
380 region : `sphgeom.Region`, optional
381 A region that any result-row regions must overlap in order to be
382 yielded. If not provided, this will be ``self.whereRegion``, if
383 that exists.
385 Yields
386 ------
387 row : `sqlalchemy.engine.RowProxy` or `None`
388 Result row from the query. `None` may be yielded exactly once instead
389 of any real rows to indicate an empty query (see `EmptyQuery`).
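
        A sketch of consuming the rows, assuming ``db`` and ``query`` exist;
        `extractDataId` is defined later in this class::

            dataIds = [query.extractDataId(row) for row in query.rows(db)]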
390 """
391 if self._doomed_by:
392 return
393 whereRegion = region if region is not None else self.whereRegion
394 self._filtered_by_where = 0
395 self._filtered_by_join = 0
396 for row in db.query(self.sql):
397 rowRegions = [row._mapping[self.getRegionColumn(element.name)] for element in self.spatial]
398 if whereRegion and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
399 self._filtered_by_where += 1
400 continue
401 if any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2)):
402 self._filtered_by_join += 1
403 continue
404 yield row
406 def extractDimensionsTuple(self, row: Optional[sqlalchemy.engine.RowProxy],
407 dimensions: Iterable[Dimension]) -> tuple:
408 """Extract a tuple of data ID values from a result row.
410 Parameters
411 ----------
412 row : `sqlalchemy.engine.RowProxy` or `None`
413 A result row from a SQLAlchemy SELECT query, or `None` to indicate
414 the row from an `EmptyQuery`.
415 dimensions : `Iterable` [ `Dimension` ]
416 The dimensions to include in the returned tuple, in order.
418 Returns
419 -------
420 values : `tuple`
421 A tuple of dimension primary key values.
422 """
423 if row is None:
424 assert not tuple(dimensions), "Can only utilize empty query row when there are no dimensions."
425 return ()
426 return tuple(row._mapping[self.getDimensionColumn(dimension.name)] for dimension in dimensions)
428 def extractDataId(self, row: Optional[sqlalchemy.engine.RowProxy], *,
429 graph: Optional[DimensionGraph] = None,
430 records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
431 ) -> DataCoordinate:
432 """Extract a data ID from a result row.
434 Parameters
435 ----------
436 row : `sqlalchemy.engine.RowProxy` or `None`
437 A result row from a SQLAlchemy SELECT query, or `None` to indicate
438 the row from an `EmptyQuery`.
439 graph : `DimensionGraph`, optional
440 The dimensions the returned data ID should identify. If not
441 provided, this will be all dimensions in `QuerySummary.requested`.
442 records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
443 Nested mapping containing records to attach to the returned
444 `DataCoordinate`, for which `~DataCoordinate.hasRecords` will
445 return `True`. If provided, outer keys must include all dimension
446 element names in ``graph``, and inner keys should be tuples of
447 dimension primary key values in the same order as
448 ``element.graph.required``. If not provided,
449 `DataCoordinate.hasRecords` will return `False` on the returned
450 object.
452 Returns
453 -------
454 dataId : `DataCoordinate`
455 A data ID that identifies all required and implied dimensions. If
456 ``records is not None``, this will have
457 `~DataCoordinate.hasRecords()` return `True`.
458 """
459 if graph is None:
460 graph = self.graph
461 if not graph:
462 return DataCoordinate.makeEmpty(self.graph.universe)
463 dataId = DataCoordinate.fromFullValues(
464 graph,
465 self.extractDimensionsTuple(row, itertools.chain(graph.required, graph.implied))
466 )
467 if records is not None:
468 recordsForRow = {}
469 for element in graph.elements:
470 key = tuple(dataId.subset(element.graph).values())
471 recordsForRow[element.name] = records[element.name].get(key)
472 return dataId.expanded(recordsForRow)
473 else:
474 return dataId
476 def extractDatasetRef(self, row: sqlalchemy.engine.RowProxy,
477 dataId: Optional[DataCoordinate] = None,
478 records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
479 ) -> DatasetRef:
480 """Extract a `DatasetRef` from a result row.
482 Parameters
483 ----------
484 row : `sqlalchemy.engine.RowProxy`
485 A result row from a SQLAlchemy SELECT query.
486 dataId : `DataCoordinate`
487 Data ID to attach to the `DatasetRef`. A minimal (i.e. base class)
488 `DataCoordinate` is constructed from ``row`` if `None`.
489 records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
490 Records to attach to the data ID of the returned `DatasetRef`. If provided,
491 outer keys must include all dimension element names in ``graph``,
492 and inner keys should be tuples of dimension primary key values
493 in the same order as ``element.graph.required``.
495 Returns
496 -------
497 ref : `DatasetRef`
498 Reference to the dataset; guaranteed to have `DatasetRef.id` not
499 `None`.
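
        A sketch of typical use together with `rows`, assuming ``db`` exists
        and ``query`` was built with dataset results::

            refs = [query.extractDatasetRef(row) for row in query.rows(db)]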
500 """
501 datasetColumns = self.getDatasetColumns()
502 assert datasetColumns is not None
503 if dataId is None:
504 dataId = self.extractDataId(row, graph=datasetColumns.datasetType.dimensions, records=records)
505 runRecord = self.managers.collections[row._mapping[datasetColumns.runKey]]
506 return DatasetRef(datasetColumns.datasetType, dataId, id=row._mapping[datasetColumns.id],
507 run=runRecord.name)
509 def _makeTableSpec(self, constraints: bool = False) -> ddl.TableSpec:
510 """Helper method for subclass implementations of `materialize`.
512 Parameters
513 ----------
514 constraints : `bool`, optional
515 If `True` (`False` is default), define a specification that
516 includes actual foreign key constraints for logical foreign keys.
517 Some database engines do not permit temporary tables to reference
518 normal tables, so this should be `False` when generating a spec
519 for a temporary table unless the database engine is known to
520 support them.
522 Returns
523 -------
524 spec : `ddl.TableSpec`
525 Specification for a table that could hold this query's result rows.
526 """
527 unique = self.isUnique()
528 spec = ddl.TableSpec(fields=())
529 for dimension in self.graph:
530 addDimensionForeignKey(spec, dimension, primaryKey=unique, constraint=constraints)
531 for element in self.spatial:
532 spec.fields.update(
533 SpatialRegionDatabaseRepresentation.makeFieldSpecs(
534 nullable=True,
535 name=f"{element.name}_region",
536 )
537 )
538 datasetColumns = self.getDatasetColumns()
539 if datasetColumns is not None:
540 self.managers.datasets.addDatasetForeignKey(spec, primaryKey=unique, constraint=constraints)
541 self.managers.collections.addRunForeignKey(spec, nullable=False, constraint=constraints)
542 return spec
544 def _makeSubsetQueryColumns(self, *, graph: Optional[DimensionGraph] = None,
545 datasets: bool = True,
546 unique: bool = False) -> Tuple[DimensionGraph, Optional[QueryColumns]]:
547 """Helper method for subclass implementations of `subset`.
549 Parameters
550 ----------
551 graph : `DimensionGraph`, optional
552 Dimensions to include in the new `Query` being constructed.
553 ``subset`` implementations should generally just forward their
554 own ``graph`` argument here.
555 datasets : `bool`, optional
556 Whether the new `Query` should include dataset results. Defaults
557 to `True`, but is ignored if ``self`` does not include dataset
558 results.
559 unique : `bool`, optional
560 Whether the new `Query` should guarantee unique results (this may
561 come with a performance penalty).
563 Returns
564 -------
565 graph : `DimensionGraph`
566 The dimensions of the new `Query`. This is exactly the same as
567 the argument of the same name, with ``self.graph`` used if that
568 argument is `None`.
569 columns : `QueryColumns` or `None`
570 A struct containing the SQLAlchemy column objects to use in the
571 new query, constructed by delegating to other (mostly abstract)
572 methods on ``self``. If `None`, `subset` may return ``self``.
573 """
574 if graph is None:
575 graph = self.graph
576 if (graph == self.graph and (self.getDatasetColumns() is None or datasets)
577 and (self.isUnique() or not unique)):
578 return graph, None
579 columns = QueryColumns()
580 for dimension in graph.dimensions:
581 col = self.getDimensionColumn(dimension.name)
582 columns.keys[dimension] = [col]
583 if not unique:
584 for element in self.spatial:
585 col = self.getRegionColumn(element.name)
586 columns.regions[element] = col
587 if datasets and self.getDatasetColumns() is not None:
588 columns.datasets = self.getDatasetColumns()
589 return graph, columns
591 @contextmanager
592 def materialize(self, db: Database) -> Iterator[Query]:
593 """Execute this query and insert its results into a temporary table.
595 Parameters
596 ----------
597 db : `Database`
598 Database engine to execute the query against.
600 Returns
601 -------
602 context : `typing.ContextManager` [ `MaterializedQuery` ]
603 A context manager that ensures the temporary table is created and
604 populated in ``__enter__`` (returning a `MaterializedQuery` object
605 backed by that table), and dropped in ``__exit__``. If ``self``
606 is already a `MaterializedQuery`, ``__enter__`` may just return
607 ``self`` and ``__exit__`` may do nothing (reflecting the fact that
608 an outer context manager should already take care of everything
609 else).
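
        A usage sketch, assuming ``db`` and ``query`` exist::

            with query.materialize(db) as materialized:
                n = materialized.count(db, exact=False)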
610 """
611 spec = self._makeTableSpec()
612 with db.session() as session:
613 table = session.makeTemporaryTable(spec)
614 if not self._doomed_by:
615 db.insert(table, select=self.sql, names=spec.fields.names)
616 yield MaterializedQuery(table=table,
617 spatial=self.spatial,
618 datasetType=self.datasetType,
619 isUnique=self.isUnique(),
620 graph=self.graph,
621 whereRegion=self.whereRegion,
622 managers=self.managers,
623 doomed_by=self._doomed_by)
624 session.dropTemporaryTable(table)
626 @abstractmethod
627 def subset(self, *, graph: Optional[DimensionGraph] = None,
628 datasets: bool = True,
629 unique: bool = False) -> Query:
630 """Return a new `Query` whose columns and/or rows are (mostly) subset
631 of this one's.
633 Parameters
634 ----------
635 graph : `DimensionGraph`, optional
636 Dimensions to include in the new `Query` being constructed.
637 If `None` (default), ``self.graph`` is used.
638 datasets : `bool`, optional
639 Whether the new `Query` should include dataset results. Defaults
640 to `True`, but is ignored if ``self`` does not include dataset
641 results.
642 unique : `bool`, optional
643 Whether the new `Query` should guarantee unique results (this may
644 come with a performance penalty).
646 Returns
647 -------
648 query : `Query`
649 A query object corresponding to the given inputs. May be ``self``
650 if no changes were requested.
652 Notes
653 -----
654 The way spatial overlaps are handled at present makes it impossible to
655 fully guarantee in general that the new query's rows are a subset of
656 this one's while also returning unique rows. That's because the
657 database is only capable of performing approximate, conservative
658 overlaps via the common skypix system; we defer actual region overlap
659 operations to per-result-row Python logic. But including the region
660 columns necessary to do that postprocessing in the query makes it
661 impossible to do a SELECT DISTINCT on the user-visible dimensions of
662 the query. For example, consider starting with a query with dimensions
663 (instrument, skymap, visit, tract). That involves a spatial join
664 between visit and tract, and we include the region columns from both
665 tables in the results in order to only actually yield result rows
666 (see `predicate` and `rows`) where the regions in those two columns
667 overlap. If the user then wants to subset to just (skymap, tract) with
668 unique results, we have two unpalatable options:
670 - we can do a SELECT DISTINCT with just the skymap and tract columns
671 in the SELECT clause, dropping all detailed overlap information and
672 including some tracts that did not actually overlap any of the
673 visits in the original query (but were regarded as _possibly_
674 overlapping via the coarser, common-skypix relationships);
676 - we can include the tract and visit region columns in the query, and
677 continue to filter out the non-overlapping pairs, but completely
678 disregard the user's request for unique tracts.
680 This interface specifies that implementations must do the former, as
681 that's what makes things efficient in our most important use case
682 (``QuantumGraph`` generation in ``pipe_base``). We may be able to
683 improve this situation in the future by putting exact overlap
684 information in the database, either by using built-in (but
685 engine-specific) spatial database functionality or (more likely)
686 switching to a scheme in which pairwise dimension spatial relationships
687 are explicitly precomputed (for e.g. combinations of instruments and
688 skymaps).
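
        A sketch of the (skymap, tract) subset described above, assuming
        ``universe`` is the relevant `DimensionUniverse` (the dimension names
        are illustrative)::

            tractGraph = DimensionGraph(universe, names=("skymap", "tract"))
            tractQuery = query.subset(graph=tractGraph, datasets=False,
                                      unique=True)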
689 """
690 raise NotImplementedError()
692 @abstractmethod
693 def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
694 """Return a `QueryBuilder` that can be used to construct a new `Query`
695 that is joined to (and hence constrained by) this one.
697 Parameters
698 ----------
699 summary : `QuerySummary`, optional
700 A `QuerySummary` instance that specifies the dimensions and any
701 additional constraints to include in the new query being
702 constructed, or `None` to use the dimensions of ``self`` with no
703 additional constraints.
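
        A sketch of chaining a new query onto this one, assuming ``query``
        exists; the joined query is constrained by this one's rows::

            builder = query.makeBuilder()
            joined = builder.finish()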
704 """
705 raise NotImplementedError()
707 graph: DimensionGraph
708 """The dimensions identified by this query and included in any data IDs
709 created from its result rows (`DimensionGraph`).
710 """
712 whereRegion: Optional[Region]
713 """A spatial region that all regions in all rows returned by this query
714 must overlap (`lsst.sphgeom.Region` or `None`).
715 """
717 managers: RegistryManagers
718 """A struct containing `Registry` helper object (`RegistryManagers`).
719 """
722class DirectQueryUniqueness(enum.Enum):
723 """An enum representing the ways in which a query can have unique rows (or
724 not).
725 """
727 NOT_UNIQUE = enum.auto()
728 """The query is not expected to have unique rows.
729 """
731 NATURALLY_UNIQUE = enum.auto()
732 """The construction of the query guarantees that it will have unique
733 result rows, even without SELECT DISTINCT or a GROUP BY clause.
734 """
736 NEEDS_DISTINCT = enum.auto()
737 """The query is expected to yield unique result rows, and needs to use
738 SELECT DISTINCT or an equivalent GROUP BY clause to achieve this.
739 """
742class DirectQuery(Query):
743 """A `Query` implementation that represents a direct SELECT query that
744 usually joins many tables.
746 `DirectQuery` objects should generally only be constructed by
747 `QueryBuilder` or the methods of other `Query` objects.
749 Parameters
750 ----------
751 simpleQuery : `SimpleQuery`
752 Struct representing the actual SELECT, FROM, and WHERE clauses.
753 columns : `QueryColumns`
754 Columns that are referenced in the query in any clause.
755 uniqueness : `DirectQueryUniqueness`
756 Enum value indicating whether the query should yield unique result
757 rows, and if so whether that needs to be explicitly requested of the
758 database.
759 graph : `DimensionGraph`
760 Object describing the dimensions included in the query.
761 whereRegion : `lsst.sphgeom.Region`, optional
762 Region that all region columns in all returned rows must overlap.
763 managers : `RegistryManagers`
764 Struct containing the `Registry` manager helper objects, to be
765 forwarded to the `Query` constructor.
766 doomed_by : `Iterable` [ `str` ], optional
767 A list of messages (appropriate for e.g. logging or exceptions) that
768 explain why the query is known to return no results even before it is
769 executed. Queries with a non-empty list will never be executed.
770 """
771 def __init__(self, *,
772 simpleQuery: SimpleQuery,
773 columns: QueryColumns,
774 uniqueness: DirectQueryUniqueness,
775 graph: DimensionGraph,
776 whereRegion: Optional[Region],
777 managers: RegistryManagers,
778 doomed_by: Iterable[str] = ()):
779 super().__init__(graph=graph, whereRegion=whereRegion, managers=managers, doomed_by=doomed_by)
780 assert not simpleQuery.columns, "Columns should always be set on a copy in .sql"
781 assert not columns.isEmpty(), "EmptyQuery must be used when a query would have no columns."
782 self._simpleQuery = simpleQuery
783 self._columns = columns
784 self._uniqueness = uniqueness
785 self._datasetQueryColumns: Optional[DatasetQueryColumns] = None
786 self._dimensionColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {}
787 self._regionColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {}
789 def isUnique(self) -> bool:
790 # Docstring inherited from Query.
791 return self._uniqueness is not DirectQueryUniqueness.NOT_UNIQUE
793 def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
794 # Docstring inherited from Query.
795 column = self._dimensionColumns.get(name)
796 if column is None:
797 column = self._columns.getKeyColumn(name).label(name)
798 self._dimensionColumns[name] = column
799 return column
801 @property
802 def spatial(self) -> Iterator[DimensionElement]:
803 # Docstring inherited from Query.
804 return iter(self._columns.regions)
806 def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
807 # Docstring inherited from Query.
808 column = self._regionColumns.get(name)
809 if column is None:
810 column = self._columns.regions[name].column.label(f"{name}_region")
811 self._regionColumns[name] = column
812 return column
814 def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
815 # Docstring inherited from Query.
816 if self._datasetQueryColumns is None:
817 base = self._columns.datasets
818 if base is None:
819 return None
820 ingestDate = base.ingestDate
821 if ingestDate is not None:
822 ingestDate = ingestDate.label("ingest_date")
823 self._datasetQueryColumns = DatasetQueryColumns(
824 datasetType=base.datasetType,
825 id=base.id.label("dataset_id"),
826 runKey=base.runKey.label(self.managers.collections.getRunForeignKeyName()),
827 ingestDate=ingestDate,
828 )
829 return self._datasetQueryColumns
831 @property
832 def sql(self) -> sqlalchemy.sql.FromClause:
833 # Docstring inherited from Query.
834 simpleQuery = self._simpleQuery.copy()
835 for dimension in self.graph:
836 simpleQuery.columns.append(self.getDimensionColumn(dimension.name))
837 for element in self.spatial:
838 simpleQuery.columns.append(self.getRegionColumn(element.name))
839 datasetColumns = self.getDatasetColumns()
840 if datasetColumns is not None:
841 simpleQuery.columns.extend(datasetColumns)
842 sql = simpleQuery.combine()
843 if self._uniqueness is DirectQueryUniqueness.NEEDS_DISTINCT:
844 return sql.distinct()
845 else:
846 return sql
848 def subset(self, *, graph: Optional[DimensionGraph] = None,
849 datasets: bool = True,
850 unique: bool = False) -> Query:
851 # Docstring inherited from Query.
852 graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
853 if columns is None:
854 return self
855 if columns.isEmpty():
856 return EmptyQuery(self.graph.universe, self.managers)
857 return DirectQuery(
858 simpleQuery=self._simpleQuery.copy(),
859 columns=columns,
860 uniqueness=DirectQueryUniqueness.NEEDS_DISTINCT if unique else DirectQueryUniqueness.NOT_UNIQUE,
861 graph=graph,
862 whereRegion=self.whereRegion if not unique else None,
863 managers=self.managers,
864 doomed_by=self._doomed_by,
865 )
867 def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
868 # Docstring inherited from Query.
869 from ._builder import QueryBuilder
870 if summary is None:
871 summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
872 if not summary.requested.issubset(self.graph):
873 raise NotImplementedError(
874 f"Query.makeBuilder does not yet support augmenting dimensions "
875 f"({summary.requested.dimensions}) beyond those originally included in the query "
876 f"({self.graph.dimensions})."
877 )
878 builder = QueryBuilder(summary, managers=self.managers, doomed_by=self._doomed_by)
879 builder.joinTable(self.sql.alias(), dimensions=self.graph.dimensions,
880 datasets=self.getDatasetColumns())
881 return builder
884class MaterializedQuery(Query):
885 """A `Query` implementation that represents query results saved in a
886 temporary table.
888 `MaterializedQuery` instances should not be constructed directly; use
889 `Query.materialize()` instead.
891 Parameters
892 ----------
893 table : `sqlalchemy.schema.Table`
894 SQLAlchemy object representing the temporary table.
895 spatial : `Iterable` [ `DimensionElement` ]
896 Spatial dimension elements whose regions must overlap for each valid
897 result row (which may reject some rows that are in the table).
898 datasetType : `DatasetType`
899 The `DatasetType` of datasets returned by this query, or `None`
900 if there are no dataset results.
901 isUnique : `bool`
902 If `True`, the table's rows are unique, and there is no need to
903 add ``SELECT DISTINCT`` to guarantee this in results.
904 graph : `DimensionGraph`
905 Dimensions included in the columns of this table.
906 whereRegion : `Region` or `None`
907 A spatial region all result-row regions must overlap to be valid (which
908 may reject some rows that are in the table).
909 managers : `RegistryManagers`
910 A struct containing `Registry` manager helper objects, forwarded to
911 the `Query` constructor.
912 doomed_by : `Iterable` [ `str` ], optional
913 A list of messages (appropriate for e.g. logging or exceptions) that
914 explain why the query is known to return no results even before it is
915 executed. Queries with a non-empty list will never be executed.
916 """
917 def __init__(self, *,
918 table: sqlalchemy.schema.Table,
919 spatial: Iterable[DimensionElement],
920 datasetType: Optional[DatasetType],
921 isUnique: bool,
922 graph: DimensionGraph,
923 whereRegion: Optional[Region],
924 managers: RegistryManagers,
925 doomed_by: Iterable[str] = ()):
926 super().__init__(graph=graph, whereRegion=whereRegion, managers=managers, doomed_by=doomed_by)
927 self._table = table
928 self._spatial = tuple(spatial)
929 self._datasetType = datasetType
930 self._isUnique = isUnique
932 def isUnique(self) -> bool:
933 # Docstring inherited from Query.
934 return self._isUnique
936 def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
937 # Docstring inherited from Query.
938 return self._table.columns[name]
940 @property
941 def spatial(self) -> Iterator[DimensionElement]:
942 # Docstring inherited from Query.
943 return iter(self._spatial)
945 def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
946 # Docstring inherited from Query.
947 return self._table.columns[f"{name}_region"]
949 def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
950 # Docstring inherited from Query.
951 if self._datasetType is not None:
952 return DatasetQueryColumns(
953 datasetType=self._datasetType,
954 id=self._table.columns["dataset_id"],
955 runKey=self._table.columns[self.managers.collections.getRunForeignKeyName()],
956 ingestDate=None,
957 )
958 else:
959 return None
961 @property
962 def sql(self) -> sqlalchemy.sql.FromClause:
963 # Docstring inherited from Query.
964 return self._table.select()
966 @contextmanager
967 def materialize(self, db: Database) -> Iterator[Query]:
968 # Docstring inherited from Query.
969 yield self
971 def subset(self, *, graph: Optional[DimensionGraph] = None,
972 datasets: bool = True,
973 unique: bool = False) -> Query:
974 # Docstring inherited from Query.
975 graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
976 if columns is None:
977 return self
978 if columns.isEmpty():
979 return EmptyQuery(self.graph.universe, managers=self.managers)
980 simpleQuery = SimpleQuery()
981 simpleQuery.join(self._table)
982 return DirectQuery(
983 simpleQuery=simpleQuery,
984 columns=columns,
985 uniqueness=DirectQueryUniqueness.NEEDS_DISTINCT if unique else DirectQueryUniqueness.NOT_UNIQUE,
986 graph=graph,
987 whereRegion=self.whereRegion if not unique else None,
988 managers=self.managers,
989 doomed_by=self._doomed_by,
990 )
992 def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
993 # Docstring inherited from Query.
994 from ._builder import QueryBuilder
995 if summary is None:
996 summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
997 if not summary.requested.issubset(self.graph):
998 raise NotImplementedError(
999 f"Query.makeBuilder does not yet support augmenting dimensions "
1000 f"({summary.requested.dimensions}) beyond those originally included in the query "
1001 f"({self.graph.dimensions})."
1002 )
1003 builder = QueryBuilder(summary, managers=self.managers, doomed_by=self._doomed_by)
1004 builder.joinTable(self._table, dimensions=self.graph.dimensions, datasets=self.getDatasetColumns())
1005 return builder
1008class EmptyQuery(Query):
1009 """A `Query` implementation that handes the special case where the query
1010 would have no columns.
1012 Parameters
1013 ----------
1014 universe : `DimensionUniverse`
1015 Set of all dimensions from which the null set is extracted.
1016 managers : `RegistryManagers`
1017 A struct containing the registry manager instances used by the query
1018 system.
1019 doomed_by : `Iterable` [ `str` ], optional
1020 A list of messages (appropriate for e.g. logging or exceptions) that
1021 explain why the query is known to return no results even before it is
1022 executed. Queries with a non-empty list will never be executed.
1023 """
1024 def __init__(
1025 self,
1026 universe: DimensionUniverse,
1027 managers: RegistryManagers,
1028 doomed_by: Iterable[str] = (),
1029 ):
1030 super().__init__(graph=universe.empty, whereRegion=None, managers=managers, doomed_by=doomed_by)
1032 def isUnique(self) -> bool:
1033 # Docstring inherited from Query.
1034 return True
1036 def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
1037 # Docstring inherited from Query.
1038 raise KeyError(f"No dimension {name} in query (no dimensions at all, actually).")
1040 @property
1041 def spatial(self) -> Iterator[DimensionElement]:
1042 # Docstring inherited from Query.
1043 return iter(())
1045 def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
1046 # Docstring inherited from Query.
1047 raise KeyError(f"No region for {name} in query (no regions at all, actually).")
1049 def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
1050 # Docstring inherited from Query.
1051 return None
1053 def rows(self, db: Database, *, region: Optional[Region] = None
1054 ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
1055 if not self._doomed_by:
1056 yield None
1058 @property
1059 def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
1060 # Docstring inherited from Query.
1061 return None
1063 @contextmanager
1064 def materialize(self, db: Database) -> Iterator[Query]:
1065 # Docstring inherited from Query.
1066 yield self
1068 def subset(self, *, graph: Optional[DimensionGraph] = None,
1069 datasets: bool = True,
1070 unique: bool = False) -> Query:
1071 # Docstring inherited from Query.
1072 assert graph is None or graph.issubset(self.graph)
1073 return self
1075 def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
1076 # Docstring inherited from Query.
1077 from ._builder import QueryBuilder
1078 if summary is None:
1079 summary = QuerySummary(self.graph)
1080 if not summary.requested.issubset(self.graph):
1081 raise NotImplementedError(
1082 f"Query.makeBuilder does not yet support augmenting dimensions "
1083 f"({summary.requested.dimensions}) beyond those originally included in the query "
1084 f"({self.graph.dimensions})."
1085 )
1086 return QueryBuilder(summary, managers=self.managers, doomed_by=self._doomed_by)