Coverage for python/lsst/daf/butler/registry/queries/_query.py : 24%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("Query",)

from abc import ABC, abstractmethod
from contextlib import contextmanager
import copy
import enum
import itertools
from typing import (
    Callable,
    Iterable,
    Iterator,
    Mapping,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

import sqlalchemy

from lsst.sphgeom import Region

from ...core import (
    addDimensionForeignKey,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    ddl,
    Dimension,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    REGION_FIELD_SPEC,
    SimpleQuery,
)
from ..interfaces import Database
from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary, RegistryManagers

if TYPE_CHECKING:
    from ._builder import QueryBuilder

class Query(ABC):
    """An abstract base class for queries that return some combination of
    `DatasetRef` and `DataCoordinate` objects.

    Parameters
    ----------
    graph : `DimensionGraph`
        Object describing the dimensions included in the query.
    whereRegion : `lsst.sphgeom.Region`, optional
        Region that all region columns in all returned rows must overlap.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.

    Notes
    -----
    The `Query` hierarchy abstracts over the database/SQL representation of a
    particular set of data IDs or datasets. It is expected to be used as a
    backend for other objects that provide more natural interfaces for one or
    both of these, not as part of a public interface to query results.
    """
    def __init__(self, *,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers,
                 ):
        self.graph = graph
        self.whereRegion = whereRegion
        self.managers = managers

    @abstractmethod
    def isUnique(self) -> bool:
        """Return `True` if this query's rows are guaranteed to be unique, and
        `False` otherwise.

        If this query has dataset results (`datasetType` is not `None`),
        uniqueness applies to the `DatasetRef` instances returned by
        `extractDatasetRef` from the result of `rows`. If it does not have
        dataset results, uniqueness applies to the `DataCoordinate` instances
        returned by `extractDataId`.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        """Return the query column that contains the primary key value for
        the dimension with the given name.

        Parameters
        ----------
        name : `str`
            Name of the dimension.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy object representing a column in the query.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def spatial(self) -> Iterator[DimensionElement]:
        """An iterator over the dimension element columns used in post-query
        filtering of spatial overlaps (`Iterator` [ `DimensionElement` ]).

        Notes
        -----
        This property is intended primarily as a hook for subclasses to
        implement and the ABC to call in order to provide higher-level
        functionality; code that uses `Query` objects (but does not implement
        one) should usually not have to access this property.
        """
        raise NotImplementedError()

    @abstractmethod
    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        """Return a region column for one of the dimension elements iterated
        over by `spatial`.

        Parameters
        ----------
        name : `str`
            Name of the element.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy object representing a result column in the query.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    def datasetType(self) -> Optional[DatasetType]:
        """The `DatasetType` of datasets returned by this query, or `None`
        if there are no dataset results (`DatasetType` or `None`).
        """
        cols = self.getDatasetColumns()
        if cols is None:
            return None
        return cols.datasetType

    @abstractmethod
    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        """Return the columns for the datasets returned by this query.

        Returns
        -------
        columns : `DatasetQueryColumns` or `None`
            Struct containing SQLAlchemy representations of the result columns
            for a dataset.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
        """A SQLAlchemy object representing the full query
        (`sqlalchemy.sql.FromClause` or `None`).

        This is `None` in the special case where the query has no columns, and
        only one logical row.
        """
        raise NotImplementedError()

    def predicate(self, region: Optional[Region] = None) -> Callable[[sqlalchemy.engine.RowProxy], bool]:
        """Return a callable that can perform extra Python-side filtering of
        query results.

        To get the expected results from a query, the returned predicate *must*
        be used to ignore rows for which it returns `False`; this permits the
        `QueryBuilder` implementation to move logic from the database to Python
        without changing the public interface.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order for the
            predicate to return `True`. If not provided, this will be
            ``self.whereRegion``, if that exists.

        Returns
        -------
        func : `Callable`
            A callable that takes a single `sqlalchemy.engine.RowProxy`
            argument and returns `bool`.
        """
        whereRegion = region if region is not None else self.whereRegion

        def closure(row: sqlalchemy.engine.RowProxy) -> bool:
            rowRegions = [row[self.getRegionColumn(element.name)] for element in self.spatial]
            if whereRegion and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
                return False
            return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))

        return closure

    def rows(self, db: Database, *, region: Optional[Region] = None
             ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
        """Execute the query and yield result rows, applying `predicate`.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order to be
            yielded. If not provided, this will be ``self.whereRegion``, if
            that exists.

        Yields
        ------
        row : `sqlalchemy.engine.RowProxy` or `None`
            Result row from the query. `None` may be yielded exactly once
            instead of any real rows to indicate an empty query (see
            `EmptyQuery`).
        """
        predicate = self.predicate(region)
        for row in db.query(self.sql):
            if predicate(row):
                yield row
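
    # Illustrative sketch (added by the editor, not part of the original
    # module): how a caller might consume ``rows``. ``query`` and ``db`` are
    # hypothetical stand-ins for a concrete `Query` and a `Database`.
    #
    #     for row in query.rows(db):
    #         dataId = query.extractDataId(row)
    #
    # ``rows`` already applies `predicate`; code that iterates over
    # ``db.query(query.sql)`` directly would have to filter rows itself:
    #
    #     keep = query.predicate()
    #     filtered = [row for row in db.query(query.sql) if keep(row)]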

    def extractDimensionsTuple(self, row: Optional[sqlalchemy.engine.RowProxy],
                               dimensions: Iterable[Dimension]) -> tuple:
        """Extract a tuple of data ID values from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy` or `None`
            A result row from a SQLAlchemy SELECT query, or `None` to indicate
            the row from an `EmptyQuery`.
        dimensions : `Iterable` [ `Dimension` ]
            The dimensions to include in the returned tuple, in order.

        Returns
        -------
        values : `tuple`
            A tuple of dimension primary key values.
        """
        if row is None:
            assert not tuple(dimensions), "Can only utilize empty query row when there are no dimensions."
            return ()
        return tuple(row[self.getDimensionColumn(dimension.name)] for dimension in dimensions)

    def extractDataId(self, row: Optional[sqlalchemy.engine.RowProxy], *,
                      graph: Optional[DimensionGraph] = None,
                      records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
                      ) -> DataCoordinate:
        """Extract a data ID from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy` or `None`
            A result row from a SQLAlchemy SELECT query, or `None` to indicate
            the row from an `EmptyQuery`.
        graph : `DimensionGraph`, optional
            The dimensions the returned data ID should identify. If not
            provided, this will be all dimensions in `QuerySummary.requested`.
        records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
            Nested mapping containing records to attach to the returned
            `DataCoordinate`, for which `~DataCoordinate.hasRecords` will
            return `True`. If provided, outer keys must include all dimension
            element names in ``graph``, and inner keys should be tuples of
            dimension primary key values in the same order as
            ``element.graph.required``. If not provided,
            `DataCoordinate.hasRecords` will return `False` on the returned
            object.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID that identifies all required and implied dimensions. If
            ``records is not None``, this will have
            `~DataCoordinate.hasRecords()` return `True`.
        """
        if graph is None:
            graph = self.graph
        if not graph:
            return DataCoordinate.makeEmpty(self.graph.universe)
        dataId = DataCoordinate.fromFullValues(
            graph,
            self.extractDimensionsTuple(row, itertools.chain(graph.required, graph.implied))
        )
        if records is not None:
            recordsForRow = {}
            for element in graph.elements:
                key = tuple(dataId.subset(element.graph).values())
                recordsForRow[element.name] = records[element.name].get(key)
            return dataId.expanded(recordsForRow)
        else:
            return dataId

    def extractDatasetRef(self, row: sqlalchemy.engine.RowProxy,
                          dataId: Optional[DataCoordinate] = None,
                          records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
                          ) -> DatasetRef:
        """Extract a `DatasetRef` from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        dataId : `DataCoordinate`, optional
            Data ID to attach to the `DatasetRef`. A minimal (i.e. base class)
            `DataCoordinate` is constructed from ``row`` if `None`.
        records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
            Records to use to return an `ExpandedDataCoordinate`. If provided,
            outer keys must include all dimension element names in ``graph``,
            and inner keys should be tuples of dimension primary key values
            in the same order as ``element.graph.required``.

        Returns
        -------
        ref : `DatasetRef`
            Reference to the dataset; guaranteed to have `DatasetRef.id` not
            `None`.
        """
        datasetColumns = self.getDatasetColumns()
        assert datasetColumns is not None
        if dataId is None:
            dataId = self.extractDataId(row, graph=datasetColumns.datasetType.dimensions, records=records)
        runRecord = self.managers.collections[row[datasetColumns.runKey]]
        return DatasetRef(datasetColumns.datasetType, dataId, id=row[datasetColumns.id], run=runRecord.name)
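
    # Illustrative sketch (added by the editor, not part of the original
    # module): extracting results from rows, choosing the helper based on
    # whether the query has dataset columns. ``query`` and ``db`` are
    # hypothetical stand-ins for a concrete `Query` and a `Database`.
    #
    #     results = []
    #     for row in query.rows(db):
    #         if query.datasetType is not None:
    #             results.append(query.extractDatasetRef(row))
    #         else:
    #             results.append(query.extractDataId(row))
    #
    # With ``records=None`` (the default), `extractDataId` returns a
    # `DataCoordinate` whose `~DataCoordinate.hasRecords` is `False`.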

    def _makeTableSpec(self, constraints: bool = False) -> ddl.TableSpec:
        """Helper method for subclass implementations of `materialize`.

        Parameters
        ----------
        constraints : `bool`, optional
            If `True` (`False` is default), define a specification that
            includes actual foreign key constraints for logical foreign keys.
            Some database engines do not permit temporary tables to reference
            normal tables, so this should be `False` when generating a spec
            for a temporary table unless the database engine is known to
            support them.

        Returns
        -------
        spec : `ddl.TableSpec`
            Specification for a table that could hold this query's result rows.
        """
        unique = self.isUnique()
        spec = ddl.TableSpec(fields=())
        for dimension in self.graph:
            addDimensionForeignKey(spec, dimension, primaryKey=unique, constraint=constraints)
        for element in self.spatial:
            field = copy.copy(REGION_FIELD_SPEC)
            field.name = f"{element.name}_region"
            spec.fields.add(field)
        datasetColumns = self.getDatasetColumns()
        if datasetColumns is not None:
            self.managers.datasets.addDatasetForeignKey(spec, primaryKey=unique, constraint=constraints)
            self.managers.collections.addRunForeignKey(spec, nullable=False, constraint=constraints)
        return spec

    def _makeSubsetQueryColumns(self, *, graph: Optional[DimensionGraph] = None,
                                datasets: bool = True,
                                unique: bool = False) -> Tuple[DimensionGraph, Optional[QueryColumns]]:
        """Helper method for subclass implementations of `subset`.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new `Query` being constructed.
            ``subset`` implementations should generally just forward their
            own ``graph`` argument here.
        datasets : `bool`, optional
            Whether the new `Query` should include dataset results. Defaults
            to `True`, but is ignored if ``self`` does not include dataset
            results.
        unique : `bool`, optional
            Whether the new `Query` should guarantee unique results (this may
            come with a performance penalty).

        Returns
        -------
        graph : `DimensionGraph`
            The dimensions of the new `Query`. This is exactly the same as
            the argument of the same name, with ``self.graph`` used if that
            argument is `None`.
        columns : `QueryColumns` or `None`
            A struct containing the SQLAlchemy column objects to use in the
            new query, constructed by delegating to other (mostly abstract)
            methods on ``self``. If `None`, `subset` may return ``self``.
        """
        if graph is None:
            graph = self.graph
        if (graph == self.graph and (self.getDatasetColumns() is None or datasets)
                and (self.isUnique() or not unique)):
            return graph, None
        columns = QueryColumns()
        for dimension in graph.dimensions:
            col = self.getDimensionColumn(dimension.name)
            columns.keys[dimension] = [col]
        if not unique:
            for element in self.spatial:
                col = self.getRegionColumn(element.name)
                columns.regions[element] = col
        if datasets and self.getDatasetColumns() is not None:
            columns.datasets = self.getDatasetColumns()
        return graph, columns

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        """Execute this query and insert its results into a temporary table.

        Parameters
        ----------
        db : `Database`
            Database engine to execute the query against.

        Returns
        -------
        context : `typing.ContextManager` [ `MaterializedQuery` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a `MaterializedQuery` object
            backed by that table), and dropped in ``__exit__``. If ``self``
            is already a `MaterializedQuery`, ``__enter__`` may just return
            ``self`` and ``__exit__`` may do nothing (reflecting the fact that
            an outer context manager should already take care of everything
            else).
        """
        spec = self._makeTableSpec()
        table = db.makeTemporaryTable(spec)
        db.insert(table, select=self.sql, names=spec.fields.names)
        yield MaterializedQuery(table=table,
                                spatial=self.spatial,
                                datasetType=self.datasetType,
                                isUnique=self.isUnique(),
                                graph=self.graph,
                                whereRegion=self.whereRegion,
                                managers=self.managers)
        db.dropTemporaryTable(table)
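
    # Illustrative sketch (added by the editor, not part of the original
    # module): `materialize` is a context manager, so the temporary table
    # only exists inside the ``with`` block. ``query`` and ``db`` are
    # hypothetical stand-ins for a concrete `Query` and a `Database`.
    #
    #     with query.materialize(db) as materialized:
    #         for row in materialized.rows(db):
    #             ...  # iterate without re-executing the original SELECT
    #     # the temporary table is dropped on __exit__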

    @abstractmethod
    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        """Return a new `Query` whose columns and/or rows are (mostly) a
        subset of this one's.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new `Query` being constructed.
            If `None` (default), ``self.graph`` is used.
        datasets : `bool`, optional
            Whether the new `Query` should include dataset results. Defaults
            to `True`, but is ignored if ``self`` does not include dataset
            results.
        unique : `bool`, optional
            Whether the new `Query` should guarantee unique results (this may
            come with a performance penalty).

        Returns
        -------
        query : `Query`
            A query object corresponding to the given inputs. May be ``self``
            if no changes were requested.

        Notes
        -----
        The way spatial overlaps are handled at present makes it impossible to
        fully guarantee in general that the new query's rows are a subset of
        this one's while also returning unique rows. That's because the
        database is only capable of performing approximate, conservative
        overlaps via the common skypix system; we defer actual region overlap
        operations to per-result-row Python logic. But including the region
        columns necessary to do that postprocessing in the query makes it
        impossible to do a SELECT DISTINCT on the user-visible dimensions of
        the query. For example, consider starting with a query with dimensions
        (instrument, skymap, visit, tract). That involves a spatial join
        between visit and tract, and we include the region columns from both
        tables in the results in order to only actually yield result rows
        (see `predicate` and `rows`) where the regions in those two columns
        overlap. If the user then wants to subset to just (skymap, tract) with
        unique results, we have two unpalatable options:

        - we can do a SELECT DISTINCT with just the skymap and tract columns
          in the SELECT clause, dropping all detailed overlap information and
          including some tracts that did not actually overlap any of the
          visits in the original query (but were regarded as _possibly_
          overlapping via the coarser, common-skypix relationships);

        - we can include the tract and visit region columns in the query, and
          continue to filter out the non-overlapping pairs, but completely
          disregard the user's request for unique tracts.

        This interface specifies that implementations must do the former, as
        that's what makes things efficient in our most important use case
        (``QuantumGraph`` generation in ``pipe_base``). We may be able to
        improve this situation in the future by putting exact overlap
        information in the database, either by using built-in (but
        engine-specific) spatial database functionality or (more likely)
        switching to a scheme in which pairwise dimension spatial relationships
        are explicitly precomputed (for e.g. combinations of instruments and
        skymaps).
        """
        raise NotImplementedError()
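
    # Illustrative sketch (added by the editor, not part of the original
    # module): the (instrument, skymap, visit, tract) scenario from the notes
    # above, written against this interface. ``query`` is a hypothetical
    # concrete `Query` whose dimensions include visit and tract, and
    # ``skymapTractGraph`` a hypothetical `DimensionGraph` containing only
    # skymap and tract.
    #
    #     uniqueTracts = query.subset(graph=skymapTractGraph,
    #                                 datasets=False, unique=True)
    #
    # Per the notes, ``uniqueTracts`` does a SELECT DISTINCT on the skymap
    # and tract columns and may include tracts whose overlap with the
    # original visits was only approximate (common-skypix level).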

    @abstractmethod
    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        """Return a `QueryBuilder` that can be used to construct a new `Query`
        that is joined to (and hence constrained by) this one.

        Parameters
        ----------
        summary : `QuerySummary`, optional
            A `QuerySummary` instance that specifies the dimensions and any
            additional constraints to include in the new query being
            constructed, or `None` to use the dimensions of ``self`` with no
            additional constraints.
        """
        raise NotImplementedError()
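
    # Illustrative sketch (added by the editor, not part of the original
    # module): chaining queries via `makeBuilder`. ``query`` is a
    # hypothetical concrete `Query` and ``summary`` a `QuerySummary` for the
    # follow-up query; the new query is joined to (and therefore constrained
    # by) this one.
    #
    #     builder = query.makeBuilder(summary)
    #     followUp = builder.finish()
    #
    # ``finish`` is assumed here to be the `QueryBuilder` method that
    # produces the new `Query`; see `._builder.QueryBuilder` for the actual
    # API.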

    graph: DimensionGraph
    """The dimensions identified by this query and included in any data IDs
    created from its result rows (`DimensionGraph`).
    """

    whereRegion: Optional[Region]
    """A spatial region that all regions in all rows returned by this query
    must overlap (`lsst.sphgeom.Region` or `None`).
    """

    managers: RegistryManagers
    """A struct containing `Registry` helper objects (`RegistryManagers`).
    """


class DirectQueryUniqueness(enum.Enum):
    """An enum representing the ways in which a query can have unique rows (or
    not).
    """

    NOT_UNIQUE = enum.auto()
    """The query is not expected to have unique rows.
    """

    NATURALLY_UNIQUE = enum.auto()
    """The construction of the query guarantees that it will have unique
    result rows, even without SELECT DISTINCT or a GROUP BY clause.
    """

    NEEDS_DISTINCT = enum.auto()
    """The query is expected to yield unique result rows, and needs to use
    SELECT DISTINCT or an equivalent GROUP BY clause to achieve this.
    """


class DirectQuery(Query):
    """A `Query` implementation that represents a direct SELECT query that
    usually joins many tables.

    `DirectQuery` objects should generally only be constructed by
    `QueryBuilder` or the methods of other `Query` objects.

    Parameters
    ----------
    simpleQuery : `SimpleQuery`
        Struct representing the actual SELECT, FROM, and WHERE clauses.
    columns : `QueryColumns`
        Columns that are referenced in the query in any clause.
    uniqueness : `DirectQueryUniqueness`
        Enum value indicating whether the query should yield unique result
        rows, and if so whether that needs to be explicitly requested of the
        database.
    graph : `DimensionGraph`
        Object describing the dimensions included in the query.
    whereRegion : `lsst.sphgeom.Region`, optional
        Region that all region columns in all returned rows must overlap.
    managers : `RegistryManagers`
        Struct containing the `Registry` manager helper objects, to be
        forwarded to the `Query` constructor.
    """
    def __init__(self, *,
                 simpleQuery: SimpleQuery,
                 columns: QueryColumns,
                 uniqueness: DirectQueryUniqueness,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers):
        super().__init__(graph=graph, whereRegion=whereRegion, managers=managers)
        assert not simpleQuery.columns, "Columns should always be set on a copy in .sql"
        assert not columns.isEmpty(), "EmptyQuery must be used when a query would have no columns."
        self._simpleQuery = simpleQuery
        self._columns = columns
        self._uniqueness = uniqueness

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return self._uniqueness is not DirectQueryUniqueness.NOT_UNIQUE

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        return self._columns.getKeyColumn(name).label(name)

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(self._columns.regions)

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        return self._columns.regions[name].label(f"{name}_region")

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        base = self._columns.datasets
        if base is None:
            return None
        ingestDate = base.ingestDate
        if ingestDate is not None:
            ingestDate = ingestDate.label("ingest_date")
        return DatasetQueryColumns(
            datasetType=base.datasetType,
            id=base.id.label("dataset_id"),
            runKey=base.runKey.label(self.managers.collections.getRunForeignKeyName()),
            ingestDate=ingestDate,
        )

    @property
    def sql(self) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from Query.
        simpleQuery = self._simpleQuery.copy()
        for dimension in self.graph:
            simpleQuery.columns.append(self.getDimensionColumn(dimension.name))
        for element in self.spatial:
            simpleQuery.columns.append(self.getRegionColumn(element.name))
        datasetColumns = self.getDatasetColumns()
        if datasetColumns is not None:
            simpleQuery.columns.extend(datasetColumns)
        sql = simpleQuery.combine()
        if self._uniqueness is DirectQueryUniqueness.NEEDS_DISTINCT:
            return sql.distinct()
        else:
            return sql

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
        if columns is None:
            return self
        if columns.isEmpty():
            return EmptyQuery(self.graph.universe, self.managers)
        return DirectQuery(
            simpleQuery=self._simpleQuery.copy(),
            columns=columns,
            uniqueness=DirectQueryUniqueness.NEEDS_DISTINCT if unique else DirectQueryUniqueness.NOT_UNIQUE,
            graph=graph,
            whereRegion=self.whereRegion if not unique else None,
            managers=self.managers,
        )

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        builder = QueryBuilder(summary, managers=self.managers)
        builder.joinTable(self.sql.alias(), dimensions=self.graph.dimensions,
                          datasets=self.getDatasetColumns())
        return builder


class MaterializedQuery(Query):
    """A `Query` implementation that represents query results saved in a
    temporary table.

    `MaterializedQuery` instances should not be constructed directly; use
    `Query.materialize()` instead.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy object representing the temporary table.
    spatial : `Iterable` [ `DimensionElement` ]
        Spatial dimension elements whose regions must overlap for each valid
        result row (which may reject some rows that are in the table).
    datasetType : `DatasetType`
        The `DatasetType` of datasets returned by this query, or `None`
        if there are no dataset results.
    isUnique : `bool`
        If `True`, the table's rows are unique, and there is no need to
        add ``SELECT DISTINCT`` to guarantee this in results.
    graph : `DimensionGraph`
        Dimensions included in the columns of this table.
    whereRegion : `Region` or `None`
        A spatial region all result-row regions must overlap to be valid (which
        may reject some rows that are in the table).
    managers : `RegistryManagers`
        A struct containing `Registry` manager helper objects, forwarded to
        the `Query` constructor.
    """
    def __init__(self, *,
                 table: sqlalchemy.schema.Table,
                 spatial: Iterable[DimensionElement],
                 datasetType: Optional[DatasetType],
                 isUnique: bool,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers):
        super().__init__(graph=graph, whereRegion=whereRegion, managers=managers)
        self._table = table
        self._spatial = tuple(spatial)
        self._datasetType = datasetType
        self._isUnique = isUnique

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return self._isUnique

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        return self._table.columns[name]

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(self._spatial)

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        return self._table.columns[f"{name}_region"]

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        if self._datasetType is not None:
            return DatasetQueryColumns(
                datasetType=self._datasetType,
                id=self._table.columns["dataset_id"],
                runKey=self._table.columns[self.managers.collections.getRunForeignKeyName()],
                ingestDate=None,
            )
        else:
            return None

    @property
    def sql(self) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from Query.
        return self._table.select()

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        # Docstring inherited from Query.
        yield self

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
        if columns is None:
            return self
        if columns.isEmpty():
            return EmptyQuery(self.graph.universe, managers=self.managers)
        simpleQuery = SimpleQuery()
        simpleQuery.join(self._table)
        return DirectQuery(
            simpleQuery=simpleQuery,
            columns=columns,
            uniqueness=DirectQueryUniqueness.NEEDS_DISTINCT if unique else DirectQueryUniqueness.NOT_UNIQUE,
            graph=graph,
            whereRegion=self.whereRegion if not unique else None,
            managers=self.managers,
        )

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        builder = QueryBuilder(summary, managers=self.managers)
        builder.joinTable(self._table, dimensions=self.graph.dimensions, datasets=self.getDatasetColumns())
        return builder


class EmptyQuery(Query):
    """A `Query` implementation that handles the special case where the query
    would have no columns.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Set of all dimensions from which the null set is extracted.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.
    """
    def __init__(self, universe: DimensionUniverse, managers: RegistryManagers):
        super().__init__(graph=universe.empty, whereRegion=None, managers=managers)

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return True

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        raise KeyError(f"No dimension {name} in query (no dimensions at all, actually).")

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(())

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        raise KeyError(f"No region for {name} in query (no regions at all, actually).")

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        return None

    def rows(self, db: Database, *, region: Optional[Region] = None
             ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
        yield None
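
    # Illustrative sketch (added by the editor, not part of the original
    # module): `EmptyQuery.rows` yields a single `None` rather than real
    # rows, and the extraction helpers accept that. ``universe``,
    # ``managers``, and ``db`` are hypothetical stand-ins for a
    # `DimensionUniverse`, a `RegistryManagers`, and a `Database`.
    #
    #     empty = EmptyQuery(universe, managers)
    #     for row in empty.rows(db):
    #         dataId = empty.extractDataId(row)  # an empty DataCoordinate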

    @property
    def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
        # Docstring inherited from Query.
        return None

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        # Docstring inherited from Query.
        yield self

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        assert graph is None or graph.issubset(self.graph)
        return self

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        return QueryBuilder(summary, managers=self.managers)