Coverage for python/lsst/daf/butler/registry/queries/_query.py : 28%

1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("Query",)
25from abc import ABC, abstractmethod
26from contextlib import contextmanager
27import enum
28import itertools
29from typing import (
30 Callable,
31 Iterable,
32 Iterator,
33 Mapping,
34 Optional,
35 Tuple,
36 TYPE_CHECKING,
37)
39import sqlalchemy
41from lsst.sphgeom import Region
43from ...core import (
44 addDimensionForeignKey,
45 DataCoordinate,
46 DatasetRef,
47 DatasetType,
48 ddl,
49 Dimension,
50 DimensionElement,
51 DimensionGraph,
52 DimensionRecord,
53 DimensionUniverse,
54 SpatialRegionDatabaseRepresentation,
55 SimpleQuery,
56)
57from ..interfaces import Database
58from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary, RegistryManagers
60 if TYPE_CHECKING:
61 from ._builder import QueryBuilder
64class Query(ABC):
65 """An abstract base class for queries that return some combination of
66 `DatasetRef` and `DataCoordinate` objects.
68 Parameters
69 ----------
70 graph : `DimensionGraph`
71 Object describing the dimensions included in the query.
72 whereRegion : `lsst.sphgeom.Region`, optional
73 Region that all region columns in all returned rows must overlap.
74 managers : `RegistryManagers`
75 A struct containing the registry manager instances used by the query
76 system.
78 Notes
79 -----
80 The `Query` hierarchy abstracts over the database/SQL representation of a
81 particular set of data IDs or datasets. It is expected to be used as a
82 backend for other objects that provide more natural interfaces for one or
83 both of these, not as part of a public interface to query results.
84 """
85 def __init__(self, *,
86 graph: DimensionGraph,
87 whereRegion: Optional[Region],
88 managers: RegistryManagers,
89 ):
90 self.graph = graph
91 self.whereRegion = whereRegion
92 self.managers = managers
94 @abstractmethod
95 def isUnique(self) -> bool:
96 """Return `True` if this query's rows are guaranteed to be unique, and
97 `False` otherwise.
99 If this query has dataset results (`datasetType` is not `None`),
100 uniqueness applies to the `DatasetRef` instances returned by
101 `extractDatasetRef` from the result of `rows`. If it does not have
102 dataset results, uniqueness applies to the `DataCoordinate` instances
103 returned by `extractDataId`.
104 """
105 raise NotImplementedError()
107 @abstractmethod
108 def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
109 """Return the query column that contains the primary key value for
110 the dimension with the given name.
112 Parameters
113 ----------
114 name : `str`
115 Name of the dimension.
117 Returns
118 -------
119 column : `sqlalchemy.sql.ColumnElement`
120 SQLAlchemy object representing a column in the query.
122 Notes
123 -----
124 This method is intended primarily as a hook for subclasses to implement
125 and the ABC to call in order to provide higher-level functionality;
126 code that uses `Query` objects (but does not implement one) should
127 usually not have to call this method.
128 """
129 raise NotImplementedError()
131 @property
132 @abstractmethod
133 def spatial(self) -> Iterator[DimensionElement]:
134 """An iterator over the dimension element columns used in post-query
135 filtering of spatial overlaps (`Iterator` [ `DimensionElement` ]).
137 Notes
138 -----
139 This property is intended primarily as a hook for subclasses to
140 implement and the ABC to call in order to provide higher-level
141 functionality; code that uses `Query` objects (but does not implement
142 one) should usually not have to access this property.
143 """
144 raise NotImplementedError()
146 @abstractmethod
147 def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
148 """Return a region column for one of the dimension elements iterated
149 over by `spatial`.
151 Parameters
152 ----------
153 name : `str`
154 Name of the element.
156 Returns
157 -------
158 column : `sqlalchemy.sql.ColumnElement`
159 SQLAlchemy object representing a result column in the query.
161 Notes
162 -----
163 This method is intended primarily as a hook for subclasses to implement
164 and the ABC to call in order to provide higher-level functionality;
165 code that uses `Query` objects (but does not implement one) should
166 usually not have to call this method.
167 """
168 raise NotImplementedError()
170 @property
171 def datasetType(self) -> Optional[DatasetType]:
172 """The `DatasetType` of datasets returned by this query, or `None`
173 if there are no dataset results (`DatasetType` or `None`).
174 """
175 cols = self.getDatasetColumns()
176 if cols is None:
177 return None
178 return cols.datasetType
180 @abstractmethod
181 def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
182 """Return the columns for the datasets returned by this query.
184 Returns
185 -------
186 columns : `DatasetQueryColumns` or `None`
187 Struct containing SQLAlchemy representations of the result columns
188 for a dataset.
190 Notes
191 -----
192 This method is intended primarily as a hook for subclasses to implement
193 and the ABC to call in order to provide higher-level functionality;
194 code that uses `Query` objects (but does not implement one) should
195 usually not have to call this method.
196 """
197 raise NotImplementedError()
199 @property
200 @abstractmethod
201 def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
202 """A SQLAlchemy object representing the full query
203 (`sqlalchemy.sql.FromClause` or `None`).
205 This is `None` in the special case where the query has no columns, and
206 only one logical row.
207 """
208 raise NotImplementedError()
210 def predicate(self, region: Optional[Region] = None) -> Callable[[sqlalchemy.engine.RowProxy], bool]:
211 """Return a callable that can perform extra Python-side filtering of
212 query results.
214 To get the expected results from a query, the returned predicate *must*
215 be used to ignore rows for which it returns `False`; this permits the
216 `QueryBuilder` implementation to move logic from the database to Python
217 without changing the public interface.
219 Parameters
220 ----------
221 region : `sphgeom.Region`, optional
222 A region that any result-row regions must overlap in order for the
223 predicate to return `True`. If not provided, this will be
224 ``self.whereRegion``, if that exists.
226 Returns
227 -------
228 func : `Callable`
229 A callable that takes a single `sqlalchemy.engine.RowProxy`
230 argument and returns `bool`.
231 """
232 whereRegion = region if region is not None else self.whereRegion
234 def closure(row: sqlalchemy.engine.RowProxy) -> bool:
235 rowRegions = [row[self.getRegionColumn(element.name)] for element in self.spatial]
236 if whereRegion and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
237 return False
238 return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))
240 return closure
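# Illustrative sketch (not part of the original source), assuming ``db`` is a
# `Database` and ``query`` is a concrete, non-empty `Query`: the returned
# predicate is meant to be applied to every result row, e.g.
#
#     keep = query.predicate()
#     filtered = [row for row in db.query(query.sql) if keep(row)]
#
# `rows` below performs exactly this filtering internally, so most callers
# should prefer it over calling `predicate` directly.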
242 def rows(self, db: Database, *, region: Optional[Region] = None
243 ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
244 """Execute the query and yield result rows, applying `predicate`.
246 Parameters
247 ----------
248 region : `sphgeom.Region`, optional
249 A region that any result-row regions must overlap in order to be
250 yielded. If not provided, this will be ``self.whereRegion``, if
251 that exists.
253 Yields
254 ------
255 row : `sqlalchemy.engine.RowProxy` or `None`
256 Result row from the query. `None` may be yielded exactly once instead
257 of any real rows to indicate an empty query (see `EmptyQuery`).
258 """
259 predicate = self.predicate(region)
260 for row in db.query(self.sql):
261 if predicate(row):
262 yield row
264 def extractDimensionsTuple(self, row: Optional[sqlalchemy.engine.RowProxy],
265 dimensions: Iterable[Dimension]) -> tuple:
266 """Extract a tuple of data ID values from a result row.
268 Parameters
269 ----------
270 row : `sqlalchemy.engine.RowProxy` or `None`
271 A result row from a SQLAlchemy SELECT query, or `None` to indicate
272 the row from an `EmptyQuery`.
273 dimensions : `Iterable` [ `Dimension` ]
274 The dimensions to include in the returned tuple, in order.
276 Returns
277 -------
278 values : `tuple`
279 A tuple of dimension primary key values.
280 """
281 if row is None:
282 assert not tuple(dimensions), "Can only utilize empty query row when there are no dimensions."
283 return ()
284 return tuple(row[self.getDimensionColumn(dimension.name)] for dimension in dimensions)
286 def extractDataId(self, row: Optional[sqlalchemy.engine.RowProxy], *,
287 graph: Optional[DimensionGraph] = None,
288 records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
289 ) -> DataCoordinate:
290 """Extract a data ID from a result row.
292 Parameters
293 ----------
294 row : `sqlalchemy.engine.RowProxy` or `None`
295 A result row from a SQLAlchemy SELECT query, or `None` to indicate
296 the row from an `EmptyQuery`.
297 graph : `DimensionGraph`, optional
298 The dimensions the returned data ID should identify. If not
299 provided, this will be all dimensions in `QuerySummary.requested`.
300 records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
301 Nested mapping containing records to attach to the returned
302 `DataCoordinate`, for which `~DataCoordinate.hasRecords` will
303 return `True`. If provided, outer keys must include all dimension
304 element names in ``graph``, and inner keys should be tuples of
305 dimension primary key values in the same order as
306 ``element.graph.required``. If not provided,
307 `DataCoordinate.hasRecords` will return `False` on the returned
308 object.
310 Returns
311 -------
312 dataId : `DataCoordinate`
313 A data ID that identifies all required and implied dimensions. If
314 ``records is not None``, this will have
315 `~DataCoordinate.hasRecords()` return `True`.
316 """
317 if graph is None:
318 graph = self.graph
319 if not graph:
320 return DataCoordinate.makeEmpty(self.graph.universe)
321 dataId = DataCoordinate.fromFullValues(
322 graph,
323 self.extractDimensionsTuple(row, itertools.chain(graph.required, graph.implied))
324 )
325 if records is not None:
326 recordsForRow = {}
327 for element in graph.elements:
328 key = tuple(dataId.subset(element.graph).values())
329 recordsForRow[element.name] = records[element.name].get(key)
330 return dataId.expanded(recordsForRow)
331 else:
332 return dataId
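# Illustrative sketch (not part of the original source), assuming ``db`` is a
# `Database` and ``query`` is a concrete `Query`: data IDs are typically
# obtained by combining `rows` with `extractDataId`, e.g.
#
#     dataIds = [query.extractDataId(row) for row in query.rows(db)]
#
# When a ``records`` mapping is supplied, the returned `DataCoordinate`
# objects also satisfy ``hasRecords()``.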
334 def extractDatasetRef(self, row: sqlalchemy.engine.RowProxy,
335 dataId: Optional[DataCoordinate] = None,
336 records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
337 ) -> DatasetRef:
338 """Extract a `DatasetRef` from a result row.
340 Parameters
341 ----------
342 row : `sqlalchemy.engine.RowProxy`
343 A result row from a SQLAlchemy SELECT query.
344 dataId : `DataCoordinate`, optional
345 Data ID to attach to the `DatasetRef`. A minimal (i.e. base class)
346 `DataCoordinate` is constructed from ``row`` if `None`.
347 records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
348 Records to attach to the returned data ID (see `extractDataId`). If provided,
349 outer keys must include all dimension element names in ``graph``,
350 and inner keys should be tuples of dimension primary key values
351 in the same order as ``element.graph.required``.
353 Returns
354 -------
355 ref : `DatasetRef`
356 Reference to the dataset; guaranteed to have `DatasetRef.id` not
357 `None`.
358 """
359 datasetColumns = self.getDatasetColumns()
360 assert datasetColumns is not None
361 if dataId is None:
362 dataId = self.extractDataId(row, graph=datasetColumns.datasetType.dimensions, records=records)
363 runRecord = self.managers.collections[row[datasetColumns.runKey]]
364 return DatasetRef(datasetColumns.datasetType, dataId, id=row[datasetColumns.id], run=runRecord.name)
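# Illustrative sketch (not part of the original source): for a query with
# dataset results (``query.datasetType is not None``), refs can be extracted
# row by row, e.g.
#
#     refs = [query.extractDatasetRef(row) for row in query.rows(db)
#             if row is not None]
#
# Each returned `DatasetRef` is resolved, i.e. its ``id`` is not `None`.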
366 def _makeTableSpec(self, constraints: bool = False) -> ddl.TableSpec:
367 """Helper method for subclass implementations of `materialize`.
369 Parameters
370 ----------
371 constraints : `bool`, optional
372 If `True` (`False` is default), define a specification that
373 includes actual foreign key constraints for logical foreign keys.
374 Some database engines do not permit temporary tables to reference
375 normal tables, so this should be `False` when generating a spec
376 for a temporary table unless the database engine is known to
377 support them.
379 Returns
380 -------
381 spec : `ddl.TableSpec`
382 Specification for a table that could hold this query's result rows.
383 """
384 unique = self.isUnique()
385 spec = ddl.TableSpec(fields=())
386 for dimension in self.graph:
387 addDimensionForeignKey(spec, dimension, primaryKey=unique, constraint=constraints)
388 for element in self.spatial:
389 spec.fields.update(
390 SpatialRegionDatabaseRepresentation.makeFieldSpecs(
391 nullable=True,
392 name=f"{element.name}_region",
393 )
394 )
395 datasetColumns = self.getDatasetColumns()
396 if datasetColumns is not None:
397 self.managers.datasets.addDatasetForeignKey(spec, primaryKey=unique, constraint=constraints)
398 self.managers.collections.addRunForeignKey(spec, nullable=False, constraint=constraints)
399 return spec
401 def _makeSubsetQueryColumns(self, *, graph: Optional[DimensionGraph] = None,
402 datasets: bool = True,
403 unique: bool = False) -> Tuple[DimensionGraph, Optional[QueryColumns]]:
404 """Helper method for subclass implementations of `subset`.
406 Parameters
407 ----------
408 graph : `DimensionGraph`, optional
409 Dimensions to include in the new `Query` being constructed.
410 ``subset`` implementations should generally just forward their
411 own ``graph`` argument here.
412 datasets : `bool`, optional
413 Whether the new `Query` should include dataset results. Defaults
414 to `True`, but is ignored if ``self`` does not include dataset
415 results.
416 unique : `bool`, optional
417 Whether the new `Query` should guarantee unique results (this may
418 come with a performance penalty).
420 Returns
421 -------
422 graph : `DimensionGraph`
423 The dimensions of the new `Query`. This is exactly the same as
424 the argument of the same name, with ``self.graph`` used if that
425 argument is `None`.
426 columns : `QueryColumns` or `None`
427 A struct containing the SQLAlchemy column objects to use in the
428 new query, constructed by delegating to other (mostly abstract)
429 methods on ``self``. If `None`, `subset` may return ``self``.
430 """
431 if graph is None:
432 graph = self.graph
433 if (graph == self.graph and (self.getDatasetColumns() is None or datasets)
434 and (self.isUnique() or not unique)):
435 return graph, None
436 columns = QueryColumns()
437 for dimension in graph.dimensions:
438 col = self.getDimensionColumn(dimension.name)
439 columns.keys[dimension] = [col]
440 if not unique:
441 for element in self.spatial:
442 col = self.getRegionColumn(element.name)
443 columns.regions[element] = col
444 if datasets and self.getDatasetColumns() is not None:
445 columns.datasets = self.getDatasetColumns()
446 return graph, columns
448 @contextmanager
449 def materialize(self, db: Database) -> Iterator[Query]:
450 """Execute this query and insert its results into a temporary table.
452 Parameters
453 ----------
454 db : `Database`
455 Database engine to execute the query against.
457 Returns
458 -------
459 context : `typing.ContextManager` [ `MaterializedQuery` ]
460 A context manager that ensures the temporary table is created and
461 populated in ``__enter__`` (returning a `MaterializedQuery` object
462 backed by that table), and dropped in ``__exit__``. If ``self``
463 is already a `MaterializedQuery`, ``__enter__`` may just return
464 ``self`` and ``__exit__`` may do nothing (reflecting the fact that
465 an outer context manager should already take care of everything
466 else).
467 """
468 spec = self._makeTableSpec()
469 table = db.makeTemporaryTable(spec)
470 db.insert(table, select=self.sql, names=spec.fields.names)
471 yield MaterializedQuery(table=table,
472 spatial=self.spatial,
473 datasetType=self.datasetType,
474 isUnique=self.isUnique(),
475 graph=self.graph,
476 whereRegion=self.whereRegion,
477 managers=self.managers)
478 db.dropTemporaryTable(table)
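# Illustrative sketch (not part of the original source): `materialize` is a
# context manager, so the temporary table only exists inside the ``with``
# block, e.g.
#
#     with query.materialize(db) as materialized:
#         for row in materialized.rows(db):
#             ...  # temporary table is dropped automatically on exit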
480 @abstractmethod
481 def subset(self, *, graph: Optional[DimensionGraph] = None,
482 datasets: bool = True,
483 unique: bool = False) -> Query:
484 """Return a new `Query` whose columns and/or rows are (mostly) subset
485 of this one's.
487 Parameters
488 ----------
489 graph : `DimensionGraph`, optional
490 Dimensions to include in the new `Query` being constructed.
491 If `None` (default), ``self.graph`` is used.
492 datasets : `bool`, optional
493 Whether the new `Query` should include dataset results. Defaults
494 to `True`, but is ignored if ``self`` does not include dataset
495 results.
496 unique : `bool`, optional
497 Whether the new `Query` should guarantee unique results (this may
498 come with a performance penalty).
500 Returns
501 -------
502 query : `Query`
503 A query object corresponding to the given inputs. May be ``self``
504 if no changes were requested.
506 Notes
507 -----
508 The way spatial overlaps are handled at present makes it impossible to
509 fully guarantee in general that the new query's rows are a subset of
510 this one's while also returning unique rows. That's because the
511 database is only capable of performing approximate, conservative
512 overlaps via the common skypix system; we defer actual region overlap
513 operations to per-result-row Python logic. But including the region
514 columns necessary to do that postprocessing in the query makes it
515 impossible to do a SELECT DISTINCT on the user-visible dimensions of
516 the query. For example, consider starting with a query with dimensions
517 (instrument, skymap, visit, tract). That involves a spatial join
518 between visit and tract, and we include the region columns from both
519 tables in the results in order to only actually yield result rows
520 (see `predicate` and `rows`) where the regions in those two columns
521 overlap. If the user then wants to subset to just (skymap, tract) with
522 unique results, we have two unpalatable options:
524 - we can do a SELECT DISTINCT with just the skymap and tract columns
525 in the SELECT clause, dropping all detailed overlap information and
526 including some tracts that did not actually overlap any of the
527 visits in the original query (but were regarded as _possibly_
528 overlapping via the coarser, common-skypix relationships);
530 - we can include the tract and visit region columns in the query, and
531 continue to filter out the non-overlapping pairs, but completely
532 disregard the user's request for unique tracts.
534 This interface specifies that implementations must do the former, as
535 that's what makes things efficient in our most important use case
536 (``QuantumGraph`` generation in ``pipe_base``). We may be able to
537 improve this situation in the future by putting exact overlap
538 information in the database, either by using built-in (but
539 engine-specific) spatial database functionality or (more likely)
540 switching to a scheme in which pairwise dimension spatial relationships
541 are explicitly precomputed (for e.g. combinations of instruments and
542 skymaps).
543 """
544 raise NotImplementedError()
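# Illustrative sketch (not part of the original source) of the tradeoff
# described in the Notes above: starting from a query over
# (instrument, skymap, visit, tract), a caller might request
#
#     tractOnly = query.subset(graph=tractGraph, datasets=False, unique=True)
#
# where ``tractGraph`` is a hypothetical `DimensionGraph` for (skymap, tract);
# the result is a SELECT DISTINCT that may include tracts whose exact regions
# did not overlap any visit in the original query.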
546 @abstractmethod
547 def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
548 """Return a `QueryBuilder` that can be used to construct a new `Query`
549 that is joined to (and hence constrained by) this one.
551 Parameters
552 ----------
553 summary : `QuerySummary`, optional
554 A `QuerySummary` instance that specifies the dimensions and any
555 additional constraints to include in the new query being
556 constructed, or `None` to use the dimensions of ``self`` with no
557 additional constraints.
558 """
559 raise NotImplementedError()
561 graph: DimensionGraph
562 """The dimensions identified by this query and included in any data IDs
563 created from its result rows (`DimensionGraph`).
564 """
566 whereRegion: Optional[Region]
567 """A spatial region that all regions in all rows returned by this query
568 must overlap (`lsst.sphgeom.Region` or `None`).
569 """
571 managers: RegistryManagers
572 """A struct containing `Registry` helper object (`RegistryManagers`).
573 """
576class DirectQueryUniqueness(enum.Enum):
577 """An enum representing the ways in which a query can have unique rows (or
578 not).
579 """
581 NOT_UNIQUE = enum.auto()
582 """The query is not expected to have unique rows.
583 """
585 NATURALLY_UNIQUE = enum.auto()
586 """The construction of the query guarantees that it will have unique
587 result rows, even without SELECT DISTINCT or a GROUP BY clause.
588 """
590 NEEDS_DISTINCT = enum.auto()
591 """The query is expected to yield unique result rows, and needs to use
592 SELECT DISTINCT or an equivalent GROUP BY clause to achieve this.
593 """
596class DirectQuery(Query):
597 """A `Query` implementation that represents a direct SELECT query that
598 usually joins many tables.
600 `DirectQuery` objects should generally only be constructed by
601 `QueryBuilder` or the methods of other `Query` objects.
603 Parameters
604 ----------
605 simpleQuery : `SimpleQuery`
606 Struct representing the actual SELECT, FROM, and WHERE clauses.
607 columns : `QueryColumns`
608 Columns that are referenced in the query in any clause.
609 uniqueness : `DirectQueryUniqueness`
610 Enum value indicating whether the query should yield unique result
611 rows, and if so whether that needs to be explicitly requested of the
612 database.
613 graph : `DimensionGraph`
614 Object describing the dimensions included in the query.
615 whereRegion : `lsst.sphgeom.Region`, optional
616 Region that all region columns in all returned rows must overlap.
617 managers : `RegistryManagers`
618 Struct containing the `Registry` manager helper objects, to be
619 forwarded to the `Query` constructor.
620 """
621 def __init__(self, *,
622 simpleQuery: SimpleQuery,
623 columns: QueryColumns,
624 uniqueness: DirectQueryUniqueness,
625 graph: DimensionGraph,
626 whereRegion: Optional[Region],
627 managers: RegistryManagers):
628 super().__init__(graph=graph, whereRegion=whereRegion, managers=managers)
629 assert not simpleQuery.columns, "Columns should always be set on a copy in .sql"
630 assert not columns.isEmpty(), "EmptyQuery must be used when a query would have no columns."
631 self._simpleQuery = simpleQuery
632 self._columns = columns
633 self._uniqueness = uniqueness
635 def isUnique(self) -> bool:
636 # Docstring inherited from Query.
637 return self._uniqueness is not DirectQueryUniqueness.NOT_UNIQUE
639 def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
640 # Docstring inherited from Query.
641 return self._columns.getKeyColumn(name).label(name)
643 @property
644 def spatial(self) -> Iterator[DimensionElement]:
645 # Docstring inherited from Query.
646 return iter(self._columns.regions)
648 def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
649 # Docstring inherited from Query.
650 return self._columns.regions[name].column.label(f"{name}_region")
652 def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
653 # Docstring inherited from Query.
654 base = self._columns.datasets
655 if base is None:
656 return None
657 ingestDate = base.ingestDate
658 if ingestDate is not None:
659 ingestDate = ingestDate.label("ingest_date")
660 return DatasetQueryColumns(
661 datasetType=base.datasetType,
662 id=base.id.label("dataset_id"),
663 runKey=base.runKey.label(self.managers.collections.getRunForeignKeyName()),
664 ingestDate=ingestDate,
665 )
667 @property
668 def sql(self) -> sqlalchemy.sql.FromClause:
669 # Docstring inherited from Query.
670 simpleQuery = self._simpleQuery.copy()
671 for dimension in self.graph:
672 simpleQuery.columns.append(self.getDimensionColumn(dimension.name))
673 for element in self.spatial:
674 simpleQuery.columns.append(self.getRegionColumn(element.name))
675 datasetColumns = self.getDatasetColumns()
676 if datasetColumns is not None:
677 simpleQuery.columns.extend(datasetColumns)
678 sql = simpleQuery.combine()
679 if self._uniqueness is DirectQueryUniqueness.NEEDS_DISTINCT:
680 return sql.distinct()
681 else:
682 return sql
684 def subset(self, *, graph: Optional[DimensionGraph] = None,
685 datasets: bool = True,
686 unique: bool = False) -> Query:
687 # Docstring inherited from Query.
688 graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
689 if columns is None:
690 return self
691 if columns.isEmpty():
692 return EmptyQuery(self.graph.universe, self.managers)
693 return DirectQuery(
694 simpleQuery=self._simpleQuery.copy(),
695 columns=columns,
696 uniqueness=DirectQueryUniqueness.NEEDS_DISTINCT if unique else DirectQueryUniqueness.NOT_UNIQUE,
697 graph=graph,
698 whereRegion=self.whereRegion if not unique else None,
699 managers=self.managers,
700 )
702 def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
703 # Docstring inherited from Query.
704 from ._builder import QueryBuilder
705 if summary is None:
706 summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
707 if not summary.requested.issubset(self.graph):
708 raise NotImplementedError(
709 f"Query.makeBuilder does not yet support augmenting dimensions "
710 f"({summary.requested.dimensions}) beyond those originally included in the query "
711 f"({self.graph.dimensions})."
712 )
713 builder = QueryBuilder(summary, managers=self.managers)
714 builder.joinTable(self.sql.alias(), dimensions=self.graph.dimensions,
715 datasets=self.getDatasetColumns())
716 return builder
719class MaterializedQuery(Query):
720 """A `Query` implementation that represents query results saved in a
721 temporary table.
723 `MaterializedQuery` instances should not be constructed directly; use
724 `Query.materialize()` instead.
726 Parameters
727 ----------
728 table : `sqlalchemy.schema.Table`
729 SQLAlchemy object representing the temporary table.
730 spatial : `Iterable` [ `DimensionElement` ]
731 Spatial dimension elements whose regions must overlap for each valid
732 result row (which may reject some rows that are in the table).
733 datasetType : `DatasetType` or `None`
734 The `DatasetType` of datasets returned by this query, or `None`
735 if there are no dataset results.
736 isUnique : `bool`
737 If `True`, the table's rows are unique, and there is no need to
738 add ``SELECT DISTINCT`` to guarantee this in results.
739 graph : `DimensionGraph`
740 Dimensions included in the columns of this table.
741 whereRegion : `Region` or `None`
742 A spatial region all result-row regions must overlap to be valid (which
743 may reject some rows that are in the table).
744 managers : `RegistryManagers`
745 A struct containing `Registry` manager helper objects, forwarded to
746 the `Query` constructor.
747 """
748 def __init__(self, *,
749 table: sqlalchemy.schema.Table,
750 spatial: Iterable[DimensionElement],
751 datasetType: Optional[DatasetType],
752 isUnique: bool,
753 graph: DimensionGraph,
754 whereRegion: Optional[Region],
755 managers: RegistryManagers):
756 super().__init__(graph=graph, whereRegion=whereRegion, managers=managers)
757 self._table = table
758 self._spatial = tuple(spatial)
759 self._datasetType = datasetType
760 self._isUnique = isUnique
762 def isUnique(self) -> bool:
763 # Docstring inherited from Query.
764 return self._isUnique
766 def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
767 # Docstring inherited from Query.
768 return self._table.columns[name]
770 @property
771 def spatial(self) -> Iterator[DimensionElement]:
772 # Docstring inherited from Query.
773 return iter(self._spatial)
775 def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
776 # Docstring inherited from Query.
777 return self._table.columns[f"{name}_region"]
779 def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
780 # Docstring inherited from Query.
781 if self._datasetType is not None:
782 return DatasetQueryColumns(
783 datasetType=self._datasetType,
784 id=self._table.columns["dataset_id"],
785 runKey=self._table.columns[self.managers.collections.getRunForeignKeyName()],
786 ingestDate=None,
787 )
788 else:
789 return None
791 @property
792 def sql(self) -> sqlalchemy.sql.FromClause:
793 # Docstring inherited from Query.
794 return self._table.select()
796 @contextmanager
797 def materialize(self, db: Database) -> Iterator[Query]:
798 # Docstring inherited from Query.
799 yield self
801 def subset(self, *, graph: Optional[DimensionGraph] = None,
802 datasets: bool = True,
803 unique: bool = False) -> Query:
804 # Docstring inherited from Query.
805 graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
806 if columns is None:
807 return self
808 if columns.isEmpty():
809 return EmptyQuery(self.graph.universe, managers=self.managers)
810 simpleQuery = SimpleQuery()
811 simpleQuery.join(self._table)
812 return DirectQuery(
813 simpleQuery=simpleQuery,
814 columns=columns,
815 uniqueness=DirectQueryUniqueness.NEEDS_DISTINCT if unique else DirectQueryUniqueness.NOT_UNIQUE,
816 graph=graph,
817 whereRegion=self.whereRegion if not unique else None,
818 managers=self.managers,
819 )
821 def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
822 # Docstring inherited from Query.
823 from ._builder import QueryBuilder
824 if summary is None:
825 summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
826 if not summary.requested.issubset(self.graph):
827 raise NotImplementedError(
828 f"Query.makeBuilder does not yet support augmenting dimensions "
829 f"({summary.requested.dimensions}) beyond those originally included in the query "
830 f"({self.graph.dimensions})."
831 )
832 builder = QueryBuilder(summary, managers=self.managers)
833 builder.joinTable(self._table, dimensions=self.graph.dimensions, datasets=self.getDatasetColumns())
834 return builder
837class EmptyQuery(Query):
838 """A `Query` implementation that handes the special case where the query
839 would have no columns.
841 Parameters
842 ----------
843 universe : `DimensionUniverse`
844 Set of all dimensions from which the null set is extracted.
845 managers : `RegistryManagers`
846 A struct containing the registry manager instances used by the query
847 system.
848 """
849 def __init__(self, universe: DimensionUniverse, managers: RegistryManagers):
850 super().__init__(graph=universe.empty, whereRegion=None, managers=managers)
852 def isUnique(self) -> bool:
853 # Docstring inherited from Query.
854 return True
856 def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
857 # Docstring inherited from Query.
858 raise KeyError(f"No dimension {name} in query (no dimensions at all, actually).")
860 @property
861 def spatial(self) -> Iterator[DimensionElement]:
862 # Docstring inherited from Query.
863 return iter(())
865 def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
866 # Docstring inherited from Query.
867 raise KeyError(f"No region for {name} in query (no regions at all, actually).")
869 def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
870 # Docstring inherited from Query.
871 return None
873 def rows(self, db: Database, *, region: Optional[Region] = None
874 ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
875 yield None
877 @property
878 def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
879 # Docstring inherited from Query.
880 return None
882 @contextmanager
883 def materialize(self, db: Database) -> Iterator[Query]:
884 # Docstring inherited from Query.
885 yield self
887 def subset(self, *, graph: Optional[DimensionGraph] = None,
888 datasets: bool = True,
889 unique: bool = False) -> Query:
890 # Docstring inherited from Query.
891 assert graph is None or graph.issubset(self.graph)
892 return self
894 def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
895 # Docstring inherited from Query.
896 from ._builder import QueryBuilder
897 if summary is None:
898 summary = QuerySummary(self.graph)
899 if not summary.requested.issubset(self.graph):
900 raise NotImplementedError(
901 f"Query.makeBuilder does not yet support augmenting dimensions "
902 f"({summary.requested.dimensions}) beyond those originally included in the query "
903 f"({self.graph.dimensions})."
904 )
905 return QueryBuilder(summary, managers=self.managers)
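# Illustrative sketch (not part of the original source): `EmptyQuery` has no
# columns and yields `None` exactly once from `rows`, so extracting a data ID
# from that row produces the empty `DataCoordinate`, e.g.
#
#     empty = EmptyQuery(universe, managers)
#     rowList = list(empty.rows(db))            # [None]
#     dataId = empty.extractDataId(rowList[0])  # DataCoordinate.makeEmpty(...)
#
# where ``universe``, ``managers``, and ``db`` are assumed to be an existing
# `DimensionUniverse`, `RegistryManagers`, and `Database`, respectively.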