Coverage for python/lsst/daf/butler/registry/queries/_query.py : 26%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("Query",)
25from abc import ABC, abstractmethod
26from contextlib import contextmanager
27import enum
28import itertools
29from typing import (
30 Callable,
31 Dict,
32 Iterable,
33 Iterator,
34 Mapping,
35 Optional,
36 Tuple,
37 TYPE_CHECKING,
38)
40import sqlalchemy
42from lsst.sphgeom import Region
44from ...core import (
45 addDimensionForeignKey,
46 DataCoordinate,
47 DatasetRef,
48 DatasetType,
49 ddl,
50 Dimension,
51 DimensionElement,
52 DimensionGraph,
53 DimensionRecord,
54 DimensionUniverse,
55 SpatialRegionDatabaseRepresentation,
56 SimpleQuery,
57)
58from ..interfaces import Database
59from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary, RegistryManagers
61if TYPE_CHECKING: 61 ↛ 62line 61 didn't jump to line 62, because the condition on line 61 was never true
62 from ._builder import QueryBuilder
class Query(ABC):
    """An abstract base class for queries that return some combination of
    `DatasetRef` and `DataCoordinate` objects.

    Parameters
    ----------
    graph : `DimensionGraph`
        Object describing the dimensions included in the query.
    whereRegion : `lsst.sphgeom.Region`, optional
        Region that all region columns in all returned rows must overlap.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.

    Notes
    -----
    The `Query` hierarchy abstracts over the database/SQL representation of a
    particular set of data IDs or datasets.  It is expected to be used as a
    backend for other objects that provide more natural interfaces for one or
    both of these, not as part of a public interface to query results.
    """
    def __init__(self, *,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers,
                 ):
        self.graph = graph
        self.whereRegion = whereRegion
        self.managers = managers

    @abstractmethod
    def isUnique(self) -> bool:
        """Return `True` if this query's rows are guaranteed to be unique, and
        `False` otherwise.

        If this query has dataset results (`datasetType` is not `None`),
        uniqueness applies to the `DatasetRef` instances returned by
        `extractDatasetRef` from the result of `rows`.  If it does not have
        dataset results, uniqueness applies to the `DataCoordinate` instances
        returned by `extractDataId`.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        """Return the query column that contains the primary key value for
        the dimension with the given name.

        Parameters
        ----------
        name : `str`
            Name of the dimension.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`.
            SQLAlchemy object representing a column in the query.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def spatial(self) -> Iterator[DimensionElement]:
        """An iterator over the dimension element columns used in post-query
        filtering of spatial overlaps (`Iterator` [ `DimensionElement` ]).

        Notes
        -----
        This property is intended primarily as a hook for subclasses to
        implement and the ABC to call in order to provide higher-level
        functionality; code that uses `Query` objects (but does not implement
        one) should usually not have to access this property.
        """
        raise NotImplementedError()

    @abstractmethod
    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        """Return a region column for one of the dimension elements iterated
        over by `spatial`.

        Parameters
        ----------
        name : `str`
            Name of the element.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy representing a result column in the query.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    def datasetType(self) -> Optional[DatasetType]:
        """The `DatasetType` of datasets returned by this query, or `None`
        if there are no dataset results (`DatasetType` or `None`).
        """
        cols = self.getDatasetColumns()
        if cols is None:
            return None
        return cols.datasetType

    @abstractmethod
    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        """Return the columns for the datasets returned by this query.

        Returns
        -------
        columns : `DatasetQueryColumns` or `None`
            Struct containing SQLAlchemy representations of the result columns
            for a dataset.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
        """A SQLAlchemy object representing the full query
        (`sqlalchemy.sql.FromClause` or `None`).

        This is `None` in the special case where the query has no columns, and
        only one logical row.
        """
        raise NotImplementedError()

    def predicate(self, region: Optional[Region] = None) -> Callable[[sqlalchemy.engine.RowProxy], bool]:
        """Return a callable that can perform extra Python-side filtering of
        query results.

        To get the expected results from a query, the returned predicate *must*
        be used to ignore rows for which it returns `False`; this permits the
        `QueryBuilder` implementation to move logic from the database to Python
        without changing the public interface.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order for the
            predicate to return `True`.  If not provided, this will be
            ``self.whereRegion``, if that exists.

        Returns
        -------
        func : `Callable`
            A callable that takes a single `sqlalchemy.engine.RowProxy`
            argument and returns `bool`.
        """
        whereRegion = region if region is not None else self.whereRegion

        def closure(row: sqlalchemy.engine.RowProxy) -> bool:
            rowRegions = [row[self.getRegionColumn(element.name)] for element in self.spatial]
            # Reject the row if any of its regions misses the WHERE region...
            if whereRegion and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
                return False
            # ...or if any pair of its own regions fails to overlap.
            return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))

        return closure

    def rows(self, db: Database, *, region: Optional[Region] = None
             ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
        """Execute the query and yield result rows, applying `predicate`.

        Parameters
        ----------
        db : `Database`
            Database engine to execute the query against.
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order to be
            yielded.  If not provided, this will be ``self.whereRegion``, if
            that exists.

        Yields
        ------
        row : `sqlalchemy.engine.RowProxy` or `None`
            Result row from the query.  `None` may be yielded exactly once
            instead of any real rows to indicate an empty query (see
            `EmptyQuery`).
        """
        predicate = self.predicate(region)
        for row in db.query(self.sql):
            if predicate(row):
                yield row

    def extractDimensionsTuple(self, row: Optional[sqlalchemy.engine.RowProxy],
                               dimensions: Iterable[Dimension]) -> tuple:
        """Extract a tuple of data ID values from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy` or `None`
            A result row from a SQLAlchemy SELECT query, or `None` to indicate
            the row from an `EmptyQuery`.
        dimensions : `Iterable` [ `Dimension` ]
            The dimensions to include in the returned tuple, in order.

        Returns
        -------
        values : `tuple`
            A tuple of dimension primary key values.
        """
        if row is None:
            assert not tuple(dimensions), "Can only utilize empty query row when there are no dimensions."
            return ()
        return tuple(row[self.getDimensionColumn(dimension.name)] for dimension in dimensions)

    def extractDataId(self, row: Optional[sqlalchemy.engine.RowProxy], *,
                      graph: Optional[DimensionGraph] = None,
                      records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
                      ) -> DataCoordinate:
        """Extract a data ID from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy` or `None`
            A result row from a SQLAlchemy SELECT query, or `None` to indicate
            the row from an `EmptyQuery`.
        graph : `DimensionGraph`, optional
            The dimensions the returned data ID should identify.  If not
            provided, this will be all dimensions in `QuerySummary.requested`.
        records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
            Nested mapping containing records to attach to the returned
            `DataCoordinate`, for which `~DataCoordinate.hasRecords` will
            return `True`.  If provided, outer keys must include all dimension
            element names in ``graph``, and inner keys should be tuples of
            dimension primary key values in the same order as
            ``element.graph.required``.  If not provided,
            `DataCoordinate.hasRecords` will return `False` on the returned
            object.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID that identifies all required and implied dimensions.  If
            ``records is not None``, this will have
            `~DataCoordinate.hasRecords()` return `True`.
        """
        if graph is None:
            graph = self.graph
        if not graph:
            return DataCoordinate.makeEmpty(self.graph.universe)
        dataId = DataCoordinate.fromFullValues(
            graph,
            self.extractDimensionsTuple(row, itertools.chain(graph.required, graph.implied))
        )
        if records is not None:
            recordsForRow = {}
            for element in graph.elements:
                key = tuple(dataId.subset(element.graph).values())
                # A missing record is stored as `None` rather than raising.
                recordsForRow[element.name] = records[element.name].get(key)
            return dataId.expanded(recordsForRow)
        else:
            return dataId

    def extractDatasetRef(self, row: sqlalchemy.engine.RowProxy,
                          dataId: Optional[DataCoordinate] = None,
                          records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
                          ) -> DatasetRef:
        """Extract a `DatasetRef` from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        dataId : `DataCoordinate`
            Data ID to attach to the `DatasetRef`.  A minimal (i.e. base class)
            `DataCoordinate` is constructed from ``row`` if `None`.
        records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
            Records to use to return an `ExpandedDataCoordinate`.  If provided,
            outer keys must include all dimension element names in ``graph``,
            and inner keys should be tuples of dimension primary key values
            in the same order as ``element.graph.required``.

        Returns
        -------
        ref : `DatasetRef`
            Reference to the dataset; guaranteed to have `DatasetRef.id` not
            `None`.
        """
        datasetColumns = self.getDatasetColumns()
        assert datasetColumns is not None
        if dataId is None:
            dataId = self.extractDataId(row, graph=datasetColumns.datasetType.dimensions, records=records)
        runRecord = self.managers.collections[row[datasetColumns.runKey]]
        return DatasetRef(datasetColumns.datasetType, dataId, id=row[datasetColumns.id], run=runRecord.name)

    def _makeTableSpec(self, constraints: bool = False) -> ddl.TableSpec:
        """Helper method for subclass implementations of `materialize`.

        Parameters
        ----------
        constraints : `bool`, optional
            If `True` (`False` is default), define a specification that
            includes actual foreign key constraints for logical foreign keys.
            Some database engines do not permit temporary tables to reference
            normal tables, so this should be `False` when generating a spec
            for a temporary table unless the database engine is known to
            support them.

        Returns
        -------
        spec : `ddl.TableSpec`
            Specification for a table that could hold this query's result rows.
        """
        unique = self.isUnique()
        spec = ddl.TableSpec(fields=())
        for dimension in self.graph:
            addDimensionForeignKey(spec, dimension, primaryKey=unique, constraint=constraints)
        for element in self.spatial:
            spec.fields.update(
                SpatialRegionDatabaseRepresentation.makeFieldSpecs(
                    nullable=True,
                    name=f"{element.name}_region",
                )
            )
        datasetColumns = self.getDatasetColumns()
        if datasetColumns is not None:
            self.managers.datasets.addDatasetForeignKey(spec, primaryKey=unique, constraint=constraints)
            self.managers.collections.addRunForeignKey(spec, nullable=False, constraint=constraints)
        return spec

    def _makeSubsetQueryColumns(self, *, graph: Optional[DimensionGraph] = None,
                                datasets: bool = True,
                                unique: bool = False) -> Tuple[DimensionGraph, Optional[QueryColumns]]:
        """Helper method for subclass implementations of `subset`.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new `Query` being constructed.
            ``subset`` implementations should generally just forward their
            own ``graph`` argument here.
        datasets : `bool`, optional
            Whether the new `Query` should include dataset results.  Defaults
            to `True`, but is ignored if ``self`` does not include dataset
            results.
        unique : `bool`, optional
            Whether the new `Query` should guarantee unique results (this may
            come with a performance penalty).

        Returns
        -------
        graph : `DimensionGraph`
            The dimensions of the new `Query`.  This is exactly the same as
            the argument of the same name, with ``self.graph`` used if that
            argument is `None`.
        columns : `QueryColumns` or `None`
            A struct containing the SQLAlchemy column objects to use in the
            new query, constructed by delegating to other (mostly abstract)
            methods on ``self``.  If `None`, `subset` may return ``self``.
        """
        if graph is None:
            graph = self.graph
        # Nothing would change: same dimensions, no dataset columns to drop,
        # and no uniqueness guarantee to add.
        if (graph == self.graph and (self.getDatasetColumns() is None or datasets)
                and (self.isUnique() or not unique)):
            return graph, None
        columns = QueryColumns()
        for dimension in graph.dimensions:
            col = self.getDimensionColumn(dimension.name)
            columns.keys[dimension] = [col]
        # Region columns are deliberately omitted from unique subsets; they
        # would defeat SELECT DISTINCT on the dimension columns (see `subset`).
        if not unique:
            for element in self.spatial:
                col = self.getRegionColumn(element.name)
                columns.regions[element] = col
        if datasets:
            datasetColumns = self.getDatasetColumns()
            if datasetColumns is not None:
                columns.datasets = datasetColumns
        return graph, columns

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        """Execute this query and insert its results into a temporary table.

        Parameters
        ----------
        db : `Database`
            Database engine to execute the query against.

        Returns
        -------
        context : `typing.ContextManager` [ `MaterializedQuery` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a `MaterializedQuery` object
            backed by that table), and dropped in ``__exit__``.  If ``self``
            is already a `MaterializedQuery`, ``__enter__`` may just return
            ``self`` and ``__exit__`` may do nothing (reflecting the fact that
            an outer context manager should already take care of everything
            else).

        Notes
        -----
        The temporary table is dropped even when populating it fails or the
        body of the caller's ``with`` block raises; the original exception
        then propagates after the drop has been attempted.
        """
        spec = self._makeTableSpec()
        with db.session() as session:
            table = session.makeTemporaryTable(spec)
            # An exception raised inside the caller's ``with`` body is
            # re-raised at the ``yield``; without ``finally`` the drop would
            # be skipped and the table would leak for the rest of the session.
            try:
                db.insert(table, select=self.sql, names=spec.fields.names)
                yield MaterializedQuery(table=table,
                                        spatial=self.spatial,
                                        datasetType=self.datasetType,
                                        isUnique=self.isUnique(),
                                        graph=self.graph,
                                        whereRegion=self.whereRegion,
                                        managers=self.managers)
            finally:
                session.dropTemporaryTable(table)

    @abstractmethod
    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        """Return a new `Query` whose columns and/or rows are (mostly) subset
        of this one's.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new `Query` being constructed.
            If `None` (default), ``self.graph`` is used.
        datasets : `bool`, optional
            Whether the new `Query` should include dataset results.  Defaults
            to `True`, but is ignored if ``self`` does not include dataset
            results.
        unique : `bool`, optional
            Whether the new `Query` should guarantee unique results (this may
            come with a performance penalty).

        Returns
        -------
        query : `Query`
            A query object corresponding to the given inputs.  May be ``self``
            if no changes were requested.

        Notes
        -----
        The way spatial overlaps are handled at present makes it impossible to
        fully guarantee in general that the new query's rows are a subset of
        this one's while also returning unique rows.  That's because the
        database is only capable of performing approximate, conservative
        overlaps via the common skypix system; we defer actual region overlap
        operations to per-result-row Python logic.  But including the region
        columns necessary to do that postprocessing in the query makes it
        impossible to do a SELECT DISTINCT on the user-visible dimensions of
        the query.  For example, consider starting with a query with dimensions
        (instrument, skymap, visit, tract).  That involves a spatial join
        between visit and tract, and we include the region columns from both
        tables in the results in order to only actually yield result rows
        (see `predicate` and `rows`) where the regions in those two columns
        overlap.  If the user then wants to subset to just (skymap, tract) with
        unique results, we have two unpalatable options:

         - we can do a SELECT DISTINCT with just the skymap and tract columns
           in the SELECT clause, dropping all detailed overlap information and
           including some tracts that did not actually overlap any of the
           visits in the original query (but were regarded as _possibly_
           overlapping via the coarser, common-skypix relationships);

         - we can include the tract and visit region columns in the query, and
           continue to filter out the non-overlapping pairs, but completely
           disregard the user's request for unique tracts.

        This interface specifies that implementations must do the former, as
        that's what makes things efficient in our most important use case
        (``QuantumGraph`` generation in ``pipe_base``).  We may be able to
        improve this situation in the future by putting exact overlap
        information in the database, either by using built-in (but
        engine-specific) spatial database functionality or (more likely)
        switching to a scheme in which pairwise dimension spatial relationships
        are explicitly precomputed (for e.g. combinations of instruments and
        skymaps).
        """
        raise NotImplementedError()

    @abstractmethod
    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        """Return a `QueryBuilder` that can be used to construct a new `Query`
        that is joined to (and hence constrained by) this one.

        Parameters
        ----------
        summary : `QuerySummary`, optional
            A `QuerySummary` instance that specifies the dimensions and any
            additional constraints to include in the new query being
            constructed, or `None` to use the dimensions of ``self`` with no
            additional constraints.
        """
        raise NotImplementedError()

    graph: DimensionGraph
    """The dimensions identified by this query and included in any data IDs
    created from its result rows (`DimensionGraph`).
    """

    whereRegion: Optional[Region]
    """A spatial region that all regions in all rows returned by this query
    must overlap (`lsst.sphgeom.Region` or `None`).
    """

    managers: RegistryManagers
    """A struct containing `Registry` helper object (`RegistryManagers`).
    """
@enum.unique
class DirectQueryUniqueness(enum.Enum):
    """Enumeration of the ways a query's result rows may (or may not) be
    unique.
    """

    NOT_UNIQUE = enum.auto()
    """No uniqueness is expected of the query's rows."""

    NATURALLY_UNIQUE = enum.auto()
    """Rows are unique by construction; neither SELECT DISTINCT nor a
    GROUP BY clause is required to make them so.
    """

    NEEDS_DISTINCT = enum.auto()
    """Rows are expected to be unique, but only if the query is issued with
    SELECT DISTINCT (or an equivalent GROUP BY clause).
    """
class DirectQuery(Query):
    """A `Query` backed directly by a SELECT statement, typically one that
    joins many tables.

    Instances are normally created by `QueryBuilder` or by the methods of
    other `Query` objects rather than being constructed by hand.

    Parameters
    ----------
    simpleQuery : `SimpleQuery`
        Struct holding the SELECT, FROM, and WHERE clauses.  Its column list
        must be empty; columns are added to a copy each time `sql` is
        evaluated.
    columns : `QueryColumns`
        Columns referenced by any clause of the query.  Must not be empty
        (use `EmptyQuery` for that case).
    uniqueness : `DirectQueryUniqueness`
        Whether result rows should be unique, and if so whether the database
        has to be asked explicitly to make them so.
    graph : `DimensionGraph`
        Object describing the dimensions included in the query.
    whereRegion : `lsst.sphgeom.Region`, optional
        Region that all region columns in all returned rows must overlap.
    managers : `RegistryManagers`
        Struct containing the `Registry` manager helper objects, forwarded
        to the `Query` constructor.
    """
    def __init__(self, *,
                 simpleQuery: SimpleQuery,
                 columns: QueryColumns,
                 uniqueness: DirectQueryUniqueness,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers):
        super().__init__(graph=graph, whereRegion=whereRegion, managers=managers)
        assert not simpleQuery.columns, "Columns should always be set on a copy in .sql"
        assert not columns.isEmpty(), "EmptyQuery must be used when a query would have no columns."
        # Caches of labelled columns, filled lazily by the getters below.
        self._dimensionColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {}
        self._regionColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {}
        self._datasetQueryColumns: Optional[DatasetQueryColumns] = None
        self._uniqueness = uniqueness
        self._columns = columns
        self._simpleQuery = simpleQuery

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return self._uniqueness is not DirectQueryUniqueness.NOT_UNIQUE

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        cached = self._dimensionColumns.get(name)
        if cached is not None:
            return cached
        labeled = self._columns.getKeyColumn(name).label(name)
        self._dimensionColumns[name] = labeled
        return labeled

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(self._columns.regions)

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        cached = self._regionColumns.get(name)
        if cached is not None:
            return cached
        labeled = self._columns.regions[name].column.label(f"{name}_region")
        self._regionColumns[name] = labeled
        return labeled

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        if self._datasetQueryColumns is not None:
            return self._datasetQueryColumns
        base = self._columns.datasets
        if base is None:
            return None
        # The ingest date column is optional; only label it when present.
        ingestDate = None if base.ingestDate is None else base.ingestDate.label("ingest_date")
        self._datasetQueryColumns = DatasetQueryColumns(
            datasetType=base.datasetType,
            id=base.id.label("dataset_id"),
            runKey=base.runKey.label(self.managers.collections.getRunForeignKeyName()),
            ingestDate=ingestDate,
        )
        return self._datasetQueryColumns

    @property
    def sql(self) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from Query.
        # Work on a copy so the stored SimpleQuery keeps an empty column list.
        query = self._simpleQuery.copy()
        query.columns.extend(self.getDimensionColumn(dimension.name) for dimension in self.graph)
        query.columns.extend(self.getRegionColumn(element.name) for element in self.spatial)
        datasetColumns = self.getDatasetColumns()
        if datasetColumns is not None:
            query.columns.extend(datasetColumns)
        combined = query.combine()
        if self._uniqueness is DirectQueryUniqueness.NEEDS_DISTINCT:
            combined = combined.distinct()
        return combined

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
        if columns is None:
            return self
        if columns.isEmpty():
            return EmptyQuery(self.graph.universe, self.managers)
        if unique:
            # Unique subsets drop region columns, so no region filter applies.
            uniqueness = DirectQueryUniqueness.NEEDS_DISTINCT
            whereRegion = None
        else:
            uniqueness = DirectQueryUniqueness.NOT_UNIQUE
            whereRegion = self.whereRegion
        return DirectQuery(
            simpleQuery=self._simpleQuery.copy(),
            columns=columns,
            uniqueness=uniqueness,
            graph=graph,
            whereRegion=whereRegion,
            managers=self.managers,
        )

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        # Imported here rather than at module scope (module scope only
        # imports it under TYPE_CHECKING).
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        result = QueryBuilder(summary, managers=self.managers)
        # Join this query in as an aliased subquery.
        result.joinTable(self.sql.alias(), dimensions=self.graph.dimensions,
                         datasets=self.getDatasetColumns())
        return result
class MaterializedQuery(Query):
    """A `Query` whose results have been saved in a temporary table.

    Do not construct `MaterializedQuery` instances directly; obtain them
    from `Query.materialize()` instead.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy object representing the temporary table.
    spatial : `Iterable` [ `DimensionElement` ]
        Spatial dimension elements whose regions must overlap for each valid
        result row (which may reject some rows that are in the table).
    datasetType : `DatasetType`
        The `DatasetType` of datasets returned by this query, or `None`
        if there are no dataset results.
    isUnique : `bool`
        If `True`, the table's rows are unique, and there is no need to
        add ``SELECT DISTINCT`` to guarantee this in results.
    graph : `DimensionGraph`
        Dimensions included in the columns of this table.
    whereRegion : `Region` or `None`
        A spatial region all result-row regions must overlap to be valid (which
        may reject some rows that are in the table).
    managers : `RegistryManagers`
        A struct containing `Registry` manager helper objects, forwarded to
        the `Query` constructor.
    """
    def __init__(self, *,
                 table: sqlalchemy.schema.Table,
                 spatial: Iterable[DimensionElement],
                 datasetType: Optional[DatasetType],
                 isUnique: bool,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers):
        super().__init__(graph=graph, whereRegion=whereRegion, managers=managers)
        self._isUnique = isUnique
        self._datasetType = datasetType
        # Snapshot into a tuple: the argument may be a one-shot iterator.
        self._spatial = tuple(spatial)
        self._table = table

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return self._isUnique

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        return self._table.columns[name]

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(self._spatial)

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        return self._table.columns[f"{name}_region"]

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        if self._datasetType is None:
            return None
        tableColumns = self._table.columns
        return DatasetQueryColumns(
            datasetType=self._datasetType,
            id=tableColumns["dataset_id"],
            runKey=tableColumns[self.managers.collections.getRunForeignKeyName()],
            ingestDate=None,
        )

    @property
    def sql(self) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from Query.
        return self._table.select()

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        # Docstring inherited from Query.
        # Already backed by a table: nothing to create and nothing to drop.
        yield self

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
        if columns is None:
            return self
        if columns.isEmpty():
            return EmptyQuery(self.graph.universe, managers=self.managers)
        # The subset is a plain SELECT over the temporary table.
        tableQuery = SimpleQuery()
        tableQuery.join(self._table)
        if unique:
            uniqueness = DirectQueryUniqueness.NEEDS_DISTINCT
            whereRegion = None
        else:
            uniqueness = DirectQueryUniqueness.NOT_UNIQUE
            whereRegion = self.whereRegion
        return DirectQuery(
            simpleQuery=tableQuery,
            columns=columns,
            uniqueness=uniqueness,
            graph=graph,
            whereRegion=whereRegion,
            managers=self.managers,
        )

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        # Imported here rather than at module scope (module scope only
        # imports it under TYPE_CHECKING).
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        result = QueryBuilder(summary, managers=self.managers)
        result.joinTable(self._table, dimensions=self.graph.dimensions, datasets=self.getDatasetColumns())
        return result
class EmptyQuery(Query):
    """A `Query` implementation that handles the special case where the
    query would have no columns.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Set of all dimensions from which the null set is extracted.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.
    """
    def __init__(self, universe: DimensionUniverse, managers: RegistryManagers):
        super().__init__(graph=universe.empty, whereRegion=None, managers=managers)

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return True

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        raise KeyError(f"No dimension {name} in query (no dimensions at all, actually).")

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(())

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        raise KeyError(f"No region for {name} in query (no regions at all, actually).")

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        return None

    def rows(self, db: Database, *, region: Optional[Region] = None
             ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
        # Docstring inherited from Query.
        # No SQL is executed: a single `None` stands in for the one logical
        # row of a query that has no columns.
        yield None

    @property
    def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
        # Docstring inherited from Query.
        return None

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        # Docstring inherited from Query.
        # Nothing to store, so no temporary table is created.
        yield self

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        assert graph is None or graph.issubset(self.graph)
        return self

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        # Imported here rather than at module scope (module scope only
        # imports it under TYPE_CHECKING).
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        # There is no table or subquery to join in.
        return QueryBuilder(summary, managers=self.managers)