Coverage for python/lsst/daf/butler/registry/queries/_query.py : 27%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("Query",)
25from abc import ABC, abstractmethod
26from contextlib import contextmanager
27import enum
28import itertools
29from typing import (
30 Callable,
31 Iterable,
32 Iterator,
33 Mapping,
34 Optional,
35 Tuple,
36 TYPE_CHECKING,
37)
39import sqlalchemy
41from lsst.sphgeom import Region
43from ...core import (
44 addDimensionForeignKey,
45 DataCoordinate,
46 DatasetRef,
47 DatasetType,
48 ddl,
49 Dimension,
50 DimensionElement,
51 DimensionGraph,
52 DimensionRecord,
53 DimensionUniverse,
54 SpatialRegionDatabaseRepresentation,
55 SimpleQuery,
56)
57from ..interfaces import Database
58from ._structs import DatasetQueryColumns, QueryColumns, QuerySummary, RegistryManagers
60if TYPE_CHECKING: 60 ↛ 61line 60 didn't jump to line 61, because the condition on line 60 was never true
61 from ._builder import QueryBuilder
class Query(ABC):
    """An abstract base class for queries that return some combination of
    `DatasetRef` and `DataCoordinate` objects.

    Parameters
    ----------
    graph : `DimensionGraph`
        Object describing the dimensions included in the query.
    whereRegion : `lsst.sphgeom.Region`, optional
        Region that all region columns in all returned rows must overlap.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.

    Notes
    -----
    The `Query` hierarchy abstracts over the database/SQL representation of a
    particular set of data IDs or datasets.  It is expected to be used as a
    backend for other objects that provide more natural interfaces for one or
    both of these, not as part of a public interface to query results.
    """
    def __init__(self, *,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers,
                 ):
        self.graph = graph
        self.whereRegion = whereRegion
        self.managers = managers

    @abstractmethod
    def isUnique(self) -> bool:
        """Return `True` if this query's rows are guaranteed to be unique, and
        `False` otherwise.

        If this query has dataset results (`datasetType` is not `None`),
        uniqueness applies to the `DatasetRef` instances returned by
        `extractDatasetRef` from the result of `rows`.  If it does not have
        dataset results, uniqueness applies to the `DataCoordinate` instances
        returned by `extractDataId`.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        """Return the query column that contains the primary key value for
        the dimension with the given name.

        Parameters
        ----------
        name : `str`
            Name of the dimension.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`.
            SQLAlchemy object representing a column in the query.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def spatial(self) -> Iterator[DimensionElement]:
        """An iterator over the dimension element columns used in post-query
        filtering of spatial overlaps (`Iterator` [ `DimensionElement` ]).

        Notes
        -----
        This property is intended primarily as a hook for subclasses to
        implement and the ABC to call in order to provide higher-level
        functionality; code that uses `Query` objects (but does not implement
        one) should usually not have to access this property.
        """
        raise NotImplementedError()

    @abstractmethod
    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        """Return a region column for one of the dimension elements iterated
        over by `spatial`.

        Parameters
        ----------
        name : `str`
            Name of the element.

        Returns
        -------
        column : `sqlalchemy.sql.ColumnElement`
            SQLAlchemy representing a result column in the query.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    def datasetType(self) -> Optional[DatasetType]:
        """The `DatasetType` of datasets returned by this query, or `None`
        if there are no dataset results (`DatasetType` or `None`).
        """
        cols = self.getDatasetColumns()
        if cols is None:
            return None
        return cols.datasetType

    @abstractmethod
    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        """Return the columns for the datasets returned by this query.

        Returns
        -------
        columns : `DatasetQueryColumns` or `None`
            Struct containing SQLAlchemy representations of the result columns
            for a dataset.

        Notes
        -----
        This method is intended primarily as a hook for subclasses to implement
        and the ABC to call in order to provide higher-level functionality;
        code that uses `Query` objects (but does not implement one) should
        usually not have to call this method.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
        """A SQLAlchemy object representing the full query
        (`sqlalchemy.sql.FromClause` or `None`).

        This is `None` in the special case where the query has no columns, and
        only one logical row.
        """
        raise NotImplementedError()

    def predicate(self, region: Optional[Region] = None) -> Callable[[sqlalchemy.engine.RowProxy], bool]:
        """Return a callable that can perform extra Python-side filtering of
        query results.

        To get the expected results from a query, the returned predicate *must*
        be used to ignore rows for which it returns `False`; this permits the
        `QueryBuilder` implementation to move logic from the database to Python
        without changing the public interface.

        Parameters
        ----------
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order for the
            predicate to return `True`.  If not provided, this will be
            ``self.whereRegion``, if that exists.

        Returns
        -------
        func : `Callable`
            A callable that takes a single `sqlalchemy.engine.RowProxy`
            argument and returns `bool`.
        """
        whereRegion = region if region is not None else self.whereRegion

        def closure(row: sqlalchemy.engine.RowProxy) -> bool:
            rowRegions = [row[self.getRegionColumn(element.name)] for element in self.spatial]
            # Compare against `None` explicitly so the check never depends on
            # how a region object happens to define truthiness.
            if whereRegion is not None and any(r.isDisjointFrom(whereRegion) for r in rowRegions):
                return False
            # Every pair of regions in the row must overlap as well.
            return not any(a.isDisjointFrom(b) for a, b in itertools.combinations(rowRegions, 2))

        return closure

    def rows(self, db: Database, *, region: Optional[Region] = None
             ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
        """Execute the query and yield result rows, applying `predicate`.

        Parameters
        ----------
        db : `Database`
            Database object whose ``query`` method is used to execute
            ``self.sql``.
        region : `sphgeom.Region`, optional
            A region that any result-row regions must overlap in order to be
            yielded.  If not provided, this will be ``self.whereRegion``, if
            that exists.

        Yields
        ------
        row : `sqlalchemy.engine.RowProxy` or `None`
            Result row from the query.  `None` may be yielded exactly once
            instead of any real rows to indicate an empty query (see
            `EmptyQuery`).
        """
        predicate = self.predicate(region)
        for row in db.query(self.sql):
            if predicate(row):
                yield row

    def extractDimensionsTuple(self, row: Optional[sqlalchemy.engine.RowProxy],
                               dimensions: Iterable[Dimension]) -> tuple:
        """Extract a tuple of data ID values from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy` or `None`
            A result row from a SQLAlchemy SELECT query, or `None` to indicate
            the row from an `EmptyQuery`.
        dimensions : `Iterable` [ `Dimension` ]
            The dimensions to include in the returned tuple, in order.

        Returns
        -------
        values : `tuple`
            A tuple of dimension primary key values.
        """
        if row is None:
            assert not tuple(dimensions), "Can only utilize empty query row when there are no dimensions."
            return ()
        return tuple(row[self.getDimensionColumn(dimension.name)] for dimension in dimensions)

    def extractDataId(self, row: Optional[sqlalchemy.engine.RowProxy], *,
                      graph: Optional[DimensionGraph] = None,
                      records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
                      ) -> DataCoordinate:
        """Extract a data ID from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy` or `None`
            A result row from a SQLAlchemy SELECT query, or `None` to indicate
            the row from an `EmptyQuery`.
        graph : `DimensionGraph`, optional
            The dimensions the returned data ID should identify.  If not
            provided, ``self.graph`` is used.
        records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
            Nested mapping containing records to attach to the returned
            `DataCoordinate`, for which `~DataCoordinate.hasRecords` will
            return `True`.  If provided, outer keys must include all dimension
            element names in ``graph``, and inner keys should be tuples of
            dimension primary key values in the same order as
            ``element.graph.required``.  If not provided,
            `DataCoordinate.hasRecords` will return `False` on the returned
            object.

        Returns
        -------
        dataId : `DataCoordinate`
            A data ID that identifies all required and implied dimensions.  If
            ``records is not None``, this will have
            `~DataCoordinate.hasRecords()` return `True`.
        """
        if graph is None:
            graph = self.graph
        if not graph:
            return DataCoordinate.makeEmpty(self.graph.universe)
        dataId = DataCoordinate.fromFullValues(
            graph,
            self.extractDimensionsTuple(row, itertools.chain(graph.required, graph.implied))
        )
        if records is not None:
            recordsForRow = {}
            for element in graph.elements:
                key = tuple(dataId.subset(element.graph).values())
                recordsForRow[element.name] = records[element.name].get(key)
            return dataId.expanded(recordsForRow)
        else:
            return dataId

    def extractDatasetRef(self, row: sqlalchemy.engine.RowProxy,
                          dataId: Optional[DataCoordinate] = None,
                          records: Optional[Mapping[str, Mapping[tuple, DimensionRecord]]] = None,
                          ) -> DatasetRef:
        """Extract a `DatasetRef` from a result row.

        Parameters
        ----------
        row : `sqlalchemy.engine.RowProxy`
            A result row from a SQLAlchemy SELECT query.
        dataId : `DataCoordinate`
            Data ID to attach to the `DatasetRef`.  A minimal (i.e. base class)
            `DataCoordinate` is constructed from ``row`` if `None`.
        records : `Mapping` [ `str`, `Mapping` [ `tuple`, `DimensionRecord` ] ]
            Records to use to return an `ExpandedDataCoordinate`.  If provided,
            outer keys must include all dimension element names in ``graph``,
            and inner keys should be tuples of dimension primary key values
            in the same order as ``element.graph.required``.

        Returns
        -------
        ref : `DatasetRef`
            Reference to the dataset; guaranteed to have `DatasetRef.id` not
            `None`.
        """
        datasetColumns = self.getDatasetColumns()
        assert datasetColumns is not None
        if dataId is None:
            dataId = self.extractDataId(row, graph=datasetColumns.datasetType.dimensions, records=records)
        runRecord = self.managers.collections[row[datasetColumns.runKey]]
        return DatasetRef(datasetColumns.datasetType, dataId, id=row[datasetColumns.id], run=runRecord.name)

    def _makeTableSpec(self, constraints: bool = False) -> ddl.TableSpec:
        """Helper method for subclass implementations of `materialize`.

        Parameters
        ----------
        constraints : `bool`, optional
            If `True` (`False` is default), define a specification that
            includes actual foreign key constraints for logical foreign keys.
            Some database engines do not permit temporary tables to reference
            normal tables, so this should be `False` when generating a spec
            for a temporary table unless the database engine is known to
            support them.

        Returns
        -------
        spec : `ddl.TableSpec`
            Specification for a table that could hold this query's result rows.
        """
        unique = self.isUnique()
        spec = ddl.TableSpec(fields=())
        for dimension in self.graph:
            addDimensionForeignKey(spec, dimension, primaryKey=unique, constraint=constraints)
        for element in self.spatial:
            spec.fields.update(
                SpatialRegionDatabaseRepresentation.makeFieldSpecs(
                    nullable=True,
                    name=f"{element.name}_region",
                )
            )
        datasetColumns = self.getDatasetColumns()
        if datasetColumns is not None:
            self.managers.datasets.addDatasetForeignKey(spec, primaryKey=unique, constraint=constraints)
            self.managers.collections.addRunForeignKey(spec, nullable=False, constraint=constraints)
        return spec

    def _makeSubsetQueryColumns(self, *, graph: Optional[DimensionGraph] = None,
                                datasets: bool = True,
                                unique: bool = False) -> Tuple[DimensionGraph, Optional[QueryColumns]]:
        """Helper method for subclass implementations of `subset`.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new `Query` being constructed.
            ``subset`` implementations should generally just forward their
            own ``graph`` argument here.
        datasets : `bool`, optional
            Whether the new `Query` should include dataset results.  Defaults
            to `True`, but is ignored if ``self`` does not include dataset
            results.
        unique : `bool`, optional
            Whether the new `Query` should guarantee unique results (this may
            come with a performance penalty).

        Returns
        -------
        graph : `DimensionGraph`
            The dimensions of the new `Query`.  This is exactly the same as
            the argument of the same name, with ``self.graph`` used if that
            argument is `None`.
        columns : `QueryColumns` or `None`
            A struct containing the SQLAlchemy column objects to use in the
            new query, constructed by delegating to other (mostly abstract)
            methods on ``self``.  If `None`, `subset` may return ``self``.
        """
        if graph is None:
            graph = self.graph
        # Fetch the dataset columns once; they are consulted both to decide
        # whether anything changes and to populate the new struct.
        datasetColumns = self.getDatasetColumns()
        if (graph == self.graph and (datasetColumns is None or datasets)
                and (self.isUnique() or not unique)):
            # Nothing was actually requested to change.
            return graph, None
        columns = QueryColumns()
        for dimension in graph.dimensions:
            col = self.getDimensionColumn(dimension.name)
            columns.keys[dimension] = [col]
        if not unique:
            # Region columns are only carried along when uniqueness is not
            # requested (see the notes in `subset`).
            # NOTE(review): `DirectQuery.getRegionColumn` reads ``.column``
            # from the values stored in ``columns.regions``, while a plain
            # column expression is stored here -- confirm these agree.
            for element in self.spatial:
                col = self.getRegionColumn(element.name)
                columns.regions[element] = col
        if datasets and datasetColumns is not None:
            columns.datasets = datasetColumns
        return graph, columns

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        """Execute this query and insert its results into a temporary table.

        Parameters
        ----------
        db : `Database`
            Database engine to execute the query against.

        Returns
        -------
        context : `typing.ContextManager` [ `MaterializedQuery` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a `MaterializedQuery` object
            backed by that table), and dropped in ``__exit__``.  If ``self``
            is already a `MaterializedQuery`, ``__enter__`` may just return
            ``self`` and ``__exit__`` may do nothing (reflecting the fact that
            an outer context manager should already take care of everything
            else).

        Notes
        -----
        The temporary table is dropped even if populating it fails or the
        body of the ``with`` block raises.
        """
        spec = self._makeTableSpec()
        with db.session() as session:
            table = session.makeTemporaryTable(spec)
            try:
                db.insert(table, select=self.sql, names=spec.fields.names)
                yield MaterializedQuery(table=table,
                                        spatial=self.spatial,
                                        datasetType=self.datasetType,
                                        isUnique=self.isUnique(),
                                        graph=self.graph,
                                        whereRegion=self.whereRegion,
                                        managers=self.managers)
            finally:
                # `contextmanager` re-raises exceptions from the caller's
                # ``with`` body at the ``yield``; without ``finally`` the
                # table would be leaked on any error path.
                session.dropTemporaryTable(table)

    @abstractmethod
    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        """Return a new `Query` whose columns and/or rows are (mostly) subset
        of this one's.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new `Query` being constructed.
            If `None` (default), ``self.graph`` is used.
        datasets : `bool`, optional
            Whether the new `Query` should include dataset results.  Defaults
            to `True`, but is ignored if ``self`` does not include dataset
            results.
        unique : `bool`, optional
            Whether the new `Query` should guarantee unique results (this may
            come with a performance penalty).

        Returns
        -------
        query : `Query`
            A query object corresponding to the given inputs.  May be ``self``
            if no changes were requested.

        Notes
        -----
        The way spatial overlaps are handled at present makes it impossible to
        fully guarantee in general that the new query's rows are a subset of
        this one's while also returning unique rows.  That's because the
        database is only capable of performing approximate, conservative
        overlaps via the common skypix system; we defer actual region overlap
        operations to per-result-row Python logic.  But including the region
        columns necessary to do that postprocessing in the query makes it
        impossible to do a SELECT DISTINCT on the user-visible dimensions of
        the query.  For example, consider starting with a query with dimensions
        (instrument, skymap, visit, tract).  That involves a spatial join
        between visit and tract, and we include the region columns from both
        tables in the results in order to only actually yield result rows
        (see `predicate` and `rows`) where the regions in those two columns
        overlap.  If the user then wants to subset to just (skymap, tract) with
        unique results, we have two unpalatable options:

        - we can do a SELECT DISTINCT with just the skymap and tract columns
          in the SELECT clause, dropping all detailed overlap information and
          including some tracts that did not actually overlap any of the
          visits in the original query (but were regarded as _possibly_
          overlapping via the coarser, common-skypix relationships);

        - we can include the tract and visit region columns in the query, and
          continue to filter out the non-overlapping pairs, but completely
          disregard the user's request for unique tracts.

        This interface specifies that implementations must do the former, as
        that's what makes things efficient in our most important use case
        (``QuantumGraph`` generation in ``pipe_base``).  We may be able to
        improve this situation in the future by putting exact overlap
        information in the database, either by using built-in (but
        engine-specific) spatial database functionality or (more likely)
        switching to a scheme in which pairwise dimension spatial relationships
        are explicitly precomputed (for e.g. combinations of instruments and
        skymaps).
        """
        raise NotImplementedError()

    @abstractmethod
    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        """Return a `QueryBuilder` that can be used to construct a new `Query`
        that is joined to (and hence constrained by) this one.

        Parameters
        ----------
        summary : `QuerySummary`, optional
            A `QuerySummary` instance that specifies the dimensions and any
            additional constraints to include in the new query being
            constructed, or `None` to use the dimensions of ``self`` with no
            additional constraints.
        """
        raise NotImplementedError()

    graph: DimensionGraph
    """The dimensions identified by this query and included in any data IDs
    created from its result rows (`DimensionGraph`).
    """

    whereRegion: Optional[Region]
    """A spatial region that all regions in all rows returned by this query
    must overlap (`lsst.sphgeom.Region` or `None`).
    """

    managers: RegistryManagers
    """A struct containing `Registry` helper object (`RegistryManagers`).
    """
class DirectQueryUniqueness(enum.Enum):
    """Enumeration of how (and whether) a `DirectQuery` obtains unique result
    rows.
    """

    NOT_UNIQUE = enum.auto()
    """Result rows are not expected to be unique.
    """

    NATURALLY_UNIQUE = enum.auto()
    """Result rows are unique by construction of the query; neither SELECT
    DISTINCT nor a GROUP BY clause is required.
    """

    NEEDS_DISTINCT = enum.auto()
    """Result rows are expected to be unique, but only if the query uses
    SELECT DISTINCT (or an equivalent GROUP BY clause).
    """
class DirectQuery(Query):
    """A `Query` backed by a direct SELECT statement, typically one joining
    many tables.

    Instances are normally created by `QueryBuilder` or by methods of other
    `Query` objects rather than constructed by hand.

    Parameters
    ----------
    simpleQuery : `SimpleQuery`
        Struct holding the SELECT, FROM, and WHERE clauses.  Its column list
        must be empty; columns are added to a copy in `sql`.
    columns : `QueryColumns`
        Columns referenced by any clause of the query.  Must not be empty.
    uniqueness : `DirectQueryUniqueness`
        Whether result rows should be unique, and if so whether the database
        must be asked explicitly to make them so.
    graph : `DimensionGraph`
        Object describing the dimensions included in the query.
    whereRegion : `lsst.sphgeom.Region`, optional
        Region that all region columns in all returned rows must overlap.
    managers : `RegistryManagers`
        Struct of `Registry` manager helper objects, forwarded to the `Query`
        constructor.
    """
    def __init__(self, *,
                 simpleQuery: SimpleQuery,
                 columns: QueryColumns,
                 uniqueness: DirectQueryUniqueness,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers):
        super().__init__(graph=graph, whereRegion=whereRegion, managers=managers)
        assert not simpleQuery.columns, "Columns should always be set on a copy in .sql"
        assert not columns.isEmpty(), "EmptyQuery must be used when a query would have no columns."
        self._simpleQuery = simpleQuery
        self._columns = columns
        self._uniqueness = uniqueness

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        notUnique = DirectQueryUniqueness.NOT_UNIQUE
        return self._uniqueness is not notUnique

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        keyColumn = self._columns.getKeyColumn(name)
        return keyColumn.label(name)

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(self._columns.regions)

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        regionColumn = self._columns.regions[name].column
        return regionColumn.label(f"{name}_region")

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        original = self._columns.datasets
        if original is None:
            return None
        # The ingest date column is optional; label it only when present.
        labelledIngestDate = original.ingestDate
        if labelledIngestDate is not None:
            labelledIngestDate = labelledIngestDate.label("ingest_date")
        runKeyName = self.managers.collections.getRunForeignKeyName()
        return DatasetQueryColumns(
            datasetType=original.datasetType,
            id=original.id.label("dataset_id"),
            runKey=original.runKey.label(runKeyName),
            ingestDate=labelledIngestDate,
        )

    @property
    def sql(self) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from Query.
        # Work on a copy so the stored SimpleQuery keeps an empty column list.
        query = self._simpleQuery.copy()
        query.columns.extend(self.getDimensionColumn(dimension.name) for dimension in self.graph)
        query.columns.extend(self.getRegionColumn(element.name) for element in self.spatial)
        datasetColumns = self.getDatasetColumns()
        if datasetColumns is not None:
            query.columns.extend(datasetColumns)
        selectable = query.combine()
        if self._uniqueness is DirectQueryUniqueness.NEEDS_DISTINCT:
            selectable = selectable.distinct()
        return selectable

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
        if columns is None:
            # No change was requested.
            return self
        if columns.isEmpty():
            return EmptyQuery(self.graph.universe, self.managers)
        if unique:
            uniqueness = DirectQueryUniqueness.NEEDS_DISTINCT
            whereRegion = None
        else:
            uniqueness = DirectQueryUniqueness.NOT_UNIQUE
            whereRegion = self.whereRegion
        return DirectQuery(
            simpleQuery=self._simpleQuery.copy(),
            columns=columns,
            uniqueness=uniqueness,
            graph=graph,
            whereRegion=whereRegion,
            managers=self.managers,
        )

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        result = QueryBuilder(summary, managers=self.managers)
        # Join this query in as an aliased subquery.
        subquery = self.sql.alias()
        result.joinTable(subquery, dimensions=self.graph.dimensions,
                         datasets=self.getDatasetColumns())
        return result
class MaterializedQuery(Query):
    """A `Query` whose results have been saved in a temporary table.

    `MaterializedQuery` instances should not be constructed directly; use
    `Query.materialize()` instead.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy object representing the temporary table.
    spatial : `Iterable` [ `DimensionElement` ]
        Spatial dimension elements whose regions must overlap for each valid
        result row (which may reject some rows that are in the table).
    datasetType : `DatasetType`
        The `DatasetType` of datasets returned by this query, or `None`
        if there are no dataset results.
    isUnique : `bool`
        If `True`, the table's rows are unique, and there is no need to
        add ``SELECT DISTINCT`` to guarantee this in results.
    graph : `DimensionGraph`
        Dimensions included in the columns of this table.
    whereRegion : `Region` or `None`
        A spatial region all result-row regions must overlap to be valid (which
        may reject some rows that are in the table).
    managers : `RegistryManagers`
        A struct containing `Registry` manager helper objects, forwarded to
        the `Query` constructor.
    """
    def __init__(self, *,
                 table: sqlalchemy.schema.Table,
                 spatial: Iterable[DimensionElement],
                 datasetType: Optional[DatasetType],
                 isUnique: bool,
                 graph: DimensionGraph,
                 whereRegion: Optional[Region],
                 managers: RegistryManagers):
        super().__init__(graph=graph, whereRegion=whereRegion, managers=managers)
        self._table = table
        # Stored as a tuple so `spatial` can hand out a fresh iterator on
        # every access.
        self._spatial = tuple(spatial)
        self._datasetType = datasetType
        self._isUnique = isUnique

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return self._isUnique

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        tableColumns = self._table.columns
        return tableColumns[name]

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(self._spatial)

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        tableColumns = self._table.columns
        return tableColumns[f"{name}_region"]

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        if self._datasetType is None:
            return None
        return DatasetQueryColumns(
            datasetType=self._datasetType,
            id=self._table.columns["dataset_id"],
            runKey=self._table.columns[self.managers.collections.getRunForeignKeyName()],
            ingestDate=None,
        )

    @property
    def sql(self) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from Query.
        return self._table.select()

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        # Docstring inherited from Query.
        yield self

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        graph, columns = self._makeSubsetQueryColumns(graph=graph, datasets=datasets, unique=unique)
        if columns is None:
            # No change was requested.
            return self
        if columns.isEmpty():
            return EmptyQuery(self.graph.universe, managers=self.managers)
        # The new query selects directly from the temporary table.
        tableQuery = SimpleQuery()
        tableQuery.join(self._table)
        if unique:
            uniqueness = DirectQueryUniqueness.NEEDS_DISTINCT
            whereRegion = None
        else:
            uniqueness = DirectQueryUniqueness.NOT_UNIQUE
            whereRegion = self.whereRegion
        return DirectQuery(
            simpleQuery=tableQuery,
            columns=columns,
            uniqueness=uniqueness,
            graph=graph,
            whereRegion=whereRegion,
            managers=self.managers,
        )

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph, whereRegion=self.whereRegion)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        result = QueryBuilder(summary, managers=self.managers)
        result.joinTable(self._table, dimensions=self.graph.dimensions, datasets=self.getDatasetColumns())
        return result
class EmptyQuery(Query):
    """A `Query` implementation that handles the special case where the query
    would have no columns.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Set of all dimensions from which the null set is extracted.
    managers : `RegistryManagers`
        A struct containing the registry manager instances used by the query
        system.
    """
    def __init__(self, universe: DimensionUniverse, managers: RegistryManagers):
        emptyGraph = universe.empty
        super().__init__(graph=emptyGraph, whereRegion=None, managers=managers)

    def isUnique(self) -> bool:
        # Docstring inherited from Query.
        return True

    def getDimensionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        raise KeyError(f"No dimension {name} in query (no dimensions at all, actually).")

    @property
    def spatial(self) -> Iterator[DimensionElement]:
        # Docstring inherited from Query.
        return iter(())

    def getRegionColumn(self, name: str) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited from Query.
        raise KeyError(f"No region for {name} in query (no regions at all, actually).")

    def getDatasetColumns(self) -> Optional[DatasetQueryColumns]:
        # Docstring inherited from Query.
        return None

    def rows(self, db: Database, *, region: Optional[Region] = None
             ) -> Iterator[Optional[sqlalchemy.engine.RowProxy]]:
        # Docstring inherited from Query.
        # A single `None` stands in for the one logical row; the database is
        # never consulted.
        yield None

    @property
    def sql(self) -> Optional[sqlalchemy.sql.FromClause]:
        # Docstring inherited from Query.
        return None

    @contextmanager
    def materialize(self, db: Database) -> Iterator[Query]:
        # Docstring inherited from Query.
        yield self

    def subset(self, *, graph: Optional[DimensionGraph] = None,
               datasets: bool = True,
               unique: bool = False) -> Query:
        # Docstring inherited from Query.
        assert graph is None or graph.issubset(self.graph)
        return self

    def makeBuilder(self, summary: Optional[QuerySummary] = None) -> QueryBuilder:
        # Docstring inherited from Query.
        from ._builder import QueryBuilder
        if summary is None:
            summary = QuerySummary(self.graph)
        if not summary.requested.issubset(self.graph):
            raise NotImplementedError(
                f"Query.makeBuilder does not yet support augmenting dimensions "
                f"({summary.requested.dimensions}) beyond those originally included in the query "
                f"({self.graph.dimensions})."
            )
        result = QueryBuilder(summary, managers=self.managers)
        return result