Coverage for python/lsst/daf/butler/registry/queries/_results.py: 56%
185 statements
« prev ^ index » next coverage.py v7.3.0, created at 2023-09-02 09:34 +0000
« prev ^ index » next coverage.py v7.3.0, created at 2023-09-02 09:34 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = (
24 "ChainedDatasetQueryResults",
25 "DatabaseDimensionRecordQueryResults",
26 "DataCoordinateQueryResults",
27 "DatasetQueryResults",
28 "DimensionRecordQueryResults",
29 "ParentDatasetQueryResults",
30)
32import itertools
33from abc import abstractmethod
34from collections.abc import Iterable, Iterator, Sequence
35from contextlib import AbstractContextManager, ExitStack, contextmanager
36from typing import Any
38from ...core import (
39 DataCoordinate,
40 DataCoordinateIterable,
41 DatasetRef,
42 DatasetType,
43 DimensionElement,
44 DimensionGraph,
45 DimensionRecord,
46)
47from ._query import Query
48from ._structs import OrderByClause
51class DataCoordinateQueryResults(DataCoordinateIterable):
52 """An enhanced implementation of `DataCoordinateIterable` that represents
53 data IDs retrieved from a database query.
55 Parameters
56 ----------
57 query : `Query`
58 Query object that backs this class.
60 Notes
61 -----
62 The `Query` class now implements essentially all of this class's
63 functionality; "QueryResult" classes like this one now exist only to
64 provide interface backwards compatibility and more specific iterator
65 types.
66 """
68 def __init__(self, query: Query):
69 self._query = query
71 __slots__ = ("_query",)
73 def __iter__(self) -> Iterator[DataCoordinate]:
74 return self._query.iter_data_ids()
76 def __repr__(self) -> str:
77 return f"<DataCoordinate iterator with dimensions={self.graph}>"
79 @property
80 def graph(self) -> DimensionGraph:
81 # Docstring inherited from DataCoordinateIterable.
82 return self._query.dimensions
84 def hasFull(self) -> bool:
85 # Docstring inherited from DataCoordinateIterable.
86 return True
88 def hasRecords(self) -> bool:
89 # Docstring inherited from DataCoordinateIterable.
90 return self._query.has_record_columns is True or not self.graph
92 @contextmanager
93 def materialize(self) -> Iterator[DataCoordinateQueryResults]:
94 """Insert this query's results into a temporary table.
96 Returns
97 -------
98 context : `typing.ContextManager` [ `DataCoordinateQueryResults` ]
99 A context manager that ensures the temporary table is created and
100 populated in ``__enter__`` (returning a results object backed by
101 that table), and dropped in ``__exit__``. If ``self`` is already
102 materialized, the context manager may do nothing (reflecting the
103 fact that an outer context manager should already take care of
104 everything else).
106 Notes
107 -----
108 When using a very large result set to perform multiple queries (e.g.
109 multiple calls to `subset` with different arguments, or even a single
110 call to `expanded`), it may be much more efficient to start by
111 materializing the query and only then performing the follow up queries.
112 It may also be less efficient, depending on how well database engine's
113 query optimizer can simplify those particular follow-up queries and
114 how efficiently it caches query results even when the are not
115 explicitly inserted into a temporary table. See `expanded` and
116 `subset` for examples.
117 """
118 with self._query.open_context():
119 yield DataCoordinateQueryResults(self._query.materialized())
121 def expanded(self) -> DataCoordinateQueryResults:
122 """Return a results object for which `hasRecords` returns `True`.
124 This method may involve actually executing database queries to fetch
125 `DimensionRecord` objects.
127 Returns
128 -------
129 results : `DataCoordinateQueryResults`
130 A results object for which `hasRecords` returns `True`. May be
131 ``self`` if that is already the case.
133 Notes
134 -----
135 For very result sets, it may be much more efficient to call
136 `materialize` before calling `expanded`, to avoid performing the
137 original query multiple times (as a subquery) in the follow-up queries
138 that fetch dimension records. For example::
140 with registry.queryDataIds(...).materialize() as tempDataIds:
141 dataIdsWithRecords = tempDataIds.expanded()
142 for dataId in dataIdsWithRecords:
143 ...
144 """
145 return DataCoordinateQueryResults(self._query.with_record_columns(defer=True))
147 def subset(
148 self, graph: DimensionGraph | None = None, *, unique: bool = False
149 ) -> DataCoordinateQueryResults:
150 """Return a results object containing a subset of the dimensions of
151 this one, and/or a unique near-subset of its rows.
153 This method may involve actually executing database queries to fetch
154 `DimensionRecord` objects.
156 Parameters
157 ----------
158 graph : `DimensionGraph`, optional
159 Dimensions to include in the new results object. If `None`,
160 ``self.graph`` is used.
161 unique : `bool`, optional
162 If `True` (`False` is default), the query should only return unique
163 data IDs. This is implemented in the database; to obtain unique
164 results via Python-side processing (which may be more efficient in
165 some cases), use `toSet` to construct a `DataCoordinateSet` from
166 this results object instead.
168 Returns
169 -------
170 results : `DataCoordinateQueryResults`
171 A results object corresponding to the given criteria. May be
172 ``self`` if it already qualifies.
174 Raises
175 ------
176 ValueError
177 Raised when ``graph`` is not a subset of the dimension graph in
178 this result.
180 Notes
181 -----
182 This method can only return a "near-subset" of the original result rows
183 in general because of subtleties in how spatial overlaps are
184 implemented; see `Query.projected` for more information.
186 When calling `subset` multiple times on the same very large result set,
187 it may be much more efficient to call `materialize` first. For
188 example::
190 dimensions1 = DimensionGraph(...)
191 dimensions2 = DimensionGraph(...)
192 with registry.queryDataIds(...).materialize() as tempDataIds:
193 for dataId1 in tempDataIds.subset(
194 graph=dimensions1,
195 unique=True):
196 ...
197 for dataId2 in tempDataIds.subset(
198 graph=dimensions2,
199 unique=True):
200 ...
201 """
202 if graph is None:
203 graph = self.graph
204 if not graph.issubset(self.graph):
205 raise ValueError(f"{graph} is not a subset of {self.graph}")
206 query = self._query.projected(graph, unique=unique, defer=True, drop_postprocessing=True)
207 return DataCoordinateQueryResults(query)
209 def findDatasets(
210 self,
211 datasetType: Any,
212 collections: Any,
213 *,
214 findFirst: bool = True,
215 components: bool | None = None,
216 ) -> DatasetQueryResults:
217 """Find datasets using the data IDs identified by this query.
219 Parameters
220 ----------
221 datasetType : `DatasetType` or `str`
222 Dataset type or the name of one to search for. Must have
223 dimensions that are a subset of ``self.graph``.
224 collections : `Any`
225 An expression that fully or partially identifies the collections
226 to search for the dataset, such as a `str`, `re.Pattern`, or
227 iterable thereof. ``...`` can be used to return all collections.
228 See :ref:`daf_butler_collection_expressions` for more information.
229 findFirst : `bool`, optional
230 If `True` (default), for each result data ID, only yield one
231 `DatasetRef`, from the first collection in which a dataset of that
232 dataset type appears (according to the order of ``collections``
233 passed in). If `True`, ``collections`` must not contain regular
234 expressions and may not be ``...``.
235 components : `bool`, optional
236 If `True`, apply all expression patterns to component dataset type
237 names as well. If `False`, never apply patterns to components. If
238 `None` (default), apply patterns to components only if their parent
239 datasets were not matched by the expression. Fully-specified
240 component datasets (`str` or `DatasetType` instances) are always
241 included.
243 Values other than `False` are deprecated, and only `False` will be
244 supported after v26. After v27 this argument will be removed
245 entirely.
247 Returns
248 -------
249 datasets : `ParentDatasetQueryResults`
250 A lazy-evaluation object representing dataset query results,
251 iterable over `DatasetRef` objects. If ``self.hasRecords()``, all
252 nested data IDs in those dataset references will have records as
253 well.
255 Raises
256 ------
257 MissingDatasetTypeError
258 Raised if the given dataset type is not registered.
259 """
260 parent_dataset_type, components_found = self._query.backend.resolve_single_dataset_type_wildcard(
261 datasetType, components=components, explicit_only=True
262 )
263 return ParentDatasetQueryResults(
264 self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True),
265 parent_dataset_type,
266 components_found,
267 )
269 def findRelatedDatasets(
270 self,
271 datasetType: DatasetType | str,
272 collections: Any,
273 *,
274 findFirst: bool = True,
275 dimensions: DimensionGraph | None = None,
276 ) -> Iterable[tuple[DataCoordinate, DatasetRef]]:
277 """Find datasets using the data IDs identified by this query, and
278 return them along with the original data IDs.
280 This is a variant of `findDatasets` that is often more useful when
281 the target dataset type does not have all of the dimensions of the
282 original data ID query, as is generally the case with calibration
283 lookups.
285 Parameters
286 ----------
287 datasetType : `DatasetType` or `str`
288 Dataset type or the name of one to search for. Must have
289 dimensions that are a subset of ``self.graph``.
290 collections : `Any`
291 An expression that fully or partially identifies the collections
292 to search for the dataset, such as a `str`, `re.Pattern`, or
293 iterable thereof. ``...`` can be used to return all collections.
294 See :ref:`daf_butler_collection_expressions` for more information.
295 findFirst : `bool`, optional
296 If `True` (default), for each data ID in ``self``, only yield one
297 `DatasetRef`, from the first collection in which a dataset of that
298 dataset type appears (according to the order of ``collections``
299 passed in). If `True`, ``collections`` must not contain regular
300 expressions and may not be ``...``. Note that this is not the
301 same as yielding one `DatasetRef` for each yielded data ID if
302 ``dimensions`` is not `None`.
303 dimensions : `DimensionGraph`, optional
304 The dimensions of the data IDs returned. Must be a subset of
305 ``self.dimensions``.
307 Returns
308 -------
309 pairs : `~collections.abc.Iterable` [ `tuple` [ `DataCoordinate`, \
310 `DatasetRef` ] ]
311 An iterable of (data ID, dataset reference) pairs.
313 Raises
314 ------
315 MissingDatasetTypeError
316 Raised if the given dataset type is not registered.
317 """
318 if dimensions is None:
319 dimensions = self.graph
320 parent_dataset_type, _ = self._query.backend.resolve_single_dataset_type_wildcard(
321 datasetType, components=False, explicit_only=True
322 )
323 query = self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True)
324 return query.iter_data_ids_and_dataset_refs(parent_dataset_type, dimensions)
326 def count(self, *, exact: bool = True, discard: bool = False) -> int:
327 """Count the number of rows this query would return.
329 Parameters
330 ----------
331 exact : `bool`, optional
332 If `True`, run the full query and perform post-query filtering if
333 needed to account for that filtering in the count. If `False`, the
334 result may be an upper bound.
335 discard : `bool`, optional
336 If `True`, compute the exact count even if it would require running
337 the full query and then throwing away the result rows after
338 counting them. If `False`, this is an error, as the user would
339 usually be better off executing the query first to fetch its rows
340 into a new query (or passing ``exact=False``). Ignored if
341 ``exact=False``.
343 Returns
344 -------
345 count : `int`
346 The number of rows the query would return, or an upper bound if
347 ``exact=False``.
349 Notes
350 -----
351 This counts the number of rows returned, not the number of unique rows
352 returned, so even with ``exact=True`` it may provide only an upper
353 bound on the number of *deduplicated* result rows.
354 """
355 return self._query.count(exact=exact, discard=discard)
357 def any(self, *, execute: bool = True, exact: bool = True) -> bool:
358 """Test whether this query returns any results.
360 Parameters
361 ----------
362 execute : `bool`, optional
363 If `True`, execute at least a ``LIMIT 1`` query if it cannot be
364 determined prior to execution that the query would return no rows.
365 exact : `bool`, optional
366 If `True`, run the full query and perform post-query filtering if
367 needed, until at least one result row is found. If `False`, the
368 returned result does not account for post-query filtering, and
369 hence may be `True` even when all result rows would be filtered
370 out.
372 Returns
373 -------
374 any : `bool`
375 `True` if the query would (or might, depending on arguments) yield
376 result rows. `False` if it definitely would not.
377 """
378 return self._query.any(execute=execute, exact=exact)
380 def explain_no_results(self, execute: bool = True) -> Iterable[str]:
381 """Return human-readable messages that may help explain why the query
382 yields no results.
384 Parameters
385 ----------
386 execute : `bool`, optional
387 If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
388 of aspects of the tree to more precisely determine where rows were
389 filtered out.
391 Returns
392 -------
393 messages : `~collections.abc.Iterable` [ `str` ]
394 String messages that describe reasons the query might not yield any
395 results.
396 """
397 return self._query.explain_no_results(execute=execute)
399 def order_by(self, *args: str) -> DataCoordinateQueryResults:
400 """Make the iterator return ordered results.
402 Parameters
403 ----------
404 *args : `str`
405 Names of the columns/dimensions to use for ordering. Column name
406 can be prefixed with minus (``-``) to use descending ordering.
408 Returns
409 -------
410 result : `DataCoordinateQueryResults`
411 Returns ``self`` instance which is updated to return ordered
412 result.
414 Notes
415 -----
416 This method modifies the iterator in place and returns the same
417 instance to support method chaining.
418 """
419 clause = OrderByClause.parse_general(args, self._query.dimensions)
420 self._query = self._query.sorted(clause.terms, defer=True)
421 return self
423 def limit(self, limit: int, offset: int | None = 0) -> DataCoordinateQueryResults:
424 """Make the iterator return limited number of records.
426 Parameters
427 ----------
428 limit : `int`
429 Upper limit on the number of returned records.
430 offset : `int` or `None`, optional
431 The number of records to skip before returning at most ``limit``
432 records. `None` is interpreted the same as zero for backwards
433 compatibility.
435 Returns
436 -------
437 result : `DataCoordinateQueryResults`
438 Returns ``self`` instance which is updated to return limited set
439 of records.
441 Notes
442 -----
443 This method modifies the iterator in place and returns the same
444 instance to support method chaining. Normally this method is used
445 together with `order_by` method.
446 """
447 if offset is None:
448 offset = 0
449 self._query = self._query.sliced(offset, offset + limit, defer=True)
450 return self
453class DatasetQueryResults(Iterable[DatasetRef]):
454 """An interface for objects that represent the results of queries for
455 datasets.
456 """
458 @abstractmethod
459 def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
460 """Group results by parent dataset type.
462 Returns
463 -------
464 iter : `~collections.abc.Iterator` [ `ParentDatasetQueryResults` ]
465 An iterator over `DatasetQueryResults` instances that are each
466 responsible for a single parent dataset type (either just that
467 dataset type, one or more of its component dataset types, or both).
468 """
469 raise NotImplementedError()
471 @abstractmethod
472 def materialize(self) -> AbstractContextManager[DatasetQueryResults]:
473 """Insert this query's results into a temporary table.
475 Returns
476 -------
477 context : `typing.ContextManager` [ `DatasetQueryResults` ]
478 A context manager that ensures the temporary table is created and
479 populated in ``__enter__`` (returning a results object backed by
480 that table), and dropped in ``__exit__``. If ``self`` is already
481 materialized, the context manager may do nothing (reflecting the
482 fact that an outer context manager should already take care of
483 everything else).
484 """
485 raise NotImplementedError()
487 @abstractmethod
488 def expanded(self) -> DatasetQueryResults:
489 """Return a `DatasetQueryResults` for which `DataCoordinate.hasRecords`
490 returns `True` for all data IDs in returned `DatasetRef` objects.
492 Returns
493 -------
494 expanded : `DatasetQueryResults`
495 Either a new `DatasetQueryResults` instance or ``self``, if it is
496 already expanded.
498 Notes
499 -----
500 As with `DataCoordinateQueryResults.expanded`, it may be more efficient
501 to call `materialize` before expanding data IDs for very large result
502 sets.
503 """
504 raise NotImplementedError()
506 @abstractmethod
507 def count(self, *, exact: bool = True, discard: bool = False) -> int:
508 """Count the number of rows this query would return.
510 Parameters
511 ----------
512 exact : `bool`, optional
513 If `True`, run the full query and perform post-query filtering if
514 needed to account for that filtering in the count. If `False`, the
515 result may be an upper bound.
516 discard : `bool`, optional
517 If `True`, compute the exact count even if it would require running
518 the full query and then throwing away the result rows after
519 counting them. If `False`, this is an error, as the user would
520 usually be better off executing the query first to fetch its rows
521 into a new query (or passing ``exact=False``). Ignored if
522 ``exact=False``.
524 Returns
525 -------
526 count : `int`
527 The number of rows the query would return, or an upper bound if
528 ``exact=False``.
530 Notes
531 -----
532 This counts the number of rows returned, not the number of unique rows
533 returned, so even with ``exact=True`` it may provide only an upper
534 bound on the number of *deduplicated* result rows.
535 """
536 raise NotImplementedError()
538 @abstractmethod
539 def any(
540 self,
541 *,
542 execute: bool = True,
543 exact: bool = True,
544 ) -> bool:
545 """Test whether this query returns any results.
547 Parameters
548 ----------
549 execute : `bool`, optional
550 If `True`, execute at least a ``LIMIT 1`` query if it cannot be
551 determined prior to execution that the query would return no rows.
552 exact : `bool`, optional
553 If `True`, run the full query and perform post-query filtering if
554 needed, until at least one result row is found. If `False`, the
555 returned result does not account for post-query filtering, and
556 hence may be `True` even when all result rows would be filtered
557 out.
559 Returns
560 -------
561 any : `bool`
562 `True` if the query would (or might, depending on arguments) yield
563 result rows. `False` if it definitely would not.
564 """
565 raise NotImplementedError()
567 @abstractmethod
568 def explain_no_results(self, execute: bool = True) -> Iterable[str]:
569 """Return human-readable messages that may help explain why the query
570 yields no results.
572 Parameters
573 ----------
574 execute : `bool`, optional
575 If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
576 of aspects of the tree to more precisely determine where rows were
577 filtered out.
579 Returns
580 -------
581 messages : `~collections.abc.Iterable` [ `str` ]
582 String messages that describe reasons the query might not yield any
583 results.
584 """
585 raise NotImplementedError()
587 def _iter_by_dataset_type(self) -> Iterator[tuple[DatasetType, Iterable[DatasetRef]]]:
588 """Group results by dataset type.
590 This is a private hook for the interface defined by
591 `DatasetRef.iter_by_type`, enabling much more efficient
592 processing of heterogeneous `DatasetRef` iterables when they come
593 directly from queries.
594 """
595 for parent_results in self.byParentDatasetType():
596 for component in parent_results.components:
597 dataset_type = parent_results.parentDatasetType
598 if component is not None:
599 dataset_type = dataset_type.makeComponentDatasetType(component)
600 yield (dataset_type, parent_results.withComponents((component,)))
603class ParentDatasetQueryResults(DatasetQueryResults):
604 """An object that represents results from a query for datasets with a
605 single parent `DatasetType`.
607 Parameters
608 ----------
609 query : `Query`
610 Low-level query object that backs these results.
611 dataset_type : `DatasetType`
612 Parent dataset type for all datasets returned by this query.
613 components : `~collections.abc.Sequence` [ `str` or `None` ], optional
614 Names of components to include in iteration. `None` may be included
615 (at most once) to include the parent dataset type.
617 Notes
618 -----
619 The `Query` class now implements essentially all of this class's
620 functionality; "QueryResult" classes like this one now exist only to
621 provide interface backwards compatibility and more specific iterator
622 types.
623 """
625 def __init__(
626 self,
627 query: Query,
628 dataset_type: DatasetType,
629 components: Sequence[str | None] = (None,),
630 ):
631 self._query = query
632 self._dataset_type = dataset_type
633 self._components = components
635 __slots__ = ("_query", "_dataset_type", "_components")
637 def __iter__(self) -> Iterator[DatasetRef]:
638 return self._query.iter_dataset_refs(self._dataset_type, self._components)
640 def __repr__(self) -> str:
641 return f"<DatasetRef iterator for [components of] {self._dataset_type.name}>"
643 @property
644 def components(self) -> Sequence[str | None]:
645 """The components of the parent dataset type included in these results
646 (`~collections.abc.Sequence` [ `str` or `None` ]).
647 """
648 return self._components
650 def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
651 # Docstring inherited from DatasetQueryResults.
652 yield self
654 @contextmanager
655 def materialize(self) -> Iterator[ParentDatasetQueryResults]:
656 # Docstring inherited from DatasetQueryResults.
657 with self._query.open_context():
658 yield ParentDatasetQueryResults(self._query.materialized(), self._dataset_type, self._components)
660 @property
661 def parentDatasetType(self) -> DatasetType:
662 """The parent dataset type for all datasets in this iterable
663 (`DatasetType`).
664 """
665 return self._dataset_type
667 @property
668 def dataIds(self) -> DataCoordinateQueryResults:
669 """A lazy-evaluation object representing a query for just the data
670 IDs of the datasets that would be returned by this query
671 (`DataCoordinateQueryResults`).
673 The returned object is not in general `zip`-iterable with ``self``;
674 it may be in a different order or have (or not have) duplicates.
675 """
676 return DataCoordinateQueryResults(self._query.projected(defer=True))
678 def withComponents(self, components: Sequence[str | None]) -> ParentDatasetQueryResults:
679 """Return a new query results object for the same parent datasets but
680 different components.
682 components : `~collections.abc.Sequence` [ `str` or `None` ]
683 Names of components to include in iteration. `None` may be
684 included (at most once) to include the parent dataset type.
685 """
686 return ParentDatasetQueryResults(self._query, self._dataset_type, components)
688 def expanded(self) -> ParentDatasetQueryResults:
689 # Docstring inherited from DatasetQueryResults.
690 return ParentDatasetQueryResults(
691 self._query.with_record_columns(defer=True), self._dataset_type, self._components
692 )
694 def count(self, *, exact: bool = True, discard: bool = False) -> int:
695 # Docstring inherited.
696 return len(self._components) * self._query.count(exact=exact, discard=discard)
698 def any(self, *, execute: bool = True, exact: bool = True) -> bool:
699 # Docstring inherited.
700 return self._query.any(execute=execute, exact=exact)
702 def explain_no_results(self, execute: bool = True) -> Iterable[str]:
703 # Docstring inherited.
704 return self._query.explain_no_results(execute=execute)
707class ChainedDatasetQueryResults(DatasetQueryResults):
708 """A `DatasetQueryResults` implementation that simply chains together
709 other results objects, each for a different parent dataset type.
711 Parameters
712 ----------
713 chain : `~collections.abc.Sequence` [ `ParentDatasetQueryResults` ]
714 The underlying results objects this object will chain together.
715 doomed_by : `~collections.abc.Iterable` [ `str` ], optional
716 A list of messages (appropriate for e.g. logging or exceptions) that
717 explain why the query is known to return no results even before it is
718 executed. Queries with a non-empty list will never be executed.
719 Child results objects may also have their own list.
720 """
722 def __init__(self, chain: Sequence[ParentDatasetQueryResults], doomed_by: Iterable[str] = ()):
723 self._chain = chain
724 self._doomed_by = tuple(doomed_by)
726 __slots__ = ("_chain",)
728 def __iter__(self) -> Iterator[DatasetRef]:
729 return itertools.chain.from_iterable(self._chain)
731 def __repr__(self) -> str:
732 return "<DatasetRef iterator for multiple dataset types>"
734 def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
735 # Docstring inherited from DatasetQueryResults.
736 return iter(self._chain)
738 @contextmanager
739 def materialize(self) -> Iterator[ChainedDatasetQueryResults]:
740 # Docstring inherited from DatasetQueryResults.
741 with ExitStack() as stack:
742 yield ChainedDatasetQueryResults([stack.enter_context(r.materialize()) for r in self._chain])
744 def expanded(self) -> ChainedDatasetQueryResults:
745 # Docstring inherited from DatasetQueryResults.
746 return ChainedDatasetQueryResults([r.expanded() for r in self._chain], self._doomed_by)
748 def count(self, *, exact: bool = True, discard: bool = False) -> int:
749 # Docstring inherited.
750 return sum(r.count(exact=exact, discard=discard) for r in self._chain)
752 def any(self, *, execute: bool = True, exact: bool = True) -> bool:
753 # Docstring inherited.
754 return any(r.any(execute=execute, exact=exact) for r in self._chain)
756 def explain_no_results(self, execute: bool = True) -> Iterable[str]:
757 # Docstring inherited.
758 result = list(self._doomed_by)
759 for r in self._chain:
760 result.extend(r.explain_no_results(execute=execute))
761 return result
764class DimensionRecordQueryResults(Iterable[DimensionRecord]):
765 """An interface for objects that represent the results of queries for
766 dimension records.
767 """
769 @property
770 @abstractmethod
771 def element(self) -> DimensionElement:
772 raise NotImplementedError()
774 @abstractmethod
775 def run(self) -> DimensionRecordQueryResults:
776 raise NotImplementedError()
778 @abstractmethod
779 def count(self, *, exact: bool = True, discard: bool = False) -> int:
780 """Count the number of rows this query would return.
782 Parameters
783 ----------
784 exact : `bool`, optional
785 If `True`, run the full query and perform post-query filtering if
786 needed to account for that filtering in the count. If `False`, the
787 result may be an upper bound.
788 discard : `bool`, optional
789 If `True`, compute the exact count even if it would require running
790 the full query and then throwing away the result rows after
791 counting them. If `False`, this is an error, as the user would
792 usually be better off executing the query first to fetch its rows
793 into a new query (or passing ``exact=False``). Ignored if
794 ``exact=False``.
796 Returns
797 -------
798 count : `int`
799 The number of rows the query would return, or an upper bound if
800 ``exact=False``.
802 Notes
803 -----
804 This counts the number of rows returned, not the number of unique rows
805 returned, so even with ``exact=True`` it may provide only an upper
806 bound on the number of *deduplicated* result rows.
807 """
808 raise NotImplementedError()
810 @abstractmethod
811 def any(self, *, execute: bool = True, exact: bool = True) -> bool:
812 """Test whether this query returns any results.
814 Parameters
815 ----------
816 execute : `bool`, optional
817 If `True`, execute at least a ``LIMIT 1`` query if it cannot be
818 determined prior to execution that the query would return no rows.
819 exact : `bool`, optional
820 If `True`, run the full query and perform post-query filtering if
821 needed, until at least one result row is found. If `False`, the
822 returned result does not account for post-query filtering, and
823 hence may be `True` even when all result rows would be filtered
824 out.
826 Returns
827 -------
828 any : `bool`
829 `True` if the query would (or might, depending on arguments) yield
830 result rows. `False` if it definitely would not.
831 """
832 raise NotImplementedError()
834 @abstractmethod
835 def order_by(self, *args: str) -> DimensionRecordQueryResults:
836 """Make the iterator return ordered result.
838 Parameters
839 ----------
840 *args : `str`
841 Names of the columns/dimensions to use for ordering. Column name
842 can be prefixed with minus (``-``) to use descending ordering.
844 Returns
845 -------
846 result : `DimensionRecordQueryResults`
847 Returns ``self`` instance which is updated to return ordered
848 result.
850 Notes
851 -----
852 This method can modify the iterator in place and return the same
853 instance.
854 """
855 raise NotImplementedError()
857 @abstractmethod
858 def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
859 """Make the iterator return limited number of records.
861 Parameters
862 ----------
863 limit : `int`
864 Upper limit on the number of returned records.
865 offset : `int` or `None`
866 The number of records to skip before returning at most ``limit``
867 records. `None` is interpreted the same as zero for backwards
868 compatibility.
870 Returns
871 -------
872 result : `DimensionRecordQueryResults`
873 Returns ``self`` instance which is updated to return limited set of
874 records.
876 Notes
877 -----
878 This method can modify the iterator in place and return the same
879 instance. Normally this method is used together with `order_by` method.
880 """
881 raise NotImplementedError()
883 @abstractmethod
884 def explain_no_results(self, execute: bool = True) -> Iterable[str]:
885 """Return human-readable messages that may help explain why the query
886 yields no results.
888 Parameters
889 ----------
890 execute : `bool`, optional
891 If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
892 of aspects of the tree to more precisely determine where rows were
893 filtered out.
895 Returns
896 -------
897 messages : `~collections.abc.Iterable` [ `str` ]
898 String messages that describe reasons the query might not yield any
899 results.
900 """
901 raise NotImplementedError()
904class DatabaseDimensionRecordQueryResults(DimensionRecordQueryResults):
905 """Implementation of DimensionRecordQueryResults using database query.
907 Parameters
908 ----------
909 query : `Query`
910 Query object that backs this class.
911 element : `DimensionElement`
912 Element whose records this object returns.
914 Notes
915 -----
916 The `Query` class now implements essentially all of this class's
917 functionality; "QueryResult" classes like this one now exist only to
918 provide interface backwards compatibility and more specific iterator
919 types.
920 """
922 def __init__(self, query: Query, element: DimensionElement):
923 self._query = query
924 self._element = element
926 @property
927 def element(self) -> DimensionElement:
928 return self._element
930 def __iter__(self) -> Iterator[DimensionRecord]:
931 return self._query.iter_dimension_records(self._element)
933 def run(self) -> DimensionRecordQueryResults:
934 return DatabaseDimensionRecordQueryResults(self._query.run(), self._element)
936 def count(self, *, exact: bool = True, discard: bool = False) -> int:
937 # Docstring inherited from base class.
938 return self._query.count(exact=exact)
940 def any(self, *, execute: bool = True, exact: bool = True) -> bool:
941 # Docstring inherited from base class.
942 return self._query.any(execute=execute, exact=exact)
944 def order_by(self, *args: str) -> DimensionRecordQueryResults:
945 # Docstring inherited from base class.
946 clause = OrderByClause.parse_element(args, self._element)
947 self._query = self._query.sorted(clause.terms, defer=True)
948 return self
950 def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
951 # Docstring inherited from base class.
952 if offset is None:
953 offset = 0
954 self._query = self._query.sliced(offset, offset + limit, defer=True)
955 return self
957 def explain_no_results(self, execute: bool = True) -> Iterable[str]:
958 # Docstring inherited.
959 return self._query.explain_no_results(execute=execute)