Coverage for python/lsst/daf/butler/registry/queries/_results.py: 57%
187 statements
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = (
    "ChainedDatasetQueryResults",
    "DatabaseDimensionRecordQueryResults",
    "DataCoordinateQueryResults",
    "DatasetQueryResults",
    "DimensionRecordQueryResults",
    "ParentDatasetQueryResults",
)

import itertools
from abc import abstractmethod
from collections.abc import Iterable, Iterator, Sequence
from contextlib import AbstractContextManager, ExitStack, contextmanager
from typing import Any

from ..._dataset_ref import DatasetRef
from ..._dataset_type import DatasetType
from ...dimensions import (
    DataCoordinate,
    DataCoordinateIterable,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
)
from ._query import Query
from ._structs import OrderByClause


class DataCoordinateQueryResults(DataCoordinateIterable):
    """An enhanced implementation of `DataCoordinateIterable` that represents
    data IDs retrieved from a database query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(self, query: Query):
        self._query = query

    __slots__ = ("_query",)

    def __iter__(self) -> Iterator[DataCoordinate]:
        return self._query.iter_data_ids()

    def __repr__(self) -> str:
        return f"<DataCoordinate iterator with dimensions={self.graph}>"

    @property
    def graph(self) -> DimensionGraph:
        # Docstring inherited from DataCoordinateIterable.
        return self._query.dimensions

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinateIterable.
        return True

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinateIterable.
        return self._query.has_record_columns is True or not self.graph

    @contextmanager
    def materialize(self) -> Iterator[DataCoordinateQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DataCoordinateQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``. If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).

        Notes
        -----
        When using a very large result set to perform multiple queries (e.g.
        multiple calls to `subset` with different arguments, or even a single
        call to `expanded`), it may be much more efficient to start by
        materializing the query and only then performing the follow-up
        queries. It may also be less efficient, depending on how well the
        database engine's query optimizer can simplify those particular
        follow-up queries and how efficiently it caches query results even
        when they are not explicitly inserted into a temporary table. See
        `expanded` and `subset` for examples.
        """
        with self._query.open_context():
            yield DataCoordinateQueryResults(self._query.materialized())
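
    # A minimal usage sketch for ``materialize`` (illustrative only; the
    # ``registry`` object and the dimension, dataset-type, and collection
    # names are assumptions, not part of this module):
    #
    #     data_ids = registry.queryDataIds(["visit", "detector"])
    #     with data_ids.materialize() as temp_data_ids:
    #         # Both follow-up queries are now served from the temporary
    #         # table rather than re-running the original query.
    #         with_records = temp_data_ids.expanded()
    #         refs = list(temp_data_ids.findDatasets("raw", collections=["HSC/raw/all"]))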

    def expanded(self) -> DataCoordinateQueryResults:
        """Return a results object for which `hasRecords` returns `True`.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object for which `hasRecords` returns `True`. May be
            ``self`` if that is already the case.

        Notes
        -----
        For very large result sets, it may be much more efficient to call
        `materialize` before calling `expanded`, to avoid performing the
        original query multiple times (as a subquery) in the follow-up
        queries that fetch dimension records. For example::

            with registry.queryDataIds(...).materialize() as tempDataIds:
                dataIdsWithRecords = tempDataIds.expanded()
                for dataId in dataIdsWithRecords:
                    ...
        """
        return DataCoordinateQueryResults(self._query.with_record_columns(defer=True))

    def subset(
        self, graph: DimensionGraph | None = None, *, unique: bool = False
    ) -> DataCoordinateQueryResults:
        """Return a results object containing a subset of the dimensions of
        this one, and/or a unique near-subset of its rows.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new results object. If `None`,
            ``self.graph`` is used.
        unique : `bool`, optional
            If `True` (`False` is default), the query should only return
            unique data IDs. This is implemented in the database; to obtain
            unique results via Python-side processing (which may be more
            efficient in some cases), use `toSet` to construct a
            `DataCoordinateSet` from this results object instead.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object corresponding to the given criteria. May be
            ``self`` if it already qualifies.

        Raises
        ------
        ValueError
            Raised when ``graph`` is not a subset of the dimension graph in
            this result.

        Notes
        -----
        This method can only return a "near-subset" of the original result
        rows in general because of subtleties in how spatial overlaps are
        implemented; see `Query.projected` for more information.

        When calling `subset` multiple times on the same very large result
        set, it may be much more efficient to call `materialize` first. For
        example::

            dimensions1 = DimensionGraph(...)
            dimensions2 = DimensionGraph(...)
            with registry.queryDataIds(...).materialize() as tempDataIds:
                for dataId1 in tempDataIds.subset(graph=dimensions1, unique=True):
                    ...
                for dataId2 in tempDataIds.subset(graph=dimensions2, unique=True):
                    ...
        """
        if graph is None:
            graph = self.graph
        if not graph.issubset(self.graph):
            raise ValueError(f"{graph} is not a subset of {self.graph}")
        query = self._query.projected(graph, unique=unique, defer=True, drop_postprocessing=True)
        return DataCoordinateQueryResults(query)

    def findDatasets(
        self,
        datasetType: Any,
        collections: Any,
        *,
        findFirst: bool = True,
        components: bool | None = None,
    ) -> DatasetQueryResults:
        """Find datasets using the data IDs identified by this query.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for. Must have
            dimensions that are a subset of ``self.graph``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof. ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each result data ID, only yield one
            `DatasetRef`, from the first collection in which a dataset of
            that dataset type appears (according to the order of
            ``collections`` passed in). If `True`, ``collections`` must not
            contain regular expressions and may not be ``...``.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset
            type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will
            be supported after v26. After v27 this argument will be removed
            entirely.

        Returns
        -------
        datasets : `ParentDatasetQueryResults`
            A lazy-evaluation object representing dataset query results,
            iterable over `DatasetRef` objects. If ``self.hasRecords()``, all
            nested data IDs in those dataset references will have records as
            well.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
        """
        parent_dataset_type, components_found = self._query.backend.resolve_single_dataset_type_wildcard(
            datasetType, components=components, explicit_only=True
        )
        return ParentDatasetQueryResults(
            self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True),
            parent_dataset_type,
            components_found,
        )
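
    # Hypothetical ``findDatasets`` usage (a sketch; ``registry``, the
    # "calexp" dataset type, and the collection name are assumptions):
    #
    #     data_ids = registry.queryDataIds(["visit", "detector"], instrument="HSC")
    #     for ref in data_ids.findDatasets("calexp", collections=["HSC/runs/RC2"]):
    #         print(ref.dataId, ref.run)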

    def findRelatedDatasets(
        self,
        datasetType: DatasetType | str,
        collections: Any,
        *,
        findFirst: bool = True,
        dimensions: DimensionGraph | None = None,
    ) -> Iterable[tuple[DataCoordinate, DatasetRef]]:
        """Find datasets using the data IDs identified by this query, and
        return them along with the original data IDs.

        This is a variant of `findDatasets` that is often more useful when
        the target dataset type does not have all of the dimensions of the
        original data ID query, as is generally the case with calibration
        lookups.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for. Must have
            dimensions that are a subset of ``self.graph``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof. ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each data ID in ``self``, only yield one
            `DatasetRef`, from the first collection in which a dataset of
            that dataset type appears (according to the order of
            ``collections`` passed in). If `True`, ``collections`` must not
            contain regular expressions and may not be ``...``. Note that
            this is not the same as yielding one `DatasetRef` for each
            yielded data ID if ``dimensions`` is not `None`.
        dimensions : `DimensionGraph`, optional
            The dimensions of the data IDs returned. Must be a subset of
            ``self.graph``.

        Returns
        -------
        pairs : `~collections.abc.Iterable` [ `tuple` [ `DataCoordinate`, \
                `DatasetRef` ] ]
            An iterable of (data ID, dataset reference) pairs.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
        """
        if dimensions is None:
            dimensions = self.graph
        parent_dataset_type, _ = self._query.backend.resolve_single_dataset_type_wildcard(
            datasetType, components=False, explicit_only=True
        )
        query = self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True)
        return query.iter_data_ids_and_dataset_refs(parent_dataset_type, dimensions)
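
    # Hypothetical calibration-lookup sketch for ``findRelatedDatasets``
    # (``registry``, the "bias" dataset type, and the collection name are
    # assumptions): each (exposure, detector) data ID is paired with the
    # bias frame that would be used to calibrate it, even though "bias"
    # itself has no exposure dimension.
    #
    #     data_ids = registry.queryDataIds(["exposure", "detector"])
    #     for data_id, bias_ref in data_ids.findRelatedDatasets(
    #         "bias", collections=["HSC/calib"]
    #     ):
    #         print(data_id, "->", bias_ref)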

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`,
            the result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require
            running the full query and then throwing away the result rows
            after counting them. If `False`, this is an error, as the user
            would usually be better off executing the query first to fetch
            its rows into a new query (or passing ``exact=False``). Ignored
            if ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique
        rows returned, so even with ``exact=True`` it may provide only an
        upper bound on the number of *deduplicated* result rows.
        """
        return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no
            rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments)
            yield result rows. `False` if it definitely would not.
        """
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the
        query yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT
            1``) of aspects of the tree to more precisely determine where
            rows were filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield
            any results.
        """
        return self._query.explain_no_results(execute=execute)
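
    # A common diagnostic pattern combining ``any``, ``count``, and
    # ``explain_no_results`` (a sketch; ``data_ids`` is assumed to be a
    # ``DataCoordinateQueryResults`` instance):
    #
    #     if not data_ids.any(exact=True):
    #         for message in data_ids.explain_no_results():
    #             print(message)
    #     else:
    #         print("at most", data_ids.count(exact=False), "rows")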

    def order_by(self, *args: str) -> DataCoordinateQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering. Column
            names can be prefixed with minus (``-``) to use descending
            ordering.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            ``self``, updated in place to return ordered results.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining.
        """
        clause = OrderByClause.parse_general(args, self._query.dimensions)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DataCoordinateQueryResults:
        """Make the iterator return a limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records. `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            ``self``, updated in place to return a limited set of records.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining. Normally this method is used
        together with the `order_by` method.
        """
        if offset is None:
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self
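
# Because ``order_by`` and ``limit`` both mutate ``self`` and return it, they
# chain naturally (a sketch; ``data_ids`` and the dimension names are
# assumptions):
#
#     # Skip the five highest visit IDs, then take the next ten data IDs.
#     for data_id in data_ids.order_by("-visit").limit(10, offset=5):
#         ...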


class DatasetQueryResults(Iterable[DatasetRef]):
    """An interface for objects that represent the results of queries for
    datasets.
    """

    @abstractmethod
    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        """Group results by parent dataset type.

        Returns
        -------
        iter : `~collections.abc.Iterator` [ `ParentDatasetQueryResults` ]
            An iterator over `DatasetQueryResults` instances that are each
            responsible for a single parent dataset type (either just that
            dataset type, one or more of its component dataset types, or
            both).
        """
        raise NotImplementedError()

    @abstractmethod
    def materialize(self) -> AbstractContextManager[DatasetQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DatasetQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``. If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).
        """
        raise NotImplementedError()

    @abstractmethod
    def expanded(self) -> DatasetQueryResults:
        """Return a `DatasetQueryResults` for which
        `DataCoordinate.hasRecords` returns `True` for all data IDs in
        returned `DatasetRef` objects.

        Returns
        -------
        expanded : `DatasetQueryResults`
            Either a new `DatasetQueryResults` instance or ``self``, if it is
            already expanded.

        Notes
        -----
        As with `DataCoordinateQueryResults.expanded`, it may be more
        efficient to call `materialize` before expanding data IDs for very
        large result sets.
        """
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`,
            the result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require
            running the full query and then throwing away the result rows
            after counting them. If `False`, this is an error, as the user
            would usually be better off executing the query first to fetch
            its rows into a new query (or passing ``exact=False``). Ignored
            if ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique
        rows returned, so even with ``exact=True`` it may provide only an
        upper bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(
        self,
        *,
        execute: bool = True,
        exact: bool = True,
    ) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no
            rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments)
            yield result rows. `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the
        query yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT
            1``) of aspects of the tree to more precisely determine where
            rows were filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield
            any results.
        """
        raise NotImplementedError()

    def _iter_by_dataset_type(self) -> Iterator[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Group results by dataset type.

        This is a private hook for the interface defined by
        `DatasetRef.iter_by_type`, enabling much more efficient processing of
        heterogeneous `DatasetRef` iterables when they come directly from
        queries.
        """
        for parent_results in self.byParentDatasetType():
            for component in parent_results.components:
                dataset_type = parent_results.parentDatasetType
                if component is not None:
                    dataset_type = dataset_type.makeComponentDatasetType(component)
                yield (dataset_type, parent_results.withComponents((component,)))
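
# ``byParentDatasetType`` is the hook that lets heterogeneous results be
# processed one dataset type at a time (a sketch; ``results`` is assumed to
# be any ``DatasetQueryResults`` instance):
#
#     for parent_results in results.byParentDatasetType():
#         print(parent_results.parentDatasetType.name)
#         for ref in parent_results:
#             ...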


class ParentDatasetQueryResults(DatasetQueryResults):
    """An object that represents results from a query for datasets with a
    single parent `DatasetType`.

    Parameters
    ----------
    query : `Query`
        Low-level query object that backs these results.
    dataset_type : `DatasetType`
        Parent dataset type for all datasets returned by this query.
    components : `~collections.abc.Sequence` [ `str` or `None` ], optional
        Names of components to include in iteration. `None` may be included
        (at most once) to include the parent dataset type.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(
        self,
        query: Query,
        dataset_type: DatasetType,
        components: Sequence[str | None] = (None,),
    ):
        self._query = query
        self._dataset_type = dataset_type
        self._components = components

    __slots__ = ("_query", "_dataset_type", "_components")

    def __iter__(self) -> Iterator[DatasetRef]:
        return self._query.iter_dataset_refs(self._dataset_type, self._components)

    def __repr__(self) -> str:
        return f"<DatasetRef iterator for [components of] {self._dataset_type.name}>"

    @property
    def components(self) -> Sequence[str | None]:
        """The components of the parent dataset type included in these
        results (`~collections.abc.Sequence` [ `str` or `None` ]).
        """
        return self._components

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        yield self

    @contextmanager
    def materialize(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with self._query.open_context():
            yield ParentDatasetQueryResults(self._query.materialized(), self._dataset_type, self._components)

    @property
    def parentDatasetType(self) -> DatasetType:
        """The parent dataset type for all datasets in this iterable
        (`DatasetType`).
        """
        return self._dataset_type

    @property
    def dataIds(self) -> DataCoordinateQueryResults:
        """A lazy-evaluation object representing a query for just the data
        IDs of the datasets that would be returned by this query
        (`DataCoordinateQueryResults`).

        The returned object is not in general `zip`-iterable with ``self``;
        it may be in a different order or have (or not have) duplicates.
        """
        return DataCoordinateQueryResults(self._query.projected(defer=True))

    def withComponents(self, components: Sequence[str | None]) -> ParentDatasetQueryResults:
        """Return a new query results object for the same parent datasets but
        different components.

        Parameters
        ----------
        components : `~collections.abc.Sequence` [ `str` or `None` ]
            Names of components to include in iteration. `None` may be
            included (at most once) to include the parent dataset type.
        """
        return ParentDatasetQueryResults(self._query, self._dataset_type, components)
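
    # Sketch of ``withComponents`` (the "calexp" parent dataset type and its
    # "wcs" component are assumptions): given results for a parent dataset
    # type, iterate over just one of its components instead.
    #
    #     wcs_results = parent_results.withComponents(["wcs"])
    #     for ref in wcs_results:
    #         assert ref.datasetType.name == "calexp.wcs"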

    def expanded(self) -> ParentDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ParentDatasetQueryResults(
            self._query.with_record_columns(defer=True), self._dataset_type, self._components
        )

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return len(self._components) * self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)


class ChainedDatasetQueryResults(DatasetQueryResults):
    """A `DatasetQueryResults` implementation that simply chains together
    other results objects, each for a different parent dataset type.

    Parameters
    ----------
    chain : `~collections.abc.Sequence` [ `ParentDatasetQueryResults` ]
        The underlying results objects this object will chain together.
    doomed_by : `~collections.abc.Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed. Queries with a non-empty list will never be executed.
        Child results objects may also have their own list.
    """

    def __init__(self, chain: Sequence[ParentDatasetQueryResults], doomed_by: Iterable[str] = ()):
        self._chain = chain
        self._doomed_by = tuple(doomed_by)

    __slots__ = ("_chain", "_doomed_by")

    def __iter__(self) -> Iterator[DatasetRef]:
        return itertools.chain.from_iterable(self._chain)

    def __repr__(self) -> str:
        return "<DatasetRef iterator for multiple dataset types>"

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        return iter(self._chain)

    @contextmanager
    def materialize(self) -> Iterator[ChainedDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with ExitStack() as stack:
            yield ChainedDatasetQueryResults([stack.enter_context(r.materialize()) for r in self._chain])

    def expanded(self) -> ChainedDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ChainedDatasetQueryResults([r.expanded() for r in self._chain], self._doomed_by)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return sum(r.count(exact=exact, discard=discard) for r in self._chain)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return any(r.any(execute=execute, exact=exact) for r in self._chain)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        result = list(self._doomed_by)
        for r in self._chain:
            result.extend(r.explain_no_results(execute=execute))
        return result
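
# Sketch of how a chained results object composes its children (the child
# result names are assumptions): each aggregate method simply delegates to
# every child.
#
#     chained = ChainedDatasetQueryResults([raw_results, calexp_results])
#     total = chained.count(exact=False)       # sum over both children
#     messages = chained.explain_no_results()  # doomed_by + child messages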


class DimensionRecordQueryResults(Iterable[DimensionRecord]):
    """An interface for objects that represent the results of queries for
    dimension records.
    """

    @property
    @abstractmethod
    def element(self) -> DimensionElement:
        """Dimension element whose records this object returns
        (`DimensionElement`).
        """
        raise NotImplementedError()

    @abstractmethod
    def run(self) -> DimensionRecordQueryResults:
        """Execute the query immediately and return a results object backed
        by the rows it fetched.
        """
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`,
            the result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require
            running the full query and then throwing away the result rows
            after counting them. If `False`, this is an error, as the user
            would usually be better off executing the query first to fetch
            its rows into a new query (or passing ``exact=False``). Ignored
            if ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique
        rows returned, so even with ``exact=True`` it may provide only an
        upper bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no
            rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments)
            yield result rows. `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering. Column
            names can be prefixed with minus (``-``) to use descending
            ordering.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            ``self``, updated to return ordered results.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance.
        """
        raise NotImplementedError()

    @abstractmethod
    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        """Make the iterator return a limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records. `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            ``self``, updated to return a limited set of records.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance. Normally this method is used together with the `order_by`
        method.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the
        query yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT
            1``) of aspects of the tree to more precisely determine where
            rows were filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield
            any results.
        """
        raise NotImplementedError()


class DatabaseDimensionRecordQueryResults(DimensionRecordQueryResults):
    """Implementation of `DimensionRecordQueryResults` using a database
    query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.
    element : `DimensionElement`
        Element whose records this object returns.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(self, query: Query, element: DimensionElement):
        self._query = query
        self._element = element

    @property
    def element(self) -> DimensionElement:
        # Docstring inherited from base class.
        return self._element

    def __iter__(self) -> Iterator[DimensionRecord]:
        return self._query.iter_dimension_records(self._element)

    def run(self) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        return DatabaseDimensionRecordQueryResults(self._query.run(), self._element)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited from base class.
        return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited from base class.
        return self._query.any(execute=execute, exact=exact)

    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        clause = OrderByClause.parse_element(args, self._element)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        if offset is None:
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)
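
# End-to-end sketch for dimension-record queries (``registry`` and the
# "detector"/"HSC" names are assumptions):
#
#     records = registry.queryDimensionRecords("detector", instrument="HSC")
#     for record in records.order_by("full_name").limit(5):
#         print(record.id, record.full_name)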