# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = (
    "ChainedDatasetQueryResults",
    "DatabaseDimensionRecordQueryResults",
    "DataCoordinateQueryResults",
    "DatasetQueryResults",
    "DimensionRecordQueryResults",
    "ParentDatasetQueryResults",
)

import itertools
from abc import abstractmethod
from collections.abc import Iterable, Iterator, Sequence
from contextlib import AbstractContextManager, ExitStack, contextmanager
from typing import Any

from deprecated.sphinx import deprecated

from ..._dataset_ref import DatasetRef
from ..._dataset_type import DatasetType
from ..._exceptions_legacy import DatasetTypeError
from ...dimensions import (
    DataCoordinate,
    DataCoordinateIterable,
    DimensionElement,
    DimensionGraph,
    DimensionGroup,
    DimensionRecord,
)
from ._query import Query
from ._structs import OrderByClause


class DataCoordinateQueryResults(DataCoordinateIterable):
    """An enhanced implementation of `DataCoordinateIterable` that represents
    data IDs retrieved from a database query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
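
    For example, instances of this class are typically obtained from
    `Registry.queryDataIds`; the registry, dimension names, and data ID
    values below are illustrative::

        results = registry.queryDataIds(["visit", "detector"], instrument="X")
        for data_id in results:
            ...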
76 """
78 def __init__(self, query: Query):
79 self._query = query
81 __slots__ = ("_query",)
83 def __iter__(self) -> Iterator[DataCoordinate]:
84 return self._query.iter_data_ids()
86 def __repr__(self) -> str:
87 return f"<DataCoordinate iterator with dimensions={self.graph}>"
89 @property
90 @deprecated(
91 "Deprecated in favor of .dimensions. Will be removed after v27.",
92 version="v27",
93 category=FutureWarning,
94 )
95 def graph(self) -> DimensionGraph:
96 # Docstring inherited from DataCoordinateIterable.
97 return self._query.dimensions._as_graph()
99 @property
100 def dimensions(self) -> DimensionGroup:
101 """The dimensions of the data IDs returned by this query."""
102 return self._query.dimensions
104 def hasFull(self) -> bool:
105 # Docstring inherited from DataCoordinateIterable.
106 return True
108 def hasRecords(self) -> bool:
109 # Docstring inherited from DataCoordinateIterable.
110 return self._query.has_record_columns is True or not self.dimensions

    @contextmanager
    def materialize(self) -> Iterator[DataCoordinateQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DataCoordinateQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``. If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).

        Notes
        -----
        When using a very large result set to perform multiple queries (e.g.
        multiple calls to `subset` with different arguments, or even a single
        call to `expanded`), it may be much more efficient to start by
        materializing the query and only then performing the follow-up
        queries. It may also be less efficient, depending on how well the
        database engine's query optimizer can simplify those particular
        follow-up queries and how efficiently it caches query results even
        when they are not explicitly inserted into a temporary table. See
        `expanded` and `subset` for examples.
        """
        with self._query.open_context():
            yield DataCoordinateQueryResults(self._query.materialized())

    def expanded(self) -> DataCoordinateQueryResults:
        """Return a results object for which `hasRecords` returns `True`.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object for which `hasRecords` returns `True`. May be
            ``self`` if that is already the case.

        Notes
        -----
        For very large result sets, it may be much more efficient to call
        `materialize` before calling `expanded`, to avoid performing the
        original query multiple times (as a subquery) in the follow-up queries
        that fetch dimension records. For example::

            with registry.queryDataIds(...).materialize() as tempDataIds:
                dataIdsWithRecords = tempDataIds.expanded()
                for dataId in dataIdsWithRecords:
                    ...
        """
        return DataCoordinateQueryResults(self._query.with_record_columns(defer=True))

    def subset(
        self,
        dimensions: DimensionGroup | DimensionGraph | Iterable[str] | None = None,
        *,
        unique: bool = False,
    ) -> DataCoordinateQueryResults:
        """Return a results object containing a subset of the dimensions of
        this one, and/or a unique near-subset of its rows.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Parameters
        ----------
        dimensions : `DimensionGroup`, `DimensionGraph`, or \
                `~collections.abc.Iterable` [ `str` ], optional
            Dimensions to include in the new results object. If `None`,
            ``self.dimensions`` is used.
        unique : `bool`, optional
            If `True` (`False` is default), the query should only return unique
            data IDs. This is implemented in the database; to obtain unique
            results via Python-side processing (which may be more efficient in
            some cases), use `toSet` to construct a `DataCoordinateSet` from
            this results object instead.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object corresponding to the given criteria. May be
            ``self`` if it already qualifies.

        Raises
        ------
        ValueError
            Raised when ``dimensions`` is not a subset of the dimensions in
            this result.

        Notes
        -----
        This method can only return a "near-subset" of the original result rows
        in general because of subtleties in how spatial overlaps are
        implemented; see `Query.projected` for more information.

        When calling `subset` multiple times on the same very large result set,
        it may be much more efficient to call `materialize` first. For
        example::

            dimensions1 = DimensionGroup(...)
            dimensions2 = DimensionGroup(...)
            with registry.queryDataIds(...).materialize() as tempDataIds:
                for dataId1 in tempDataIds.subset(dimensions1, unique=True):
                    ...
                for dataId2 in tempDataIds.subset(dimensions2, unique=True):
                    ...
        """
        if dimensions is None:
            dimensions = self.dimensions
        else:
            dimensions = self.dimensions.universe.conform(dimensions)
            if not dimensions.issubset(self.dimensions):
                raise ValueError(f"{dimensions} is not a subset of {self.dimensions}")
        query = self._query.projected(dimensions.names, unique=unique, defer=True, drop_postprocessing=True)
        return DataCoordinateQueryResults(query)

    def findDatasets(
        self,
        datasetType: DatasetType | str,
        collections: Any,
        *,
        findFirst: bool = True,
        components: bool = False,
    ) -> ParentDatasetQueryResults:
        """Find datasets using the data IDs identified by this query.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for. Must have
            dimensions that are a subset of ``self.graph``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof. ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each result data ID, only yield one
            `DatasetRef`, from the first collection in which a dataset of that
            dataset type appears (according to the order of ``collections``
            passed in). If `True`, ``collections`` must not contain regular
            expressions and may not be ``...``.
        components : `bool`, optional
            Must be `False`. Provided only for backwards compatibility. After
            v27 this argument will be removed entirely.

        Returns
        -------
        datasets : `ParentDatasetQueryResults`
            A lazy-evaluation object representing dataset query results,
            iterable over `DatasetRef` objects. If ``self.hasRecords()``, all
            nested data IDs in those dataset references will have records as
            well.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
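
        Notes
        -----
        As an illustrative (hypothetical) example, a find-first search for
        ``raw`` datasets in a single collection, using the data IDs in this
        query; the dataset type and collection names are not real::

            refs = dataIds.findDatasets("raw", collections=["HSC/raw/all"])
            for ref in refs:
                ...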
273 """
274 if components is not False:
275 raise DatasetTypeError(
276 "Dataset component queries are no longer supported by Registry. Use "
277 "DatasetType methods to obtain components from parent dataset types instead."
278 )
279 resolved_dataset_type = self._query.backend.resolve_single_dataset_type_wildcard(
280 datasetType, explicit_only=True
281 )
282 return ParentDatasetQueryResults(
283 self._query.find_datasets(resolved_dataset_type, collections, find_first=findFirst, defer=True),
284 resolved_dataset_type,
285 [None],
286 )

    def findRelatedDatasets(
        self,
        datasetType: DatasetType | str,
        collections: Any,
        *,
        findFirst: bool = True,
        dimensions: DimensionGroup | DimensionGraph | Iterable[str] | None = None,
    ) -> Iterable[tuple[DataCoordinate, DatasetRef]]:
        """Find datasets using the data IDs identified by this query, and
        return them along with the original data IDs.

        This is a variant of `findDatasets` that is often more useful when
        the target dataset type does not have all of the dimensions of the
        original data ID query, as is generally the case with calibration
        lookups.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for. Must have
            dimensions that are a subset of ``self.graph``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof. ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each data ID in ``self``, only yield one
            `DatasetRef`, from the first collection in which a dataset of that
            dataset type appears (according to the order of ``collections``
            passed in). If `True`, ``collections`` must not contain regular
            expressions and may not be ``...``. Note that this is not the
            same as yielding one `DatasetRef` for each yielded data ID if
            ``dimensions`` is not `None`.
        dimensions : `DimensionGroup`, `DimensionGraph`, or \
                `~collections.abc.Iterable` [ `str` ], optional
            The dimensions of the data IDs returned. Must be a subset of
            ``self.dimensions``.

        Returns
        -------
        pairs : `~collections.abc.Iterable` [ `tuple` [ `DataCoordinate`, \
                `DatasetRef` ] ]
            An iterable of (data ID, dataset reference) pairs.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
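
        Notes
        -----
        As an illustrative (hypothetical) example, a calibration lookup that
        pairs each of this query's data IDs with its ``bias`` dataset; the
        dataset type and collection names are not real::

            for data_id, ref in dataIds.findRelatedDatasets(
                "bias", collections=["HSC/calib"]
            ):
                ...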
337 """
338 if dimensions is None:
339 dimensions = self.dimensions
340 else:
341 dimensions = self.universe.conform(dimensions)
342 parent_dataset_type = self._query.backend.resolve_single_dataset_type_wildcard(
343 datasetType, explicit_only=True
344 )
345 query = self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True)
346 return query.iter_data_ids_and_dataset_refs(parent_dataset_type, dimensions)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them. If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``). Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique rows
        returned, so even with ``exact=True`` it may provide only an upper
        bound on the number of *deduplicated* result rows.
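
        For example, a cheap upper bound can be obtained without executing
        the full query (``dataIds`` is a hypothetical instance)::

            n_at_most = dataIds.count(exact=False)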
376 """
377 return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows. `False` if it definitely would not.
        """
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default), execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
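
        Notes
        -----
        For example, a hypothetical diagnostic loop over these messages
        (``dataIds`` is an illustrative instance)::

            if not dataIds.any():
                for message in dataIds.explain_no_results():
                    print(message)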
418 """
419 return self._query.explain_no_results(execute=execute)

    def order_by(self, *args: str) -> DataCoordinateQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering. A column
            name can be prefixed with a minus (``-``) to use descending
            ordering.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            Returns the ``self`` instance, which is updated to return ordered
            results.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining.
        """
        clause = OrderByClause.parse_general(args, self._query.dimensions)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DataCoordinateQueryResults:
        """Make the iterator return a limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records. `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            Returns the ``self`` instance, which is updated to return a
            limited set of records.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining. Normally this method is used
        together with the `order_by` method.
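
        For example, a hypothetical chained call that keeps only the ten
        highest-numbered visits (the dimension name is illustrative)::

            for data_id in dataIds.order_by("-visit").limit(10):
                ...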
468 """
469 if offset is None:
470 offset = 0
471 self._query = self._query.sliced(offset, offset + limit, defer=True)
472 return self


class DatasetQueryResults(Iterable[DatasetRef]):
    """An interface for objects that represent the results of queries for
    datasets.
    """

    @abstractmethod
    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        """Group results by parent dataset type.

        Returns
        -------
        iter : `~collections.abc.Iterator` [ `ParentDatasetQueryResults` ]
            An iterator over `DatasetQueryResults` instances that are each
            responsible for a single parent dataset type (either just that
            dataset type, one or more of its component dataset types, or both).
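
        Notes
        -----
        As an illustrative example, grouping a heterogeneous results object
        by parent dataset type (``refs`` is a hypothetical
        `DatasetQueryResults` instance)::

            for parent_results in refs.byParentDatasetType():
                print(parent_results.parentDatasetType.name)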
490 """
491 raise NotImplementedError()

    @abstractmethod
    def materialize(self) -> AbstractContextManager[DatasetQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DatasetQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``. If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).
        """
        raise NotImplementedError()

    @abstractmethod
    def expanded(self) -> DatasetQueryResults:
        """Return a `DatasetQueryResults` for which `DataCoordinate.hasRecords`
        returns `True` for all data IDs in returned `DatasetRef` objects.

        Returns
        -------
        expanded : `DatasetQueryResults`
            Either a new `DatasetQueryResults` instance or ``self``, if it is
            already expanded.

        Notes
        -----
        As with `DataCoordinateQueryResults.expanded`, it may be more efficient
        to call `materialize` before expanding data IDs for very large result
        sets.
        """
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them. If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``). Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique rows
        returned, so even with ``exact=True`` it may provide only an upper
        bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(
        self,
        *,
        execute: bool = True,
        exact: bool = True,
    ) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows. `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default), execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
        """
        raise NotImplementedError()

    def _iter_by_dataset_type(self) -> Iterator[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Group results by dataset type.

        This is a private hook for the interface defined by
        `DatasetRef.iter_by_type`, enabling much more efficient
        processing of heterogeneous `DatasetRef` iterables when they come
        directly from queries.
        """
        for parent_results in self.byParentDatasetType():
            for component in parent_results._components:
                dataset_type = parent_results.parentDatasetType
                if component is not None:
                    dataset_type = dataset_type.makeComponentDatasetType(component)
                if tuple(parent_results._components) == (component,):
                    # Usual case, and in the future (after component support
                    # has been fully removed) the only case.
                    yield dataset_type, parent_results
                else:
                    # General case that emits a deprecation warning.
                    yield (dataset_type, parent_results.withComponents((component,)))


class ParentDatasetQueryResults(DatasetQueryResults):
    """An object that represents results from a query for datasets with a
    single parent `DatasetType`.

    Parameters
    ----------
    query : `Query`
        Low-level query object that backs these results.
    dataset_type : `DatasetType`
        Parent dataset type for all datasets returned by this query.
    components : `~collections.abc.Sequence` [ `str` or `None` ], optional
        Names of components to include in iteration. `None` may be included
        (at most once) to include the parent dataset type.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(
        self,
        query: Query,
        dataset_type: DatasetType,
        components: Sequence[str | None] = (None,),
    ):
        self._query = query
        self._dataset_type = dataset_type
        self._components = components

    __slots__ = ("_query", "_dataset_type", "_components")

    def __iter__(self) -> Iterator[DatasetRef]:
        return self._query.iter_dataset_refs(self._dataset_type, self._components)

    def __repr__(self) -> str:
        return f"<DatasetRef iterator for [components of] {self._dataset_type.name}>"

    @property
    @deprecated("Deprecated, will be removed after v27.", version="v27", category=FutureWarning)
    def components(self) -> Sequence[str | None]:
        """The components of the parent dataset type included in these results
        (`~collections.abc.Sequence` [ `str` or `None` ]).
        """
        return self._components

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        yield self

    @contextmanager
    def materialize(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with self._query.open_context():
            yield ParentDatasetQueryResults(self._query.materialized(), self._dataset_type, self._components)

    @property
    def parentDatasetType(self) -> DatasetType:
        """The parent dataset type for all datasets in this iterable
        (`DatasetType`).
        """
        return self._dataset_type

    @property
    def dataIds(self) -> DataCoordinateQueryResults:
        """A lazy-evaluation object representing a query for just the data
        IDs of the datasets that would be returned by this query
        (`DataCoordinateQueryResults`).

        The returned object is not in general `zip`-iterable with ``self``;
        it may be in a different order or have (or not have) duplicates.
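
        For example, a hypothetical count of the distinct data IDs behind a
        dataset query (``refs`` is an illustrative instance of this class)::

            n = refs.dataIds.subset(unique=True).count()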
704 """
705 return DataCoordinateQueryResults(self._query.projected(defer=True))
707 @deprecated("Deprecated, will be removed after v27.", version="v27", category=FutureWarning)
708 def withComponents(self, components: Sequence[str | None]) -> ParentDatasetQueryResults:
709 """Return a new query results object for the same parent datasets but
710 different components.
712 Parameters
713 ----------
714 components : `~collections.abc.Sequence` [ `str` or `None` ]
715 Names of components to include in iteration. `None` may be
716 included (at most once) to include the parent dataset type.
717 """
718 return ParentDatasetQueryResults(self._query, self._dataset_type, components)
720 def expanded(self) -> ParentDatasetQueryResults:
721 # Docstring inherited from DatasetQueryResults.
722 return ParentDatasetQueryResults(
723 self._query.with_record_columns(defer=True), self._dataset_type, self._components
724 )
726 def count(self, *, exact: bool = True, discard: bool = False) -> int:
727 # Docstring inherited.
728 return len(self._components) * self._query.count(exact=exact, discard=discard)
730 def any(self, *, execute: bool = True, exact: bool = True) -> bool:
731 # Docstring inherited.
732 return self._query.any(execute=execute, exact=exact)
734 def explain_no_results(self, execute: bool = True) -> Iterable[str]:
735 # Docstring inherited.
736 return self._query.explain_no_results(execute=execute)


class ChainedDatasetQueryResults(DatasetQueryResults):
    """A `DatasetQueryResults` implementation that simply chains together
    other results objects, each for a different parent dataset type.

    Parameters
    ----------
    chain : `~collections.abc.Sequence` [ `ParentDatasetQueryResults` ]
        The underlying results objects this object will chain together.
    doomed_by : `~collections.abc.Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed. Queries with a non-empty list will never be executed.
        Child results objects may also have their own list.
    """

    def __init__(self, chain: Sequence[ParentDatasetQueryResults], doomed_by: Iterable[str] = ()):
        self._chain = chain
        self._doomed_by = tuple(doomed_by)

    # "_doomed_by" is included because __init__ assigns it; listing only
    # "_chain" would leave that attribute outside the declared slots.
    __slots__ = ("_chain", "_doomed_by")

    def __iter__(self) -> Iterator[DatasetRef]:
        return itertools.chain.from_iterable(self._chain)

    def __repr__(self) -> str:
        return "<DatasetRef iterator for multiple dataset types>"

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        return iter(self._chain)

    @contextmanager
    def materialize(self) -> Iterator[ChainedDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with ExitStack() as stack:
            yield ChainedDatasetQueryResults([stack.enter_context(r.materialize()) for r in self._chain])

    def expanded(self) -> ChainedDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ChainedDatasetQueryResults([r.expanded() for r in self._chain], self._doomed_by)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return sum(r.count(exact=exact, discard=discard) for r in self._chain)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return any(r.any(execute=execute, exact=exact) for r in self._chain)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        result = list(self._doomed_by)
        for r in self._chain:
            result.extend(r.explain_no_results(execute=execute))
        return result


class DimensionRecordQueryResults(Iterable[DimensionRecord]):
    """An interface for objects that represent the results of queries for
    dimension records.
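
    For example, instances are typically obtained from
    `Registry.queryDimensionRecords`; a hypothetical ordered iteration over
    detector records (the element and column names are illustrative)::

        records = registry.queryDimensionRecords("detector")
        for record in records.order_by("id"):
            ...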
799 """
801 @property
802 @abstractmethod
803 def element(self) -> DimensionElement:
804 raise NotImplementedError()
806 @abstractmethod
807 def run(self) -> DimensionRecordQueryResults:
808 raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them. If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``). Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique rows
        returned, so even with ``exact=True`` it may provide only an upper
        bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows. `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering. A column
            name can be prefixed with a minus (``-``) to use descending
            ordering.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            Returns the ``self`` instance, which is updated to return ordered
            results.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance.
        """
        raise NotImplementedError()

    @abstractmethod
    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        """Make the iterator return a limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records. `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            Returns the ``self`` instance, which is updated to return a
            limited set of records.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance. Normally this method is used together with the `order_by`
        method.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default), execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
        """
        raise NotImplementedError()


class DatabaseDimensionRecordQueryResults(DimensionRecordQueryResults):
    """Implementation of `DimensionRecordQueryResults` backed by a database
    query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.
    element : `DimensionElement`
        Element whose records this object returns.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(self, query: Query, element: DimensionElement):
        self._query = query
        self._element = element

    @property
    def element(self) -> DimensionElement:
        return self._element

    def __iter__(self) -> Iterator[DimensionRecord]:
        return self._query.iter_dimension_records(self._element)

    def run(self) -> DimensionRecordQueryResults:
        return DatabaseDimensionRecordQueryResults(self._query.run(), self._element)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited from base class.
        return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited from base class.
        return self._query.any(execute=execute, exact=exact)

    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        clause = OrderByClause.parse_element(args, self._element)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        if offset is None:
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)