Coverage for python/lsst/daf/butler/registry/queries/_results.py: 58%
194 statements
coverage.py v7.3.2, created at 2023-12-01 11:00 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = (
    "ChainedDatasetQueryResults",
    "DatabaseDimensionRecordQueryResults",
    "DataCoordinateQueryResults",
    "DatasetQueryResults",
    "DimensionRecordQueryResults",
    "ParentDatasetQueryResults",
)

import itertools
from abc import abstractmethod
from collections.abc import Iterable, Iterator, Sequence
from contextlib import AbstractContextManager, ExitStack, contextmanager
from typing import Any

from deprecated.sphinx import deprecated

from ..._dataset_ref import DatasetRef
from ..._dataset_type import DatasetType
from ...dimensions import (
    DataCoordinate,
    DataCoordinateIterable,
    DimensionElement,
    DimensionGraph,
    DimensionGroup,
    DimensionRecord,
)
from ._query import Query
from ._structs import OrderByClause


class DataCoordinateQueryResults(DataCoordinateIterable):
    """An enhanced implementation of `DataCoordinateIterable` that represents
    data IDs retrieved from a database query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(self, query: Query):
        self._query = query

    __slots__ = ("_query",)

    def __iter__(self) -> Iterator[DataCoordinate]:
        return self._query.iter_data_ids()

    def __repr__(self) -> str:
        return f"<DataCoordinate iterator with dimensions={self.graph}>"

    @property
    @deprecated(
        "Deprecated in favor of .dimensions. Will be removed after v27.",
        version="v27",
        category=FutureWarning,
    )
    def graph(self) -> DimensionGraph:
        # Docstring inherited from DataCoordinateIterable.
        return self._query.dimensions._as_graph()

    @property
    def dimensions(self) -> DimensionGroup:
        """The dimensions of the data IDs returned by this query."""
        return self._query.dimensions

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinateIterable.
        return True

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinateIterable.
        return self._query.has_record_columns is True or not self.dimensions

    @contextmanager
    def materialize(self) -> Iterator[DataCoordinateQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DataCoordinateQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``. If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).

        Notes
        -----
        When using a very large result set to perform multiple queries (e.g.
        multiple calls to `subset` with different arguments, or even a single
        call to `expanded`), it may be much more efficient to start by
        materializing the query and only then performing the follow-up
        queries. It may also be less efficient, depending on how well the
        database engine's query optimizer can simplify those particular
        follow-up queries and how efficiently it caches query results even
        when they are not explicitly inserted into a temporary table. See
        `expanded` and `subset` for examples.
        """
        with self._query.open_context():
            yield DataCoordinateQueryResults(self._query.materialized())

    def expanded(self) -> DataCoordinateQueryResults:
        """Return a results object for which `hasRecords` returns `True`.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object for which `hasRecords` returns `True`. May be
            ``self`` if that is already the case.

        Notes
        -----
        For very large result sets, it may be much more efficient to call
        `materialize` before calling `expanded`, to avoid performing the
        original query multiple times (as a subquery) in the follow-up queries
        that fetch dimension records. For example::

            with registry.queryDataIds(...).materialize() as tempDataIds:
                dataIdsWithRecords = tempDataIds.expanded()
                for dataId in dataIdsWithRecords:
                    ...
        """
        return DataCoordinateQueryResults(self._query.with_record_columns(defer=True))

    def subset(
        self,
        dimensions: DimensionGroup | DimensionGraph | Iterable[str] | None = None,
        *,
        unique: bool = False,
    ) -> DataCoordinateQueryResults:
        """Return a results object containing a subset of the dimensions of
        this one, and/or a unique near-subset of its rows.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Parameters
        ----------
        dimensions : `DimensionGroup`, `DimensionGraph`, or \
                `~collections.abc.Iterable` [ `str` ], optional
            Dimensions to include in the new results object. If `None`,
            ``self.dimensions`` is used.
        unique : `bool`, optional
            If `True` (`False` is default), the query should only return unique
            data IDs. This is implemented in the database; to obtain unique
            results via Python-side processing (which may be more efficient in
            some cases), use `toSet` to construct a `DataCoordinateSet` from
            this results object instead.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object corresponding to the given criteria. May be
            ``self`` if it already qualifies.

        Raises
        ------
        ValueError
            Raised when ``dimensions`` is not a subset of the dimensions in
            this result.

        Notes
        -----
        This method can only return a "near-subset" of the original result rows
        in general because of subtleties in how spatial overlaps are
        implemented; see `Query.projected` for more information.

        When calling `subset` multiple times on the same very large result set,
        it may be much more efficient to call `materialize` first. For
        example::

            dimensions1 = DimensionGroup(...)
            dimensions2 = DimensionGroup(...)
            with registry.queryDataIds(...).materialize() as tempDataIds:
                for dataId1 in tempDataIds.subset(dimensions1, unique=True):
                    ...
                for dataId2 in tempDataIds.subset(dimensions2, unique=True):
                    ...
        """
        if dimensions is None:
            dimensions = self.dimensions
        else:
            dimensions = self.dimensions.universe.conform(dimensions)
            if not dimensions.issubset(self.dimensions):
                raise ValueError(f"{dimensions} is not a subset of {self.dimensions}")
        query = self._query.projected(dimensions.names, unique=unique, defer=True, drop_postprocessing=True)
        return DataCoordinateQueryResults(query)

    def findDatasets(
        self,
        datasetType: Any,
        collections: Any,
        *,
        findFirst: bool = True,
        components: bool | None = None,
    ) -> DatasetQueryResults:
        """Find datasets using the data IDs identified by this query.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for. Must have
            dimensions that are a subset of ``self.graph``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof. ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each result data ID, only yield one
            `DatasetRef`, from the first collection in which a dataset of that
            dataset type appears (according to the order of ``collections``
            passed in). If `True`, ``collections`` must not contain regular
            expressions and may not be ``...``.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their parent
            datasets were not matched by the expression. Fully-specified
            component datasets (`str` or `DatasetType` instances) are always
            included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.

        Returns
        -------
        datasets : `ParentDatasetQueryResults`
            A lazy-evaluation object representing dataset query results,
            iterable over `DatasetRef` objects. If ``self.hasRecords()``, all
            nested data IDs in those dataset references will have records as
            well.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
        """
        parent_dataset_type, components_found = self._query.backend.resolve_single_dataset_type_wildcard(
            datasetType, components=components, explicit_only=True
        )
        return ParentDatasetQueryResults(
            self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True),
            parent_dataset_type,
            components_found,
        )

    def findRelatedDatasets(
        self,
        datasetType: DatasetType | str,
        collections: Any,
        *,
        findFirst: bool = True,
        dimensions: DimensionGroup | DimensionGraph | Iterable[str] | None = None,
    ) -> Iterable[tuple[DataCoordinate, DatasetRef]]:
        """Find datasets using the data IDs identified by this query, and
        return them along with the original data IDs.

        This is a variant of `findDatasets` that is often more useful when
        the target dataset type does not have all of the dimensions of the
        original data ID query, as is generally the case with calibration
        lookups.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for. Must have
            dimensions that are a subset of ``self.graph``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof. ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each data ID in ``self``, only yield one
            `DatasetRef`, from the first collection in which a dataset of that
            dataset type appears (according to the order of ``collections``
            passed in). If `True`, ``collections`` must not contain regular
            expressions and may not be ``...``. Note that this is not the
            same as yielding one `DatasetRef` for each yielded data ID if
            ``dimensions`` is not `None`.
        dimensions : `DimensionGroup`, `DimensionGraph`, or \
                `~collections.abc.Iterable` [ `str` ], optional
            The dimensions of the data IDs returned. Must be a subset of
            ``self.dimensions``.

        Returns
        -------
        pairs : `~collections.abc.Iterable` [ `tuple` [ `DataCoordinate`, \
                `DatasetRef` ] ]
            An iterable of (data ID, dataset reference) pairs.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
        """
        if dimensions is None:
            dimensions = self.dimensions
        else:
            dimensions = self.universe.conform(dimensions)
        parent_dataset_type, _ = self._query.backend.resolve_single_dataset_type_wildcard(
            datasetType, components=False, explicit_only=True
        )
        query = self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True)
        return query.iter_data_ids_and_dataset_refs(parent_dataset_type, dimensions)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them. If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``). Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique rows
        returned, so even with ``exact=True`` it may provide only an upper
        bound on the number of *deduplicated* result rows.
        """
        return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows. `False` if it definitely would not.
        """
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
        """
        return self._query.explain_no_results(execute=execute)

    def order_by(self, *args: str) -> DataCoordinateQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering. A column name
            can be prefixed with minus (``-``) to use descending ordering.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            Returns the ``self`` instance, which is updated to return ordered
            results.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining.
        """
        clause = OrderByClause.parse_general(args, self._query.dimensions)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DataCoordinateQueryResults:
        """Make the iterator return a limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records. `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            Returns the ``self`` instance, which is updated to return a limited
            set of records.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining. Normally this method is used
        together with the `order_by` method.
        """
        if offset is None:
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self
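

# Illustrative usage sketch added for documentation purposes only; it is not
# part of the original module. It assumes a Registry-like ``registry`` object
# whose ``queryDataIds`` method returns a `DataCoordinateQueryResults`, as in
# the docstring examples above; the dimension names and data ID values are
# placeholders.
def _example_data_id_query(registry: Any) -> list[DataCoordinate]:
    """Sketch: order, limit, and diagnose a data ID query."""
    results = registry.queryDataIds(["exposure", "detector"], instrument="HSC")
    # order_by/limit mutate ``results`` in place and return it for chaining.
    results = results.order_by("-exposure", "detector").limit(10, offset=0)
    if not results.any(exact=False):
        # Ask the query system why nothing was (or might not be) returned.
        for message in results.explain_no_results():
            print(message)
        return []
    # count() may only be an upper bound unless exact=True.
    print(f"Approximate row count: {results.count(exact=False)}")
    return [data_id for data_id in results]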


class DatasetQueryResults(Iterable[DatasetRef]):
    """An interface for objects that represent the results of queries for
    datasets.
    """

    @abstractmethod
    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        """Group results by parent dataset type.

        Returns
        -------
        iter : `~collections.abc.Iterator` [ `ParentDatasetQueryResults` ]
            An iterator over `DatasetQueryResults` instances that are each
            responsible for a single parent dataset type (either just that
            dataset type, one or more of its component dataset types, or both).
        """
        raise NotImplementedError()

    @abstractmethod
    def materialize(self) -> AbstractContextManager[DatasetQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DatasetQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``. If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).
        """
        raise NotImplementedError()

    @abstractmethod
    def expanded(self) -> DatasetQueryResults:
        """Return a `DatasetQueryResults` for which `DataCoordinate.hasRecords`
        returns `True` for all data IDs in returned `DatasetRef` objects.

        Returns
        -------
        expanded : `DatasetQueryResults`
            Either a new `DatasetQueryResults` instance or ``self``, if it is
            already expanded.

        Notes
        -----
        As with `DataCoordinateQueryResults.expanded`, it may be more efficient
        to call `materialize` before expanding data IDs for very large result
        sets.
        """
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them. If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``). Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique rows
        returned, so even with ``exact=True`` it may provide only an upper
        bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(
        self,
        *,
        execute: bool = True,
        exact: bool = True,
    ) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows. `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
        """
        raise NotImplementedError()

    def _iter_by_dataset_type(self) -> Iterator[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Group results by dataset type.

        This is a private hook for the interface defined by
        `DatasetRef.iter_by_type`, enabling much more efficient
        processing of heterogeneous `DatasetRef` iterables when they come
        directly from queries.
        """
        for parent_results in self.byParentDatasetType():
            for component in parent_results.components:
                dataset_type = parent_results.parentDatasetType
                if component is not None:
                    dataset_type = dataset_type.makeComponentDatasetType(component)
                yield (dataset_type, parent_results.withComponents((component,)))
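

# Illustrative sketch, not part of the original module: how a caller might
# consume a `DatasetQueryResults` grouped by parent dataset type. It assumes
# ``refs`` was obtained from something like ``registry.queryDatasets(...)``.
def _example_group_by_parent(refs: DatasetQueryResults) -> dict[str, int]:
    """Sketch: count results separately for each parent dataset type."""
    counts: dict[str, int] = {}
    for parent_results in refs.byParentDatasetType():
        # Each item handles a single parent dataset type (and possibly its
        # components); count(exact=False) gives a cheap upper bound.
        counts[parent_results.parentDatasetType.name] = parent_results.count(exact=False)
    return counts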


class ParentDatasetQueryResults(DatasetQueryResults):
    """An object that represents results from a query for datasets with a
    single parent `DatasetType`.

    Parameters
    ----------
    query : `Query`
        Low-level query object that backs these results.
    dataset_type : `DatasetType`
        Parent dataset type for all datasets returned by this query.
    components : `~collections.abc.Sequence` [ `str` or `None` ], optional
        Names of components to include in iteration. `None` may be included
        (at most once) to include the parent dataset type.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(
        self,
        query: Query,
        dataset_type: DatasetType,
        components: Sequence[str | None] = (None,),
    ):
        self._query = query
        self._dataset_type = dataset_type
        self._components = components

    __slots__ = ("_query", "_dataset_type", "_components")

    def __iter__(self) -> Iterator[DatasetRef]:
        return self._query.iter_dataset_refs(self._dataset_type, self._components)

    def __repr__(self) -> str:
        return f"<DatasetRef iterator for [components of] {self._dataset_type.name}>"

    @property
    def components(self) -> Sequence[str | None]:
        """The components of the parent dataset type included in these results
        (`~collections.abc.Sequence` [ `str` or `None` ]).
        """
        return self._components

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        yield self

    @contextmanager
    def materialize(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with self._query.open_context():
            yield ParentDatasetQueryResults(self._query.materialized(), self._dataset_type, self._components)

    @property
    def parentDatasetType(self) -> DatasetType:
        """The parent dataset type for all datasets in this iterable
        (`DatasetType`).
        """
        return self._dataset_type

    @property
    def dataIds(self) -> DataCoordinateQueryResults:
        """A lazy-evaluation object representing a query for just the data
        IDs of the datasets that would be returned by this query
        (`DataCoordinateQueryResults`).

        The returned object is not in general `zip`-iterable with ``self``;
        it may be in a different order or have (or not have) duplicates.
        """
        return DataCoordinateQueryResults(self._query.projected(defer=True))

    def withComponents(self, components: Sequence[str | None]) -> ParentDatasetQueryResults:
        """Return a new query results object for the same parent datasets but
        different components.

        Parameters
        ----------
        components : `~collections.abc.Sequence` [ `str` or `None` ]
            Names of components to include in iteration. `None` may be
            included (at most once) to include the parent dataset type.
        """
        return ParentDatasetQueryResults(self._query, self._dataset_type, components)

    def expanded(self) -> ParentDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ParentDatasetQueryResults(
            self._query.with_record_columns(defer=True), self._dataset_type, self._components
        )

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return len(self._components) * self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)
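

# Illustrative sketch, not part of the original module: materializing a large
# per-dataset-type result once and then deriving both expanded references and
# the corresponding data IDs from the temporary table.
def _example_parent_results_usage(parent_results: ParentDatasetQueryResults) -> None:
    """Sketch: reuse one materialized result for refs and data IDs."""
    with parent_results.materialize() as materialized:
        # Data IDs in expanded results carry full dimension records.
        for ref in materialized.expanded():
            print(ref)
        # A separate lazy query for just the data IDs; note that it is not
        # guaranteed to be zip-iterable with the refs above.
        for data_id in materialized.dataIds:
            print(data_id)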


class ChainedDatasetQueryResults(DatasetQueryResults):
    """A `DatasetQueryResults` implementation that simply chains together
    other results objects, each for a different parent dataset type.

    Parameters
    ----------
    chain : `~collections.abc.Sequence` [ `ParentDatasetQueryResults` ]
        The underlying results objects this object will chain together.
    doomed_by : `~collections.abc.Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed. Queries with a non-empty list will never be executed.
        Child results objects may also have their own list.
    """

    def __init__(self, chain: Sequence[ParentDatasetQueryResults], doomed_by: Iterable[str] = ()):
        self._chain = chain
        self._doomed_by = tuple(doomed_by)

    __slots__ = ("_chain", "_doomed_by")

    def __iter__(self) -> Iterator[DatasetRef]:
        return itertools.chain.from_iterable(self._chain)

    def __repr__(self) -> str:
        return "<DatasetRef iterator for multiple dataset types>"

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        return iter(self._chain)

    @contextmanager
    def materialize(self) -> Iterator[ChainedDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with ExitStack() as stack:
            yield ChainedDatasetQueryResults([stack.enter_context(r.materialize()) for r in self._chain])

    def expanded(self) -> ChainedDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ChainedDatasetQueryResults([r.expanded() for r in self._chain], self._doomed_by)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return sum(r.count(exact=exact, discard=discard) for r in self._chain)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return any(r.any(execute=execute, exact=exact) for r in self._chain)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        result = list(self._doomed_by)
        for r in self._chain:
            result.extend(r.explain_no_results(execute=execute))
        return result
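

# Illustrative sketch, not part of the original module: stitching several
# per-dataset-type results into a single chained object and collecting the
# combined "no results" diagnostics, including any up-front doom messages.
# The doom message below is a placeholder.
def _example_chained_diagnostics(per_type: Sequence[ParentDatasetQueryResults]) -> list[str]:
    """Sketch: aggregate diagnostics across dataset types."""
    chained = ChainedDatasetQueryResults(
        list(per_type), doomed_by=["example: no matching collections were found"]
    )
    messages: list[str] = []
    if not chained.any(execute=False, exact=False):
        # explain_no_results() reports the chain-level doom messages first,
        # then whatever each child result can report.
        messages.extend(chained.explain_no_results())
    return messages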


class DimensionRecordQueryResults(Iterable[DimensionRecord]):
    """An interface for objects that represent the results of queries for
    dimension records.
    """

    @property
    @abstractmethod
    def element(self) -> DimensionElement:
        raise NotImplementedError()

    @abstractmethod
    def run(self) -> DimensionRecordQueryResults:
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them. If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``). Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique rows
        returned, so even with ``exact=True`` it may provide only an upper
        bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows. `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering. A column name
            can be prefixed with minus (``-``) to use descending ordering.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            Returns the ``self`` instance, which is updated to return ordered
            results.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance.
        """
        raise NotImplementedError()

    @abstractmethod
    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        """Make the iterator return a limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records. `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            Returns the ``self`` instance, which is updated to return a limited
            set of records.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance. Normally this method is used together with the `order_by`
        method.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
        """
        raise NotImplementedError()


class DatabaseDimensionRecordQueryResults(DimensionRecordQueryResults):
    """Implementation of `DimensionRecordQueryResults` backed by a database
    query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.
    element : `DimensionElement`
        Element whose records this object returns.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(self, query: Query, element: DimensionElement):
        self._query = query
        self._element = element

    @property
    def element(self) -> DimensionElement:
        return self._element

    def __iter__(self) -> Iterator[DimensionRecord]:
        return self._query.iter_dimension_records(self._element)

    def run(self) -> DimensionRecordQueryResults:
        return DatabaseDimensionRecordQueryResults(self._query.run(), self._element)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited from base class.
        return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited from base class.
        return self._query.any(execute=execute, exact=exact)

    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        clause = OrderByClause.parse_element(args, self._element)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        if offset is None:
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)
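

# Illustrative sketch, not part of the original module: ordering and limiting
# a dimension-record query. It assumes a Registry-like ``registry`` object
# whose ``queryDimensionRecords`` method returns a `DimensionRecordQueryResults`;
# the element name, data ID value, and order-by column are placeholders.
def _example_dimension_record_query(registry: Any) -> list[DimensionRecord]:
    """Sketch: fetch the first few detector records for one instrument."""
    records = registry.queryDimensionRecords("detector", instrument="HSC")
    # order_by/limit update the results object in place and return it,
    # so the calls can be chained before iterating.
    records = records.order_by("full_name").limit(5)
    return list(records)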