Coverage for python/lsst/daf/butler/registry/queries/_results.py: 58%

179 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-08-12 09:20 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

# Public names exported by this module.
__all__ = (
    "ChainedDatasetQueryResults",
    "DatabaseDimensionRecordQueryResults",
    "DataCoordinateQueryResults",
    "DatasetQueryResults",
    "DimensionRecordQueryResults",
    "ParentDatasetQueryResults",
)

31 

32import itertools 

33from abc import abstractmethod 

34from collections.abc import Iterable, Iterator, Sequence 

35from contextlib import AbstractContextManager, ExitStack, contextmanager 

36from typing import Any 

37 

38from ...core import ( 

39 DataCoordinate, 

40 DataCoordinateIterable, 

41 DatasetRef, 

42 DatasetType, 

43 DimensionElement, 

44 DimensionGraph, 

45 DimensionRecord, 

46) 

47from ._query import Query 

48from ._structs import OrderByClause 

49 

50 

class DataCoordinateQueryResults(DataCoordinateIterable):
    """An enhanced implementation of `DataCoordinateIterable` that represents
    data IDs retrieved from a database query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(self, query: Query):
        self._query = query

    __slots__ = ("_query",)

    def __iter__(self) -> Iterator[DataCoordinate]:
        return self._query.iter_data_ids()

    def __repr__(self) -> str:
        return f"<DataCoordinate iterator with dimensions={self.graph}>"

    @property
    def graph(self) -> DimensionGraph:
        # Docstring inherited from DataCoordinateIterable.
        return self._query.dimensions

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinateIterable.
        # Query-backed data IDs always carry full values.
        return True

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinateIterable.
        # An empty dimension graph trivially has all (i.e. no) records.
        return self._query.has_record_columns is True or not self.graph

    @contextmanager
    def materialize(self) -> Iterator[DataCoordinateQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DataCoordinateQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``.  If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).

        Notes
        -----
        When using a very large result set to perform multiple queries (e.g.
        multiple calls to `subset` with different arguments, or even a single
        call to `expanded`), it may be much more efficient to start by
        materializing the query and only then performing the follow up
        queries.  It may also be less efficient, depending on how well
        database engine's query optimizer can simplify those particular
        follow-up queries and how efficiently it caches query results even
        when they are not explicitly inserted into a temporary table.  See
        `expanded` and `subset` for examples.
        """
        with self._query.open_context():
            yield DataCoordinateQueryResults(self._query.materialized())

    def expanded(self) -> DataCoordinateQueryResults:
        """Return a results object for which `hasRecords` returns `True`.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object for which `hasRecords` returns `True`.  May be
            ``self`` if that is already the case.

        Notes
        -----
        For very large result sets, it may be much more efficient to call
        `materialize` before calling `expanded`, to avoid performing the
        original query multiple times (as a subquery) in the follow-up
        queries that fetch dimension records.  For example::

            with registry.queryDataIds(...).materialize() as tempDataIds:
                dataIdsWithRecords = tempDataIds.expanded()
                for dataId in dataIdsWithRecords:
                    ...
        """
        return DataCoordinateQueryResults(self._query.with_record_columns(defer=True))

    def subset(
        self, graph: DimensionGraph | None = None, *, unique: bool = False
    ) -> DataCoordinateQueryResults:
        """Return a results object containing a subset of the dimensions of
        this one, and/or a unique near-subset of its rows.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new results object.  If `None`,
            ``self.graph`` is used.
        unique : `bool`, optional
            If `True` (`False` is default), the query should only return
            unique data IDs.  This is implemented in the database; to obtain
            unique results via Python-side processing (which may be more
            efficient in some cases), use `toSet` to construct a
            `DataCoordinateSet` from this results object instead.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object corresponding to the given criteria.  May be
            ``self`` if it already qualifies.

        Raises
        ------
        ValueError
            Raised when ``graph`` is not a subset of the dimension graph in
            this result.

        Notes
        -----
        This method can only return a "near-subset" of the original result
        rows in general because of subtleties in how spatial overlaps are
        implemented; see `Query.projected` for more information.

        When calling `subset` multiple times on the same very large result
        set, it may be much more efficient to call `materialize` first.  For
        example::

            dimensions1 = DimensionGraph(...)
            dimensions2 = DimensionGraph(...)
            with registry.queryDataIds(...).materialize() as tempDataIds:
                for dataId1 in tempDataIds.subset(
                    graph=dimensions1,
                    unique=True):
                    ...
                for dataId2 in tempDataIds.subset(
                    graph=dimensions2,
                    unique=True):
                    ...
        """
        if graph is None:
            graph = self.graph
        if not graph.issubset(self.graph):
            raise ValueError(f"{graph} is not a subset of {self.graph}")
        query = self._query.projected(graph, unique=unique, defer=True, drop_postprocessing=True)
        return DataCoordinateQueryResults(query)

    def findDatasets(
        self,
        datasetType: Any,
        collections: Any,
        *,
        findFirst: bool = True,
        components: bool | None = None,
    ) -> DatasetQueryResults:
        """Find datasets using the data IDs identified by this query.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for.  Must have
            dimensions that are a subset of ``self.graph``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof.  ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more
            information.
        findFirst : `bool`, optional
            If `True` (default), for each result data ID, only yield one
            `DatasetRef`, from the first collection in which a dataset of
            that dataset type appears (according to the order of
            ``collections`` passed in).  If `True`, ``collections`` must not
            contain regular expressions and may not be ``...``.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset
            type names as well.  If `False`, never apply patterns to
            components.  If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will
            be supported after v26.  After v27 this argument will be removed
            entirely.

        Returns
        -------
        datasets : `ParentDatasetQueryResults`
            A lazy-evaluation object representing dataset query results,
            iterable over `DatasetRef` objects.  If ``self.hasRecords()``,
            all nested data IDs in those dataset references will have records
            as well.

        Raises
        ------
        ValueError
            Raised if ``datasetType.dimensions.issubset(self.graph) is
            False``.
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
        """
        parent_dataset_type, components_found = self._query.backend.resolve_single_dataset_type_wildcard(
            datasetType, components=components, explicit_only=True
        )
        return ParentDatasetQueryResults(
            self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True),
            parent_dataset_type,
            components_found,
        )

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count.  If `False`,
            the result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require
            running the full query and then throwing away the result rows
            after counting them.  If `False`, this is an error, as the user
            would usually be better off executing the query first to fetch
            its rows into a new query (or passing ``exact=False``).  Ignored
            if ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique
        rows returned, so even with ``exact=True`` it may provide only an
        upper bound on the number of *deduplicated* result rows.
        """
        return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no
            rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found.  If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments)
            yield result rows.  `False` if it definitely would not.
        """
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT
            1``) of aspects of the tree to more precisely determine where
            rows were filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield
            any results.
        """
        return self._query.explain_no_results(execute=execute)

    def order_by(self, *args: str) -> DataCoordinateQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering.  Column name
            can be prefixed with minus (``-``) to use descending ordering.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            Returns ``self`` instance which is updated to return ordered
            result.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining.
        """
        clause = OrderByClause.parse_general(args, self._query.dimensions)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DataCoordinateQueryResults:
        """Make the iterator return limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records.  `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            Returns ``self`` instance which is updated to return limited set
            of records.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining.  Normally this method is used
        together with `order_by` method.
        """
        if offset is None:
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self

396 

397 

class DatasetQueryResults(Iterable[DatasetRef]):
    """An interface for objects that represent the results of queries for
    datasets.

    Concrete implementations in this module are `ParentDatasetQueryResults`
    (a single parent dataset type) and `ChainedDatasetQueryResults` (a chain
    of per-parent results objects).
    """

    @abstractmethod
    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        """Group results by parent dataset type.

        Returns
        -------
        iter : `~collections.abc.Iterator` [ `ParentDatasetQueryResults` ]
            An iterator over `DatasetQueryResults` instances that are each
            responsible for a single parent dataset type (either just that
            dataset type, one or more of its component dataset types, or
            both).
        """
        raise NotImplementedError()

    @abstractmethod
    def materialize(self) -> AbstractContextManager[DatasetQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DatasetQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``.  If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).
        """
        raise NotImplementedError()

    @abstractmethod
    def expanded(self) -> DatasetQueryResults:
        """Return a `DatasetQueryResults` for which `DataCoordinate.hasRecords`
        returns `True` for all data IDs in returned `DatasetRef` objects.

        Returns
        -------
        expanded : `DatasetQueryResults`
            Either a new `DatasetQueryResults` instance or ``self``, if it is
            already expanded.

        Notes
        -----
        As with `DataCoordinateQueryResults.expanded`, it may be more
        efficient to call `materialize` before expanding data IDs for very
        large result sets.
        """
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count.  If `False`,
            the result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require
            running the full query and then throwing away the result rows
            after counting them.  If `False`, this is an error, as the user
            would usually be better off executing the query first to fetch
            its rows into a new query (or passing ``exact=False``).  Ignored
            if ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique
        rows returned, so even with ``exact=True`` it may provide only an
        upper bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(
        self,
        *,
        execute: bool = True,
        exact: bool = True,
    ) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no
            rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found.  If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments)
            yield result rows.  `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT
            1``) of aspects of the tree to more precisely determine where
            rows were filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield
            any results.
        """
        raise NotImplementedError()

    def _iter_by_dataset_type(self) -> Iterator[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Group results by dataset type.

        This is a private hook for the interface defined by
        `DatasetRef.iter_by_type`, enabling much more efficient
        processing of heterogeneous `DatasetRef` iterables when they come
        directly from queries.
        """
        for parent_results in self.byParentDatasetType():
            # A parent results object may cover several components; re-wrap
            # it so each yielded iterable covers exactly one dataset type.
            for component in parent_results.components:
                dataset_type = parent_results.parentDatasetType
                if component is not None:
                    dataset_type = dataset_type.makeComponentDatasetType(component)
                yield (dataset_type, parent_results.withComponents((component,)))

546 

547 

class ParentDatasetQueryResults(DatasetQueryResults):
    """An object that represents results from a query for datasets with a
    single parent `DatasetType`.

    Parameters
    ----------
    query : `Query`
        Low-level query object that backs these results.
    dataset_type : `DatasetType`
        Parent dataset type for all datasets returned by this query.
    components : `~collections.abc.Sequence` [ `str` or `None` ], optional
        Names of components to include in iteration.  `None` may be included
        (at most once) to include the parent dataset type.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(
        self,
        query: Query,
        dataset_type: DatasetType,
        components: Sequence[str | None] = (None,),
    ):
        self._query = query
        self._dataset_type = dataset_type
        self._components = components

    __slots__ = ("_query", "_dataset_type", "_components")

    def __iter__(self) -> Iterator[DatasetRef]:
        return self._query.iter_dataset_refs(self._dataset_type, self._components)

    def __repr__(self) -> str:
        return f"<DatasetRef iterator for [components of] {self._dataset_type.name}>"

    @property
    def components(self) -> Sequence[str | None]:
        """The components of the parent dataset type included in these
        results (`~collections.abc.Sequence` [ `str` or `None` ]).
        """
        return self._components

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        # There is only one parent dataset type here, by construction.
        yield self

    @contextmanager
    def materialize(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with self._query.open_context():
            yield ParentDatasetQueryResults(self._query.materialized(), self._dataset_type, self._components)

    @property
    def parentDatasetType(self) -> DatasetType:
        """The parent dataset type for all datasets in this iterable
        (`DatasetType`).
        """
        return self._dataset_type

    @property
    def dataIds(self) -> DataCoordinateQueryResults:
        """A lazy-evaluation object representing a query for just the data
        IDs of the datasets that would be returned by this query
        (`DataCoordinateQueryResults`).

        The returned object is not in general `zip`-iterable with ``self``;
        it may be in a different order or have (or not have) duplicates.
        """
        return DataCoordinateQueryResults(self._query.projected(defer=True))

    def withComponents(self, components: Sequence[str | None]) -> ParentDatasetQueryResults:
        """Return a new query results object for the same parent datasets but
        different components.

        Parameters
        ----------
        components : `~collections.abc.Sequence` [ `str` or `None` ]
            Names of components to include in iteration.  `None` may be
            included (at most once) to include the parent dataset type.

        Returns
        -------
        results : `ParentDatasetQueryResults`
            A results object backed by the same query, restricted to the
            given components.
        """
        return ParentDatasetQueryResults(self._query, self._dataset_type, components)

    def expanded(self) -> ParentDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ParentDatasetQueryResults(
            self._query.with_record_columns(defer=True), self._dataset_type, self._components
        )

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        # Each underlying row yields one ref per requested component.
        return len(self._components) * self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)

650 

651 

class ChainedDatasetQueryResults(DatasetQueryResults):
    """A `DatasetQueryResults` implementation that simply chains together
    other results objects, each for a different parent dataset type.

    Parameters
    ----------
    chain : `~collections.abc.Sequence` [ `ParentDatasetQueryResults` ]
        The underlying results objects this object will chain together.
    doomed_by : `~collections.abc.Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed.  Queries with a non-empty list will never be executed.
        Child results objects may also have their own list.
    """

    def __init__(self, chain: Sequence[ParentDatasetQueryResults], doomed_by: Iterable[str] = ()):
        self._chain = chain
        self._doomed_by = tuple(doomed_by)

    # Both attributes assigned in __init__ must be declared; the previous
    # declaration omitted "_doomed_by", which defeated the purpose of using
    # __slots__ at all (the attribute silently fell back to __dict__).
    __slots__ = ("_chain", "_doomed_by")

    def __iter__(self) -> Iterator[DatasetRef]:
        return itertools.chain.from_iterable(self._chain)

    def __repr__(self) -> str:
        return "<DatasetRef iterator for multiple dataset types>"

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        return iter(self._chain)

    @contextmanager
    def materialize(self) -> Iterator[ChainedDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with ExitStack() as stack:
            yield ChainedDatasetQueryResults(
                [stack.enter_context(r.materialize()) for r in self._chain],
                # Forward the diagnostic messages so they survive
                # materialization, consistent with `expanded`; they were
                # previously dropped here.
                self._doomed_by,
            )

    def expanded(self) -> ChainedDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ChainedDatasetQueryResults([r.expanded() for r in self._chain], self._doomed_by)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return sum(r.count(exact=exact, discard=discard) for r in self._chain)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return any(r.any(execute=execute, exact=exact) for r in self._chain)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        # Our own doom messages come first, then each child's.
        result = list(self._doomed_by)
        for r in self._chain:
            result.extend(r.explain_no_results(execute=execute))
        return result

707 

708 

class DimensionRecordQueryResults(Iterable[DimensionRecord]):
    """An interface for objects that represent the results of queries for
    dimension records.
    """

    @property
    @abstractmethod
    def element(self) -> DimensionElement:
        """The dimension element whose records this object returns
        (`DimensionElement`).
        """
        raise NotImplementedError()

    @abstractmethod
    def run(self) -> DimensionRecordQueryResults:
        """Execute the backing query and return a results object holding the
        executed query.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            A new results object; exact execution semantics are delegated to
            the implementation (see `DatabaseDimensionRecordQueryResults`).
        """
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count.  If `False`,
            the result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require
            running the full query and then throwing away the result rows
            after counting them.  If `False`, this is an error, as the user
            would usually be better off executing the query first to fetch
            its rows into a new query (or passing ``exact=False``).  Ignored
            if ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique
        rows returned, so even with ``exact=True`` it may provide only an
        upper bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no
            rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found.  If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments)
            yield result rows.  `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        """Make the iterator return ordered result.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering.  Column name
            can be prefixed with minus (``-``) to use descending ordering.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            Returns ``self`` instance which is updated to return ordered
            result.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance.
        """
        raise NotImplementedError()

    @abstractmethod
    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        """Make the iterator return limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`
            The number of records to skip before returning at most ``limit``
            records.  `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            Returns ``self`` instance which is updated to return limited set
            of records.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance.  Normally this method is used together with `order_by`
        method.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT
            1``) of aspects of the tree to more precisely determine where
            rows were filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield
            any results.
        """
        raise NotImplementedError()

847 

848 

class DatabaseDimensionRecordQueryResults(DimensionRecordQueryResults):
    """Implementation of DimensionRecordQueryResults using database query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.
    element : `DimensionElement`
        Element whose records this object returns.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(self, query: Query, element: DimensionElement):
        self._query = query
        self._element = element

    # Declared for consistency with the other results classes in this
    # module (e.g. `DataCoordinateQueryResults`).
    __slots__ = ("_query", "_element")

    @property
    def element(self) -> DimensionElement:
        # Docstring inherited from base class.
        return self._element

    def __iter__(self) -> Iterator[DimensionRecord]:
        return self._query.iter_dimension_records(self._element)

    def run(self) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        return DatabaseDimensionRecordQueryResults(self._query.run(), self._element)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited from base class.
        # Forward ``discard`` to the backing query; it was previously
        # accepted here but silently ignored, so callers passing
        # ``discard=True`` got the query's default behavior instead.
        return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited from base class.
        return self._query.any(execute=execute, exact=exact)

    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        clause = OrderByClause.parse_element(args, self._element)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        if offset is None:
            # `None` offset is accepted for backwards compatibility.
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)