Coverage for python/lsst/daf/butler/registry/queries/_results.py: 48%

182 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-23 11:08 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

# Public API of this module; kept sorted case-insensitively.
__all__ = (
    "ChainedDatasetQueryResults",
    "DatabaseDimensionRecordQueryResults",
    "DataCoordinateQueryResults",
    "DatasetQueryResults",
    "DimensionRecordQueryResults",
    "ParentDatasetQueryResults",
)

31 

32import itertools 

33from abc import abstractmethod 

34from collections.abc import Iterable, Iterator, Sequence 

35from contextlib import AbstractContextManager, ExitStack, contextmanager 

36from typing import Any 

37 

38from ...core import ( 

39 DataCoordinate, 

40 DataCoordinateIterable, 

41 DatasetRef, 

42 DatasetType, 

43 DimensionElement, 

44 DimensionGraph, 

45 DimensionRecord, 

46) 

47from ._query import Query 

48from ._structs import OrderByClause 

49 

50 

class DataCoordinateQueryResults(DataCoordinateIterable):
    """An enhanced implementation of `DataCoordinateIterable` that represents
    data IDs retrieved from a database query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(self, query: Query):
        self._query = query

    __slots__ = ("_query",)

    def __iter__(self) -> Iterator[DataCoordinate]:
        return self._query.iter_data_ids()

    def __repr__(self) -> str:
        return f"<DataCoordinate iterator with dimensions={self.graph}>"

    @property
    def graph(self) -> DimensionGraph:
        # Docstring inherited from DataCoordinateIterable.
        return self._query.dimensions

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinateIterable.
        return True

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinateIterable.
        # An empty dimension graph means the only data ID is the empty data
        # ID, which trivially has records.
        return self._query.has_record_columns is True or not self.graph

    @contextmanager
    def materialize(self) -> Iterator[DataCoordinateQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DataCoordinateQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``.  If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).

        Notes
        -----
        When using a very large result set to perform multiple queries (e.g.
        multiple calls to `subset` with different arguments, or even a single
        call to `expanded`), it may be much more efficient to start by
        materializing the query and only then performing the follow up queries.
        It may also be less efficient, depending on how well database engine's
        query optimizer can simplify those particular follow-up queries and
        how efficiently it caches query results even when they are not
        explicitly inserted into a temporary table.  See `expanded` and
        `subset` for examples.
        """
        with self._query.open_context():
            yield DataCoordinateQueryResults(self._query.materialized())

    def expanded(self) -> DataCoordinateQueryResults:
        """Return a results object for which `hasRecords` returns `True`.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object for which `hasRecords` returns `True`.  May be
            ``self`` if that is already the case.

        Notes
        -----
        For very large result sets, it may be much more efficient to call
        `materialize` before calling `expanded`, to avoid performing the
        original query multiple times (as a subquery) in the follow-up queries
        that fetch dimension records.  For example::

            with registry.queryDataIds(...).materialize() as tempDataIds:
                dataIdsWithRecords = tempDataIds.expanded()
                for dataId in dataIdsWithRecords:
                    ...
        """
        return DataCoordinateQueryResults(self._query.with_record_columns(defer=True))

    def subset(
        self, graph: DimensionGraph | None = None, *, unique: bool = False
    ) -> DataCoordinateQueryResults:
        """Return a results object containing a subset of the dimensions of
        this one, and/or a unique near-subset of its rows.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new results object.  If `None`,
            ``self.graph`` is used.
        unique : `bool`, optional
            If `True` (`False` is default), the query should only return unique
            data IDs.  This is implemented in the database; to obtain unique
            results via Python-side processing (which may be more efficient in
            some cases), use `toSet` to construct a `DataCoordinateSet` from
            this results object instead.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object corresponding to the given criteria.  May be
            ``self`` if it already qualifies.

        Raises
        ------
        ValueError
            Raised when ``graph`` is not a subset of the dimension graph in
            this result.

        Notes
        -----
        This method can only return a "near-subset" of the original result rows
        in general because of subtleties in how spatial overlaps are
        implemented; see `Query.projected` for more information.

        When calling `subset` multiple times on the same very large result set,
        it may be much more efficient to call `materialize` first.  For
        example::

            dimensions1 = DimensionGraph(...)
            dimensions2 = DimensionGraph(...)
            with registry.queryDataIds(...).materialize() as tempDataIds:
                for dataId1 in tempDataIds.subset(
                        graph=dimensions1,
                        unique=True):
                    ...
                for dataId2 in tempDataIds.subset(
                        graph=dimensions2,
                        unique=True):
                    ...
        """
        if graph is None:
            graph = self.graph
        if not graph.issubset(self.graph):
            raise ValueError(f"{graph} is not a subset of {self.graph}")
        query = self._query.projected(graph, unique=unique, defer=True, drop_postprocessing=True)
        return DataCoordinateQueryResults(query)

    def findDatasets(
        self,
        datasetType: Any,
        collections: Any,
        *,
        findFirst: bool = True,
        components: bool | None = None,
    ) -> DatasetQueryResults:
        """Find datasets using the data IDs identified by this query.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for.  Must have
            dimensions that are a subset of ``self.graph``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof.  ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each result data ID, only yield one
            `DatasetRef`, from the first collection in which a dataset of that
            dataset type appears (according to the order of ``collections``
            passed in).  If `True`, ``collections`` must not contain regular
            expressions and may not be ``...``.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well.  If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their parent
            datasets were not matched by the expression.  Fully-specified
            component datasets (`str` or `DatasetType` instances) are always
            included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26.  After v27 this argument will be removed
            entirely.

        Returns
        -------
        datasets : `ParentDatasetQueryResults`
            A lazy-evaluation object representing dataset query results,
            iterable over `DatasetRef` objects.  If ``self.hasRecords()``, all
            nested data IDs in those dataset references will have records as
            well.

        Raises
        ------
        ValueError
            Raised if ``datasetType.dimensions.issubset(self.graph) is False``.
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
        """
        # Resolve any wildcard/str spec into a concrete parent dataset type
        # plus the component names (if any) the expression matched.
        parent_dataset_type, components_found = self._query.backend.resolve_single_dataset_type_wildcard(
            datasetType, components=components, explicit_only=True
        )
        return ParentDatasetQueryResults(
            self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True),
            parent_dataset_type,
            components_found,
        )

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count.  If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them.  If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``).  Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique rows
        returned, so even with ``exact=True`` it may provide only an upper
        bound on the number of *deduplicated* result rows.
        """
        return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found.  If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows.  `False` if it definitely would not.
        """
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
        """
        return self._query.explain_no_results(execute=execute)

    def order_by(self, *args: str) -> DataCoordinateQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering.  Column name
            can be prefixed with minus (``-``) to use descending ordering.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            Returns ``self`` instance which is updated to return ordered
            result.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining.
        """
        clause = OrderByClause.parse_general(args, self._query.dimensions)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DataCoordinateQueryResults:
        """Make the iterator return limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records.  `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            Returns ``self`` instance which is updated to return limited set
            of records.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining.  Normally this method is used
        together with `order_by` method.
        """
        if offset is None:
            # Backwards compatibility: None means "no offset".
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self

396 

397 

class DatasetQueryResults(Iterable[DatasetRef]):
    """An interface for objects that represent the results of queries for
    datasets.

    Notes
    -----
    This class inherits `collections.abc.Iterable`, whose metaclass is
    `abc.ABCMeta`, so the `abc.abstractmethod` decorators below are enforced
    on instantiation.
    """

    @abstractmethod
    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        """Group results by parent dataset type.

        Returns
        -------
        iter : `Iterator` [ `ParentDatasetQueryResults` ]
            An iterator over `DatasetQueryResults` instances that are each
            responsible for a single parent dataset type (either just that
            dataset type, one or more of its component dataset types, or both).
        """
        raise NotImplementedError()

    @abstractmethod
    def materialize(self) -> AbstractContextManager[DatasetQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DatasetQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``.  If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).
        """
        raise NotImplementedError()

    @abstractmethod
    def expanded(self) -> DatasetQueryResults:
        """Return a `DatasetQueryResults` for which `DataCoordinate.hasRecords`
        returns `True` for all data IDs in returned `DatasetRef` objects.

        Returns
        -------
        expanded : `DatasetQueryResults`
            Either a new `DatasetQueryResults` instance or ``self``, if it is
            already expanded.

        Notes
        -----
        As with `DataCoordinateQueryResults.expanded`, it may be more efficient
        to call `materialize` before expanding data IDs for very large result
        sets.
        """
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count.  If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them.  If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``).  Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique rows
        returned, so even with ``exact=True`` it may provide only an upper
        bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(
        self,
        *,
        execute: bool = True,
        exact: bool = True,
    ) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found.  If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows.  `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
        """
        raise NotImplementedError()

531 

532 

class ParentDatasetQueryResults(DatasetQueryResults):
    """Dataset query results restricted to a single parent `DatasetType`.

    Parameters
    ----------
    query : `Query`
        Low-level query object that backs these results.
    dataset_type : `DatasetType`
        Parent dataset type for all datasets returned by this query.
    components : `Sequence` [ `str` or `None` ], optional
        Names of components to include in iteration.  `None` may be included
        (at most once) to include the parent dataset type.

    Notes
    -----
    Nearly all functionality lives on the wrapped `Query`; this "QueryResult"
    wrapper survives only for interface backwards compatibility and to offer
    a more specific iterator type.
    """

    __slots__ = ("_query", "_dataset_type", "_components")

    def __init__(
        self,
        query: Query,
        dataset_type: DatasetType,
        components: Sequence[str | None] = (None,),
    ):
        self._query = query
        self._dataset_type = dataset_type
        self._components = components

    def __iter__(self) -> Iterator[DatasetRef]:
        return self._query.iter_dataset_refs(self._dataset_type, self._components)

    def __repr__(self) -> str:
        return "<DatasetRef iterator for [components of] " + self._dataset_type.name + ">"

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        # Already restricted to a single parent dataset type, so the grouping
        # is just this object itself.
        yield self

    @contextmanager
    def materialize(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with self._query.open_context():
            materialized = self._query.materialized()
            yield ParentDatasetQueryResults(materialized, self._dataset_type, self._components)

    @property
    def parentDatasetType(self) -> DatasetType:
        """The parent dataset type for all datasets in this iterable
        (`DatasetType`).
        """
        return self._dataset_type

    @property
    def dataIds(self) -> DataCoordinateQueryResults:
        """A lazy-evaluation object representing a query for just the data
        IDs of the datasets that would be returned by this query
        (`DataCoordinateQueryResults`).

        The returned object is not in general `zip`-iterable with ``self``;
        it may be in a different order or have (or not have) duplicates.
        """
        projected = self._query.projected(defer=True)
        return DataCoordinateQueryResults(projected)

    def withComponents(self, components: Sequence[str | None]) -> ParentDatasetQueryResults:
        """Return a new query results object for the same parent datasets but
        different components.

        Parameters
        ----------
        components : `Sequence` [ `str` or `None` ]
            Names of components to include in iteration.  `None` may be
            included (at most once) to include the parent dataset type.
        """
        return ParentDatasetQueryResults(self._query, self._dataset_type, components)

    def expanded(self) -> ParentDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        expanded_query = self._query.with_record_columns(defer=True)
        return ParentDatasetQueryResults(expanded_query, self._dataset_type, self._components)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        # Each query row yields one ref per requested component, so the row
        # count is scaled by the number of components being iterated.
        rows = self._query.count(exact=exact, discard=discard)
        return rows * len(self._components)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)

628 

629 

class ChainedDatasetQueryResults(DatasetQueryResults):
    """A `DatasetQueryResults` implementation that simply chains together
    other results objects, each for a different parent dataset type.

    Parameters
    ----------
    chain : `Sequence` [ `ParentDatasetQueryResults` ]
        The underlying results objects this object will chain together.
    doomed_by : `Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed.  Queries with a non-empty list will never be executed.
        Child results objects may also have their own list.
    """

    def __init__(self, chain: Sequence[ParentDatasetQueryResults], doomed_by: Iterable[str] = ()):
        self._chain = chain
        self._doomed_by = tuple(doomed_by)

    # Fix: ``_doomed_by`` is assigned in ``__init__`` but was missing from
    # ``__slots__``; that only worked because the (slot-less) base class
    # provides a ``__dict__``, silently defeating the purpose of slots.
    __slots__ = ("_chain", "_doomed_by")

    def __iter__(self) -> Iterator[DatasetRef]:
        return itertools.chain.from_iterable(self._chain)

    def __repr__(self) -> str:
        return "<DatasetRef iterator for multiple dataset types>"

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        return iter(self._chain)

    @contextmanager
    def materialize(self) -> Iterator[ChainedDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with ExitStack() as stack:
            yield ChainedDatasetQueryResults(
                [stack.enter_context(r.materialize()) for r in self._chain],
                # Fix: previously the diagnostic messages were dropped here,
                # so a materialized chain could not explain why it was empty.
                self._doomed_by,
            )

    def expanded(self) -> ChainedDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ChainedDatasetQueryResults([r.expanded() for r in self._chain], self._doomed_by)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return sum(r.count(exact=exact, discard=discard) for r in self._chain)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return any(r.any(execute=execute, exact=exact) for r in self._chain)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        # Combine this chain's own doom messages with those of every child.
        result = list(self._doomed_by)
        for r in self._chain:
            result.extend(r.explain_no_results(execute=execute))
        return result

685 

686 

class DimensionRecordQueryResults(Iterable[DimensionRecord]):
    """An interface for objects that represent the results of queries for
    dimension records.
    """

    @property
    @abstractmethod
    def element(self) -> DimensionElement:
        """Dimension element whose records this iterable returns
        (`DimensionElement`).
        """
        raise NotImplementedError()

    @abstractmethod
    def run(self) -> DimensionRecordQueryResults:
        """Execute the query and return a results object backed by the
        executed query.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            A results object whose backing query has been run.

        Notes
        -----
        NOTE(review): the exact materialization semantics are defined by the
        implementation's backing ``Query.run`` (see
        `DatabaseDimensionRecordQueryResults.run`); confirm there before
        relying on them.
        """
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count.  If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them.  If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``).  Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique rows
        returned, so even with ``exact=True`` it may provide only an upper
        bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found.  If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows.  `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        """Make the iterator return ordered result.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering.  Column name
            can be prefixed with minus (``-``) to use descending ordering.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            Returns ``self`` instance which is updated to return ordered
            result.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance.
        """
        raise NotImplementedError()

    @abstractmethod
    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        """Make the iterator return limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`
            The number of records to skip before returning at most ``limit``
            records.  `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            Returns ``self`` instance which is updated to return limited set of
            records.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance.  Normally this method is used together with `order_by` method.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
        """
        raise NotImplementedError()

825 

826 

class DatabaseDimensionRecordQueryResults(DimensionRecordQueryResults):
    """Implementation of DimensionRecordQueryResults using database query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.
    element : `DimensionElement`
        Element whose records this object returns.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(self, query: Query, element: DimensionElement):
        self._query = query
        self._element = element

    @property
    def element(self) -> DimensionElement:
        # Docstring inherited from base class.
        return self._element

    def __iter__(self) -> Iterator[DimensionRecord]:
        return self._query.iter_dimension_records(self._element)

    def run(self) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        return DatabaseDimensionRecordQueryResults(self._query.run(), self._element)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited from base class.
        # Fix: ``discard`` was previously dropped (only ``exact`` was
        # forwarded), so callers explicitly opting into discarding rows were
        # silently ignored, unlike the other results classes in this module.
        return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited from base class.
        return self._query.any(execute=execute, exact=exact)

    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        clause = OrderByClause.parse_element(args, self._element)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        if offset is None:
            # Backwards compatibility: None means "no offset".
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)