Coverage for python/lsst/daf/butler/registry/queries/_results.py: 56%

185 statements  


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "ChainedDatasetQueryResults",
    "DatabaseDimensionRecordQueryResults",
    "DataCoordinateQueryResults",
    "DatasetQueryResults",
    "DimensionRecordQueryResults",
    "ParentDatasetQueryResults",
)

import itertools
from abc import abstractmethod
from collections.abc import Iterable, Iterator, Sequence
from contextlib import AbstractContextManager, ExitStack, contextmanager
from typing import Any

from ...core import (
    DataCoordinate,
    DataCoordinateIterable,
    DatasetRef,
    DatasetType,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
)
from ._query import Query
from ._structs import OrderByClause


class DataCoordinateQueryResults(DataCoordinateIterable):
    """An enhanced implementation of `DataCoordinateIterable` that represents
    data IDs retrieved from a database query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
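
    A typical way to obtain one of these objects is via
    ``registry.queryDataIds`` (the dimension names here are hypothetical)::

        results = registry.queryDataIds(["visit", "detector"])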

66 """ 

67 

68 def __init__(self, query: Query): 

69 self._query = query 

70 

71 __slots__ = ("_query",) 

72 

73 def __iter__(self) -> Iterator[DataCoordinate]: 

74 return self._query.iter_data_ids() 

75 

76 def __repr__(self) -> str: 

77 return f"<DataCoordinate iterator with dimensions={self.graph}>" 

78 

79 @property 

80 def graph(self) -> DimensionGraph: 

81 # Docstring inherited from DataCoordinateIterable. 

82 return self._query.dimensions 

83 

84 def hasFull(self) -> bool: 

85 # Docstring inherited from DataCoordinateIterable. 

86 return True 

87 

88 def hasRecords(self) -> bool: 

89 # Docstring inherited from DataCoordinateIterable. 

90 return self._query.has_record_columns is True or not self.graph 

91 

    @contextmanager
    def materialize(self) -> Iterator[DataCoordinateQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DataCoordinateQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``. If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).

        Notes
        -----
        When using a very large result set to perform multiple queries (e.g.
        multiple calls to `subset` with different arguments, or even a single
        call to `expanded`), it may be much more efficient to start by
        materializing the query and only then performing the follow-up
        queries. It may also be less efficient, depending on how well the
        database engine's query optimizer can simplify those particular
        follow-up queries and how efficiently it caches query results even
        when they are not explicitly inserted into a temporary table. See
        `expanded` and `subset` for examples.
        """
        with self._query.open_context():
            yield DataCoordinateQueryResults(self._query.materialized())

    def expanded(self) -> DataCoordinateQueryResults:
        """Return a results object for which `hasRecords` returns `True`.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object for which `hasRecords` returns `True`. May be
            ``self`` if that is already the case.

        Notes
        -----
        For very large result sets, it may be much more efficient to call
        `materialize` before calling `expanded`, to avoid performing the
        original query multiple times (as a subquery) in the follow-up queries
        that fetch dimension records. For example::

            with registry.queryDataIds(...).materialize() as tempDataIds:
                dataIdsWithRecords = tempDataIds.expanded()
                for dataId in dataIdsWithRecords:
                    ...
        """
        return DataCoordinateQueryResults(self._query.with_record_columns(defer=True))

    def subset(
        self, graph: DimensionGraph | None = None, *, unique: bool = False
    ) -> DataCoordinateQueryResults:
        """Return a results object containing a subset of the dimensions of
        this one, and/or a unique near-subset of its rows.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new results object. If `None`,
            ``self.graph`` is used.
        unique : `bool`, optional
            If `True` (`False` is default), the query should only return unique
            data IDs. This is implemented in the database; to obtain unique
            results via Python-side processing (which may be more efficient in
            some cases), use `toSet` to construct a `DataCoordinateSet` from
            this results object instead.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object corresponding to the given criteria. May be
            ``self`` if it already qualifies.

        Raises
        ------
        ValueError
            Raised when ``graph`` is not a subset of the dimension graph in
            this result.

        Notes
        -----
        This method can only return a "near-subset" of the original result rows
        in general because of subtleties in how spatial overlaps are
        implemented; see `Query.projected` for more information.

        When calling `subset` multiple times on the same very large result set,
        it may be much more efficient to call `materialize` first. For
        example::

            dimensions1 = DimensionGraph(...)
            dimensions2 = DimensionGraph(...)
            with registry.queryDataIds(...).materialize() as tempDataIds:
                for dataId1 in tempDataIds.subset(graph=dimensions1, unique=True):
                    ...
                for dataId2 in tempDataIds.subset(graph=dimensions2, unique=True):
                    ...
        """
        if graph is None:
            graph = self.graph
        if not graph.issubset(self.graph):
            raise ValueError(f"{graph} is not a subset of {self.graph}")
        query = self._query.projected(graph, unique=unique, defer=True, drop_postprocessing=True)
        return DataCoordinateQueryResults(query)

    def findDatasets(
        self,
        datasetType: Any,
        collections: Any,
        *,
        findFirst: bool = True,
        components: bool | None = None,
    ) -> DatasetQueryResults:
        """Find datasets using the data IDs identified by this query.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for. Must have
            dimensions that are a subset of ``self.graph``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof. ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each result data ID, only yield one
            `DatasetRef`, from the first collection in which a dataset of that
            dataset type appears (according to the order of ``collections``
            passed in). If `True`, ``collections`` must not contain regular
            expressions and may not be ``...``.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their parent
            datasets were not matched by the expression. Fully-specified
            component datasets (`str` or `DatasetType` instances) are always
            included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.

        Returns
        -------
        datasets : `ParentDatasetQueryResults`
            A lazy-evaluation object representing dataset query results,
            iterable over `DatasetRef` objects. If ``self.hasRecords()``, all
            nested data IDs in those dataset references will have records as
            well.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
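
        A minimal usage sketch (the dataset type and collection names here
        are hypothetical)::

            data_ids = registry.queryDataIds(["visit", "detector"])
            refs = data_ids.findDatasets("calexp", collections=["HSC/runs/demo"])
            for ref in refs:
                ...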

259 """ 

260 parent_dataset_type, components_found = self._query.backend.resolve_single_dataset_type_wildcard( 

261 datasetType, components=components, explicit_only=True 

262 ) 

263 return ParentDatasetQueryResults( 

264 self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True), 

265 parent_dataset_type, 

266 components_found, 

267 ) 

268 

    def findRelatedDatasets(
        self,
        datasetType: DatasetType | str,
        collections: Any,
        *,
        findFirst: bool = True,
        dimensions: DimensionGraph | None = None,
    ) -> Iterable[tuple[DataCoordinate, DatasetRef]]:
        """Find datasets using the data IDs identified by this query, and
        return them along with the original data IDs.

        This is a variant of `findDatasets` that is often more useful when
        the target dataset type does not have all of the dimensions of the
        original data ID query, as is generally the case with calibration
        lookups.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for. Must have
            dimensions that are a subset of ``self.graph``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof. ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each data ID in ``self``, only yield one
            `DatasetRef`, from the first collection in which a dataset of that
            dataset type appears (according to the order of ``collections``
            passed in). If `True`, ``collections`` must not contain regular
            expressions and may not be ``...``. Note that this is not the
            same as yielding one `DatasetRef` for each yielded data ID if
            ``dimensions`` is not `None`.
        dimensions : `DimensionGraph`, optional
            The dimensions of the data IDs returned. Must be a subset of
            ``self.graph``.

        Returns
        -------
        pairs : `~collections.abc.Iterable` [ `tuple` [ `DataCoordinate`, \
                `DatasetRef` ] ]
            An iterable of (data ID, dataset reference) pairs.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
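
        A minimal usage sketch (hypothetical dataset type and collection
        names), pairing each exposure-level data ID with its matching
        calibration dataset::

            data_ids = registry.queryDataIds(["exposure", "detector"])
            pairs = data_ids.findRelatedDatasets("bias", collections=["HSC/calib"])
            for data_id, ref in pairs:
                ...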

317 """ 

318 if dimensions is None: 

319 dimensions = self.graph 

320 parent_dataset_type, _ = self._query.backend.resolve_single_dataset_type_wildcard( 

321 datasetType, components=False, explicit_only=True 

322 ) 

323 query = self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True) 

324 return query.iter_data_ids_and_dataset_refs(parent_dataset_type, dimensions) 

325 

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them. If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``). Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique rows
        returned, so even with ``exact=True`` it may provide only an upper
        bound on the number of *deduplicated* result rows.
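
        For example, a cheap upper bound can be obtained without running the
        full query::

            n_at_most = registry.queryDataIds(["visit"]).count(exact=False)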

354 """ 

355 return self._query.count(exact=exact, discard=discard) 

356 

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows. `False` if it definitely would not.
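
        For example (the data ID constraint here is hypothetical)::

            if registry.queryDataIds(["visit"], instrument="HSC").any():
                ...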

377 """ 

378 return self._query.any(execute=execute, exact=exact) 

379 

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
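
        A typical debugging pattern pairs this with `any` (sketch)::

            results = registry.queryDataIds(["visit"])
            if not results.any():
                for message in results.explain_no_results():
                    print(message)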

396 """ 

397 return self._query.explain_no_results(execute=execute) 

398 

    def order_by(self, *args: str) -> DataCoordinateQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering. Column name
            can be prefixed with minus (``-``) to use descending ordering.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            Returns the ``self`` instance, which is updated to return ordered
            results.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining.
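
        For example, to iterate in descending visit order::

            for dataId in registry.queryDataIds(["visit"]).order_by("-visit"):
                ...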

418 """ 

419 clause = OrderByClause.parse_general(args, self._query.dimensions) 

420 self._query = self._query.sorted(clause.terms, defer=True) 

421 return self 

422 

    def limit(self, limit: int, offset: int | None = 0) -> DataCoordinateQueryResults:
        """Make the iterator return a limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records. `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            Returns the ``self`` instance, which is updated to return a
            limited set of records.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining. Normally this method is used
        together with the `order_by` method.
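
        For example, the five highest-numbered visits::

            results = registry.queryDataIds(["visit"])
            for dataId in results.order_by("-visit").limit(5):
                ...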

446 """ 

447 if offset is None: 

448 offset = 0 

449 self._query = self._query.sliced(offset, offset + limit, defer=True) 

450 return self 

451 

452 

class DatasetQueryResults(Iterable[DatasetRef]):
    """An interface for objects that represent the results of queries for
    datasets.
    """

    @abstractmethod
    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        """Group results by parent dataset type.

        Returns
        -------
        iter : `~collections.abc.Iterator` [ `ParentDatasetQueryResults` ]
            An iterator over `DatasetQueryResults` instances that are each
            responsible for a single parent dataset type (either just that
            dataset type, one or more of its component dataset types, or both).
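
        A minimal sketch, assuming ``results`` is a `DatasetQueryResults`
        (e.g. as returned by ``registry.queryDatasets``)::

            for parent_results in results.byParentDatasetType():
                print(parent_results.parentDatasetType.name)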

468 """ 

469 raise NotImplementedError() 

470 

    @abstractmethod
    def materialize(self) -> AbstractContextManager[DatasetQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DatasetQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``. If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).
        """
        raise NotImplementedError()

    @abstractmethod
    def expanded(self) -> DatasetQueryResults:
        """Return a `DatasetQueryResults` for which `DataCoordinate.hasRecords`
        returns `True` for all data IDs in returned `DatasetRef` objects.

        Returns
        -------
        expanded : `DatasetQueryResults`
            Either a new `DatasetQueryResults` instance or ``self``, if it is
            already expanded.

        Notes
        -----
        As with `DataCoordinateQueryResults.expanded`, it may be more efficient
        to call `materialize` before expanding data IDs for very large result
        sets.
        """
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them. If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``). Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique rows
        returned, so even with ``exact=True`` it may provide only an upper
        bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(
        self,
        *,
        execute: bool = True,
        exact: bool = True,
    ) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows. `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
        """
        raise NotImplementedError()

    def _iter_by_dataset_type(self) -> Iterator[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Group results by dataset type.

        This is a private hook for the interface defined by
        `DatasetRef.iter_by_type`, enabling much more efficient
        processing of heterogeneous `DatasetRef` iterables when they come
        directly from queries.
        """
        for parent_results in self.byParentDatasetType():
            for component in parent_results.components:
                dataset_type = parent_results.parentDatasetType
                if component is not None:
                    dataset_type = dataset_type.makeComponentDatasetType(component)
                yield (dataset_type, parent_results.withComponents((component,)))


class ParentDatasetQueryResults(DatasetQueryResults):
    """An object that represents results from a query for datasets with a
    single parent `DatasetType`.

    Parameters
    ----------
    query : `Query`
        Low-level query object that backs these results.
    dataset_type : `DatasetType`
        Parent dataset type for all datasets returned by this query.
    components : `~collections.abc.Sequence` [ `str` or `None` ], optional
        Names of components to include in iteration. `None` may be included
        (at most once) to include the parent dataset type.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(
        self,
        query: Query,
        dataset_type: DatasetType,
        components: Sequence[str | None] = (None,),
    ):
        self._query = query
        self._dataset_type = dataset_type
        self._components = components

    __slots__ = ("_query", "_dataset_type", "_components")

    def __iter__(self) -> Iterator[DatasetRef]:
        return self._query.iter_dataset_refs(self._dataset_type, self._components)

    def __repr__(self) -> str:
        return f"<DatasetRef iterator for [components of] {self._dataset_type.name}>"

    @property
    def components(self) -> Sequence[str | None]:
        """The components of the parent dataset type included in these results
        (`~collections.abc.Sequence` [ `str` or `None` ]).
        """
        return self._components

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        yield self

    @contextmanager
    def materialize(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with self._query.open_context():
            yield ParentDatasetQueryResults(self._query.materialized(), self._dataset_type, self._components)

    @property
    def parentDatasetType(self) -> DatasetType:
        """The parent dataset type for all datasets in this iterable
        (`DatasetType`).
        """
        return self._dataset_type

    @property
    def dataIds(self) -> DataCoordinateQueryResults:
        """A lazy-evaluation object representing a query for just the data
        IDs of the datasets that would be returned by this query
        (`DataCoordinateQueryResults`).

        The returned object is not in general `zip`-iterable with ``self``;
        it may be in a different order or have (or not have) duplicates.
        """
        return DataCoordinateQueryResults(self._query.projected(defer=True))

    def withComponents(self, components: Sequence[str | None]) -> ParentDatasetQueryResults:
        """Return a new query results object for the same parent datasets but
        different components.

        Parameters
        ----------
        components : `~collections.abc.Sequence` [ `str` or `None` ]
            Names of components to include in iteration. `None` may be
            included (at most once) to include the parent dataset type.
        """
        return ParentDatasetQueryResults(self._query, self._dataset_type, components)

    def expanded(self) -> ParentDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ParentDatasetQueryResults(
            self._query.with_record_columns(defer=True), self._dataset_type, self._components
        )

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return len(self._components) * self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)


class ChainedDatasetQueryResults(DatasetQueryResults):
    """A `DatasetQueryResults` implementation that simply chains together
    other results objects, each for a different parent dataset type.

    Parameters
    ----------
    chain : `~collections.abc.Sequence` [ `ParentDatasetQueryResults` ]
        The underlying results objects this object will chain together.
    doomed_by : `~collections.abc.Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed. Queries with a non-empty list will never be executed.
        Child results objects may also have their own list.
    """

    def __init__(self, chain: Sequence[ParentDatasetQueryResults], doomed_by: Iterable[str] = ()):
        self._chain = chain
        self._doomed_by = tuple(doomed_by)

    __slots__ = ("_chain", "_doomed_by")

    def __iter__(self) -> Iterator[DatasetRef]:
        return itertools.chain.from_iterable(self._chain)

    def __repr__(self) -> str:
        return "<DatasetRef iterator for multiple dataset types>"

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        return iter(self._chain)

    @contextmanager
    def materialize(self) -> Iterator[ChainedDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with ExitStack() as stack:
            yield ChainedDatasetQueryResults([stack.enter_context(r.materialize()) for r in self._chain])

    def expanded(self) -> ChainedDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ChainedDatasetQueryResults([r.expanded() for r in self._chain], self._doomed_by)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return sum(r.count(exact=exact, discard=discard) for r in self._chain)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return any(r.any(execute=execute, exact=exact) for r in self._chain)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        result = list(self._doomed_by)
        for r in self._chain:
            result.extend(r.explain_no_results(execute=execute))
        return result


class DimensionRecordQueryResults(Iterable[DimensionRecord]):
    """An interface for objects that represent the results of queries for
    dimension records.
    """

    @property
    @abstractmethod
    def element(self) -> DimensionElement:
        """The dimension element whose records this object returns
        (`DimensionElement`).
        """
        raise NotImplementedError()

    @abstractmethod
    def run(self) -> DimensionRecordQueryResults:
        """Execute the query and return a results object holding its rows."""
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them. If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``). Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique rows
        returned, so even with ``exact=True`` it may provide only an upper
        bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows. `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering. Column name
            can be prefixed with minus (``-``) to use descending ordering.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            Returns the ``self`` instance, which is updated to return ordered
            results.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance.
        """
        raise NotImplementedError()

    @abstractmethod
    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        """Make the iterator return a limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records. `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            Returns the ``self`` instance, which is updated to return a
            limited set of records.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance. Normally this method is used together with the `order_by`
        method.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
        """
        raise NotImplementedError()


class DatabaseDimensionRecordQueryResults(DimensionRecordQueryResults):
    """Implementation of `DimensionRecordQueryResults` backed by a database
    query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.
    element : `DimensionElement`
        Element whose records this object returns.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
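
    A typical way to obtain one of these objects is via
    ``registry.queryDimensionRecords`` (the element and data ID here are
    hypothetical)::

        for record in registry.queryDimensionRecords("detector", instrument="HSC"):
            ...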

920 """ 

921 

922 def __init__(self, query: Query, element: DimensionElement): 

923 self._query = query 

924 self._element = element 

925 

926 @property 

927 def element(self) -> DimensionElement: 

928 return self._element 

929 

930 def __iter__(self) -> Iterator[DimensionRecord]: 

931 return self._query.iter_dimension_records(self._element) 

932 

933 def run(self) -> DimensionRecordQueryResults: 

934 return DatabaseDimensionRecordQueryResults(self._query.run(), self._element) 

935 

936 def count(self, *, exact: bool = True, discard: bool = False) -> int: 

937 # Docstring inherited from base class. 

938 return self._query.count(exact=exact) 

939 

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited from base class.
        return self._query.any(execute=execute, exact=exact)

    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        clause = OrderByClause.parse_element(args, self._element)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        if offset is None:
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)