# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "ChainedDatasetQueryResults",
    "DatabaseDimensionRecordQueryResults",
    "DataCoordinateQueryResults",
    "DatasetQueryResults",
    "DimensionRecordQueryResults",
    "ParentDatasetQueryResults",
)

import itertools
from abc import abstractmethod
from collections.abc import Iterable, Iterator, Sequence
from contextlib import AbstractContextManager, ExitStack, contextmanager
from typing import Any

from deprecated.sphinx import deprecated

from ..._dataset_ref import DatasetRef
from ..._dataset_type import DatasetType
from ...dimensions import (
    DataCoordinate,
    DataCoordinateIterable,
    DimensionElement,
    DimensionGraph,
    DimensionGroup,
    DimensionRecord,
)
from ._query import Query
from ._structs import OrderByClause


class DataCoordinateQueryResults(DataCoordinateIterable):
    """An enhanced implementation of `DataCoordinateIterable` that represents
    data IDs retrieved from a database query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
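
    A typical instance is obtained from `Registry.queryDataIds`; for example
    (the instrument and dimension names here are hypothetical)::

        data_ids = registry.queryDataIds(["visit", "detector"], instrument="HSC")
        for data_id in data_ids:
            ...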

    """

    def __init__(self, query: Query):
        self._query = query

    __slots__ = ("_query",)

    def __iter__(self) -> Iterator[DataCoordinate]:
        return self._query.iter_data_ids()

    def __repr__(self) -> str:
        return f"<DataCoordinate iterator with dimensions={self.graph}>"

    @property
    @deprecated(
        "Deprecated in favor of .dimensions. Will be removed after v27.",
        version="v27",
        category=FutureWarning,
    )
    def graph(self) -> DimensionGraph:
        # Docstring inherited from DataCoordinateIterable.
        return self._query.dimensions._as_graph()

    @property
    def dimensions(self) -> DimensionGroup:
        """The dimensions of the data IDs returned by this query."""
        return self._query.dimensions

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinateIterable.
        return True

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinateIterable.
        return self._query.has_record_columns is True or not self.dimensions

    @contextmanager
    def materialize(self) -> Iterator[DataCoordinateQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DataCoordinateQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``. If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).

        Notes
        -----
        When using a very large result set to perform multiple queries (e.g.
        multiple calls to `subset` with different arguments, or even a single
        call to `expanded`), it may be much more efficient to start by
        materializing the query and only then performing the follow-up
        queries. It may also be less efficient, depending on how well the
        database engine's query optimizer can simplify those particular
        follow-up queries and how efficiently it caches query results even
        when they are not explicitly inserted into a temporary table. See
        `expanded` and `subset` for examples.
        """
        with self._query.open_context():
            yield DataCoordinateQueryResults(self._query.materialized())

    def expanded(self) -> DataCoordinateQueryResults:
        """Return a results object for which `hasRecords` returns `True`.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object for which `hasRecords` returns `True`. May be
            ``self`` if that is already the case.

        Notes
        -----
        For very large result sets, it may be much more efficient to call
        `materialize` before calling `expanded`, to avoid performing the
        original query multiple times (as a subquery) in the follow-up queries
        that fetch dimension records. For example::

            with registry.queryDataIds(...).materialize() as tempDataIds:
                dataIdsWithRecords = tempDataIds.expanded()
                for dataId in dataIdsWithRecords:
                    ...
        """
        return DataCoordinateQueryResults(self._query.with_record_columns(defer=True))

    def subset(
        self,
        dimensions: DimensionGroup | DimensionGraph | Iterable[str] | None = None,
        *,
        unique: bool = False,
    ) -> DataCoordinateQueryResults:
        """Return a results object containing a subset of the dimensions of
        this one, and/or a unique near-subset of its rows.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Parameters
        ----------
        dimensions : `DimensionGroup`, `DimensionGraph`, or \
                `~collections.abc.Iterable` [ `str` ], optional
            Dimensions to include in the new results object. If `None`,
            ``self.dimensions`` is used.
        unique : `bool`, optional
            If `True` (`False` is default), the query should only return
            unique data IDs. This is implemented in the database; to obtain
            unique results via Python-side processing (which may be more
            efficient in some cases), use `toSet` to construct a
            `DataCoordinateSet` from this results object instead.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object corresponding to the given criteria. May be
            ``self`` if it already qualifies.

        Raises
        ------
        ValueError
            Raised when ``dimensions`` is not a subset of the dimensions in
            this result.

        Notes
        -----
        This method can only return a "near-subset" of the original result
        rows in general because of subtleties in how spatial overlaps are
        implemented; see `Query.projected` for more information.

        When calling `subset` multiple times on the same very large result
        set, it may be much more efficient to call `materialize` first. For
        example::

            dimensions1 = DimensionGroup(...)
            dimensions2 = DimensionGroup(...)
            with registry.queryDataIds(...).materialize() as tempDataIds:
                for dataId1 in tempDataIds.subset(dimensions1, unique=True):
                    ...
                for dataId2 in tempDataIds.subset(dimensions2, unique=True):
                    ...
        """
        if dimensions is None:
            dimensions = self.dimensions
        else:
            dimensions = self.dimensions.universe.conform(dimensions)
            if not dimensions.issubset(self.dimensions):
                raise ValueError(f"{dimensions} is not a subset of {self.dimensions}")
        query = self._query.projected(dimensions.names, unique=unique, defer=True, drop_postprocessing=True)
        return DataCoordinateQueryResults(query)

    def findDatasets(
        self,
        datasetType: Any,
        collections: Any,
        *,
        findFirst: bool = True,
        components: bool | None = None,
    ) -> DatasetQueryResults:
        """Find datasets using the data IDs identified by this query.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for. Must have
            dimensions that are a subset of ``self.dimensions``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof. ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each result data ID, only yield one
            `DatasetRef`, from the first collection in which a dataset of that
            dataset type appears (according to the order of ``collections``
            passed in). If `True`, ``collections`` must not contain regular
            expressions and may not be ``...``.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their
            parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.

        Returns
        -------
        datasets : `ParentDatasetQueryResults`
            A lazy-evaluation object representing dataset query results,
            iterable over `DatasetRef` objects. If ``self.hasRecords()``, all
            nested data IDs in those dataset references will have records as
            well.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
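
        For example, assuming ``data_ids`` is an instance of this class (the
        dataset type and collection names here are hypothetical)::

            refs = data_ids.findDatasets("calexp", collections=["HSC/runs/RC2"])
            for ref in refs:
                ...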

        """
        parent_dataset_type, components_found = self._query.backend.resolve_single_dataset_type_wildcard(
            datasetType, components=components, explicit_only=True
        )
        return ParentDatasetQueryResults(
            self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True),
            parent_dataset_type,
            components_found,
        )

    def findRelatedDatasets(
        self,
        datasetType: DatasetType | str,
        collections: Any,
        *,
        findFirst: bool = True,
        dimensions: DimensionGroup | DimensionGraph | Iterable[str] | None = None,
    ) -> Iterable[tuple[DataCoordinate, DatasetRef]]:
        """Find datasets using the data IDs identified by this query, and
        return them along with the original data IDs.

        This is a variant of `findDatasets` that is often more useful when
        the target dataset type does not have all of the dimensions of the
        original data ID query, as is generally the case with calibration
        lookups.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for. Must have
            dimensions that are a subset of ``self.dimensions``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof. ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each data ID in ``self``, only yield one
            `DatasetRef`, from the first collection in which a dataset of that
            dataset type appears (according to the order of ``collections``
            passed in). If `True`, ``collections`` must not contain regular
            expressions and may not be ``...``. Note that this is not the
            same as yielding one `DatasetRef` for each yielded data ID if
            ``dimensions`` is not `None`.
        dimensions : `DimensionGroup`, `DimensionGraph`, or \
                `~collections.abc.Iterable` [ `str` ], optional
            The dimensions of the data IDs returned. Must be a subset of
            ``self.dimensions``.

        Returns
        -------
        pairs : `~collections.abc.Iterable` [ `tuple` [ `DataCoordinate`, \
                `DatasetRef` ] ]
            An iterable of (data ID, dataset reference) pairs.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
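
        For example, to pair each of this query's data IDs with its
        calibration dataset (dataset type and collection names hypothetical)::

            for data_id, ref in data_ids.findRelatedDatasets("flat", ["HSC/calib"]):
                ...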

        """
        if dimensions is None:
            dimensions = self.dimensions
        else:
            dimensions = self.universe.conform(dimensions)
        parent_dataset_type, _ = self._query.backend.resolve_single_dataset_type_wildcard(
            datasetType, components=False, explicit_only=True
        )
        query = self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True)
        return query.iter_data_ids_and_dataset_refs(parent_dataset_type, dimensions)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require
            running the full query and then throwing away the result rows
            after counting them. If `False`, this is an error, as the user
            would usually be better off executing the query first to fetch its
            rows into a new query (or passing ``exact=False``). Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique
        rows returned, so even with ``exact=True`` it may provide only an
        upper bound on the number of *deduplicated* result rows.
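
        For example, a cheap upper bound (``results`` being an instance of
        this class) can be obtained without running the full query::

            n_at_most = results.count(exact=False)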

        """
        return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows. `False` if it definitely would not.
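
        For example, a quick existence check that skips post-query filtering
        (``results`` being an instance of this class)::

            if results.any(exact=False):
                ...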

        """
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield
            any results.
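
        For example, a simple diagnostic when a query comes back empty
        (``results`` being an instance of this class)::

            if not results.any():
                for message in results.explain_no_results():
                    print(message)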

        """
        return self._query.explain_no_results(execute=execute)

    def order_by(self, *args: str) -> DataCoordinateQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering. A column
            name can be prefixed with a minus sign (``-``) to use descending
            ordering.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            The ``self`` instance, updated to return ordered results.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining.
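
        For example, to sort by ascending visit and then descending detector
        (dimension names hypothetical)::

            results.order_by("visit", "-detector")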

        """
        clause = OrderByClause.parse_general(args, self._query.dimensions)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DataCoordinateQueryResults:
        """Make the iterator return a limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records. `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            The ``self`` instance, updated to return a limited set of records.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining. Normally this method is used
        together with the `order_by` method.
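
        For example, to fetch the second page of ten ordered records
        (dimension name hypothetical)::

            results.order_by("visit").limit(10, offset=10)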

        """
        if offset is None:
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self


class DatasetQueryResults(Iterable[DatasetRef]):
    """An interface for objects that represent the results of queries for
    datasets.
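
    A typical instance is obtained from `Registry.queryDatasets`; for example
    (the dataset type and collection names here are hypothetical)::

        for ref in registry.queryDatasets("calexp", collections=["HSC/runs/RC2"]):
            ...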

    """

    @abstractmethod
    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        """Group results by parent dataset type.

        Returns
        -------
        iter : `~collections.abc.Iterator` [ `ParentDatasetQueryResults` ]
            An iterator over `DatasetQueryResults` instances that are each
            responsible for a single parent dataset type (either just that
            dataset type, one or more of its component dataset types, or both).
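
        For example, to process heterogeneous results one parent dataset type
        at a time (``results`` being an instance of this class)::

            for parent_results in results.byParentDatasetType():
                print(parent_results.parentDatasetType.name)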

        """
        raise NotImplementedError()

    @abstractmethod
    def materialize(self) -> AbstractContextManager[DatasetQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DatasetQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``. If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).
        """
        raise NotImplementedError()

    @abstractmethod
    def expanded(self) -> DatasetQueryResults:
        """Return a `DatasetQueryResults` for which `DataCoordinate.hasRecords`
        returns `True` for all data IDs in returned `DatasetRef` objects.

        Returns
        -------
        expanded : `DatasetQueryResults`
            Either a new `DatasetQueryResults` instance or ``self``, if it is
            already expanded.

        Notes
        -----
        As with `DataCoordinateQueryResults.expanded`, it may be more
        efficient to call `materialize` before expanding data IDs for very
        large result sets.
        """
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require
            running the full query and then throwing away the result rows
            after counting them. If `False`, this is an error, as the user
            would usually be better off executing the query first to fetch its
            rows into a new query (or passing ``exact=False``). Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique
        rows returned, so even with ``exact=True`` it may provide only an
        upper bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(
        self,
        *,
        execute: bool = True,
        exact: bool = True,
    ) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows. `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield
            any results.
        """
        raise NotImplementedError()

    def _iter_by_dataset_type(self) -> Iterator[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Group results by dataset type.

        This is a private hook for the interface defined by
        `DatasetRef.iter_by_type`, enabling much more efficient processing of
        heterogeneous `DatasetRef` iterables when they come directly from
        queries.
        """
        for parent_results in self.byParentDatasetType():
            for component in parent_results.components:
                dataset_type = parent_results.parentDatasetType
                if component is not None:
                    dataset_type = dataset_type.makeComponentDatasetType(component)
                yield (dataset_type, parent_results.withComponents((component,)))


class ParentDatasetQueryResults(DatasetQueryResults):
    """An object that represents results from a query for datasets with a
    single parent `DatasetType`.

    Parameters
    ----------
    query : `Query`
        Low-level query object that backs these results.
    dataset_type : `DatasetType`
        Parent dataset type for all datasets returned by this query.
    components : `~collections.abc.Sequence` [ `str` or `None` ], optional
        Names of components to include in iteration. `None` may be included
        (at most once) to include the parent dataset type.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(
        self,
        query: Query,
        dataset_type: DatasetType,
        components: Sequence[str | None] = (None,),
    ):
        self._query = query
        self._dataset_type = dataset_type
        self._components = components

    __slots__ = ("_query", "_dataset_type", "_components")

    def __iter__(self) -> Iterator[DatasetRef]:
        return self._query.iter_dataset_refs(self._dataset_type, self._components)

    def __repr__(self) -> str:
        return f"<DatasetRef iterator for [components of] {self._dataset_type.name}>"

    @property
    def components(self) -> Sequence[str | None]:
        """The components of the parent dataset type included in these results
        (`~collections.abc.Sequence` [ `str` or `None` ]).
        """
        return self._components

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        yield self

    @contextmanager
    def materialize(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with self._query.open_context():
            yield ParentDatasetQueryResults(self._query.materialized(), self._dataset_type, self._components)

    @property
    def parentDatasetType(self) -> DatasetType:
        """The parent dataset type for all datasets in this iterable
        (`DatasetType`).
        """
        return self._dataset_type

    @property
    def dataIds(self) -> DataCoordinateQueryResults:
        """A lazy-evaluation object representing a query for just the data
        IDs of the datasets that would be returned by this query
        (`DataCoordinateQueryResults`).

        The returned object is not in general `zip`-iterable with ``self``;
        it may be in a different order or have (or not have) duplicates.
        """
        return DataCoordinateQueryResults(self._query.projected(defer=True))

    def withComponents(self, components: Sequence[str | None]) -> ParentDatasetQueryResults:
        """Return a new query results object for the same parent datasets but
        different components.

        Parameters
        ----------
        components : `~collections.abc.Sequence` [ `str` or `None` ]
            Names of components to include in iteration. `None` may be
            included (at most once) to include the parent dataset type.

        Returns
        -------
        results : `ParentDatasetQueryResults`
            A new results object backed by the same query, iterating over the
            given components.
        """
        return ParentDatasetQueryResults(self._query, self._dataset_type, components)

    def expanded(self) -> ParentDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ParentDatasetQueryResults(
            self._query.with_record_columns(defer=True), self._dataset_type, self._components
        )

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return len(self._components) * self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)


class ChainedDatasetQueryResults(DatasetQueryResults):
    """A `DatasetQueryResults` implementation that simply chains together
    other results objects, each for a different parent dataset type.

    Parameters
    ----------
    chain : `~collections.abc.Sequence` [ `ParentDatasetQueryResults` ]
        The underlying results objects this object will chain together.
    doomed_by : `~collections.abc.Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed. Queries with a non-empty list will never be executed.
        Child results objects may also have their own list.
    """

    def __init__(self, chain: Sequence[ParentDatasetQueryResults], doomed_by: Iterable[str] = ()):
        self._chain = chain
        self._doomed_by = tuple(doomed_by)

    __slots__ = ("_chain", "_doomed_by")

    def __iter__(self) -> Iterator[DatasetRef]:
        return itertools.chain.from_iterable(self._chain)

    def __repr__(self) -> str:
        return "<DatasetRef iterator for multiple dataset types>"

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        return iter(self._chain)

    @contextmanager
    def materialize(self) -> Iterator[ChainedDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with ExitStack() as stack:
            yield ChainedDatasetQueryResults([stack.enter_context(r.materialize()) for r in self._chain])

    def expanded(self) -> ChainedDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ChainedDatasetQueryResults([r.expanded() for r in self._chain], self._doomed_by)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return sum(r.count(exact=exact, discard=discard) for r in self._chain)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return any(r.any(execute=execute, exact=exact) for r in self._chain)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        result = list(self._doomed_by)
        for r in self._chain:
            result.extend(r.explain_no_results(execute=execute))
        return result


class DimensionRecordQueryResults(Iterable[DimensionRecord]):
    """An interface for objects that represent the results of queries for
    dimension records.
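
    A typical instance is obtained from `Registry.queryDimensionRecords`; for
    example (the element and data ID values here are hypothetical)::

        for record in registry.queryDimensionRecords("detector", instrument="HSC"):
            ...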

    """

    @property
    @abstractmethod
    def element(self) -> DimensionElement:
        """The dimension element whose records this query returns
        (`DimensionElement`).
        """
        raise NotImplementedError()

    @abstractmethod
    def run(self) -> DimensionRecordQueryResults:
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require
            running the full query and then throwing away the result rows
            after counting them. If `False`, this is an error, as the user
            would usually be better off executing the query first to fetch its
            rows into a new query (or passing ``exact=False``). Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique
        rows returned, so even with ``exact=True`` it may provide only an
        upper bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows. `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering. A column
            name can be prefixed with a minus sign (``-``) to use descending
            ordering.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            The ``self`` instance, updated to return ordered results.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance.
        """
        raise NotImplementedError()

    @abstractmethod
    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        """Make the iterator return a limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records. `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            The ``self`` instance, updated to return a limited set of records.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance. Normally this method is used together with the `order_by`
        method.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield
            any results.
        """
        raise NotImplementedError()


class DatabaseDimensionRecordQueryResults(DimensionRecordQueryResults):
    """Implementation of `DimensionRecordQueryResults` backed by a database
    query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.
    element : `DimensionElement`
        Element whose records this object returns.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(self, query: Query, element: DimensionElement):
        self._query = query
        self._element = element

    @property
    def element(self) -> DimensionElement:
        # Docstring inherited from base class.
        return self._element

    def __iter__(self) -> Iterator[DimensionRecord]:
        return self._query.iter_dimension_records(self._element)

    def run(self) -> DimensionRecordQueryResults:
        return DatabaseDimensionRecordQueryResults(self._query.run(), self._element)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited from base class.
        return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited from base class.
        return self._query.any(execute=execute, exact=exact)

    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        clause = OrderByClause.parse_element(args, self._element)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        if offset is None:
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)