Coverage for python/lsst/daf/butler/registry/queries/_results.py: 57%

187 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-27 09:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

# Public API of this module; note DatabaseDimensionRecordQueryResults is
# exported here even though its definition appears later in the file.
__all__ = (
    "ChainedDatasetQueryResults",
    "DatabaseDimensionRecordQueryResults",
    "DataCoordinateQueryResults",
    "DatasetQueryResults",
    "DimensionRecordQueryResults",
    "ParentDatasetQueryResults",
)

37 

38import itertools 

39from abc import abstractmethod 

40from collections.abc import Iterable, Iterator, Sequence 

41from contextlib import AbstractContextManager, ExitStack, contextmanager 

42from typing import Any 

43 

44from ..._dataset_ref import DatasetRef 

45from ..._dataset_type import DatasetType 

46from ...dimensions import ( 

47 DataCoordinate, 

48 DataCoordinateIterable, 

49 DimensionElement, 

50 DimensionGraph, 

51 DimensionRecord, 

52) 

53from ._query import Query 

54from ._structs import OrderByClause 

55 

56 

class DataCoordinateQueryResults(DataCoordinateIterable):
    """An enhanced implementation of `DataCoordinateIterable` that represents
    data IDs retrieved from a database query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(self, query: Query):
        self._query = query

    __slots__ = ("_query",)

    def __iter__(self) -> Iterator[DataCoordinate]:
        return self._query.iter_data_ids()

    def __repr__(self) -> str:
        return f"<DataCoordinate iterator with dimensions={self.graph}>"

    @property
    def graph(self) -> DimensionGraph:
        # Docstring inherited from DataCoordinateIterable.
        return self._query.dimensions

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinateIterable.
        return True

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinateIterable.
        # An empty dimension graph trivially "has records": there is nothing
        # to attach records to.
        return self._query.has_record_columns is True or not self.graph

    @contextmanager
    def materialize(self) -> Iterator[DataCoordinateQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DataCoordinateQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``.  If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).

        Notes
        -----
        When using a very large result set to perform multiple queries (e.g.
        multiple calls to `subset` with different arguments, or even a single
        call to `expanded`), it may be much more efficient to start by
        materializing the query and only then performing the follow up
        queries.  It may also be less efficient, depending on how well the
        database engine's query optimizer can simplify those particular
        follow-up queries and how efficiently it caches query results even
        when they are not explicitly inserted into a temporary table.  See
        `expanded` and `subset` for examples.
        """
        with self._query.open_context():
            yield DataCoordinateQueryResults(self._query.materialized())

    def expanded(self) -> DataCoordinateQueryResults:
        """Return a results object for which `hasRecords` returns `True`.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object for which `hasRecords` returns `True`.  May be
            ``self`` if that is already the case.

        Notes
        -----
        For very large result sets, it may be much more efficient to call
        `materialize` before calling `expanded`, to avoid performing the
        original query multiple times (as a subquery) in the follow-up
        queries that fetch dimension records.  For example::

            with registry.queryDataIds(...).materialize() as tempDataIds:
                dataIdsWithRecords = tempDataIds.expanded()
                for dataId in dataIdsWithRecords:
                    ...
        """
        return DataCoordinateQueryResults(self._query.with_record_columns(defer=True))

    def subset(
        self, graph: DimensionGraph | None = None, *, unique: bool = False
    ) -> DataCoordinateQueryResults:
        """Return a results object containing a subset of the dimensions of
        this one, and/or a unique near-subset of its rows.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Parameters
        ----------
        graph : `DimensionGraph`, optional
            Dimensions to include in the new results object.  If `None`,
            ``self.graph`` is used.
        unique : `bool`, optional
            If `True` (`False` is default), the query should only return
            unique data IDs.  This is implemented in the database; to obtain
            unique results via Python-side processing (which may be more
            efficient in some cases), use `toSet` to construct a
            `DataCoordinateSet` from this results object instead.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object corresponding to the given criteria.  May be
            ``self`` if it already qualifies.

        Raises
        ------
        ValueError
            Raised when ``graph`` is not a subset of the dimension graph in
            this result.

        Notes
        -----
        This method can only return a "near-subset" of the original result
        rows in general because of subtleties in how spatial overlaps are
        implemented; see `Query.projected` for more information.

        When calling `subset` multiple times on the same very large result
        set, it may be much more efficient to call `materialize` first.  For
        example::

            dimensions1 = DimensionGraph(...)
            dimensions2 = DimensionGraph(...)
            with registry.queryDataIds(...).materialize() as tempDataIds:
                for dataId1 in tempDataIds.subset(
                        graph=dimensions1,
                        unique=True):
                    ...
                for dataId2 in tempDataIds.subset(
                        graph=dimensions2,
                        unique=True):
                    ...
        """
        if graph is None:
            graph = self.graph
        if not graph.issubset(self.graph):
            raise ValueError(f"{graph} is not a subset of {self.graph}")
        query = self._query.projected(graph, unique=unique, defer=True, drop_postprocessing=True)
        return DataCoordinateQueryResults(query)

    def findDatasets(
        self,
        datasetType: Any,
        collections: Any,
        *,
        findFirst: bool = True,
        components: bool | None = None,
    ) -> DatasetQueryResults:
        """Find datasets using the data IDs identified by this query.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for.  Must have
            dimensions that are a subset of ``self.graph``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof.  ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each result data ID, only yield one
            `DatasetRef`, from the first collection in which a dataset of
            that dataset type appears (according to the order of
            ``collections`` passed in).  If `True`, ``collections`` must not
            contain regular expressions and may not be ``...``.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset
            type names as well.  If `False`, never apply patterns to
            components.  If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will
            be supported after v26.  After v27 this argument will be removed
            entirely.

        Returns
        -------
        datasets : `ParentDatasetQueryResults`
            A lazy-evaluation object representing dataset query results,
            iterable over `DatasetRef` objects.  If ``self.hasRecords()``,
            all nested data IDs in those dataset references will have records
            as well.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
        """
        parent_dataset_type, components_found = self._query.backend.resolve_single_dataset_type_wildcard(
            datasetType, components=components, explicit_only=True
        )
        return ParentDatasetQueryResults(
            self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True),
            parent_dataset_type,
            components_found,
        )

    def findRelatedDatasets(
        self,
        datasetType: DatasetType | str,
        collections: Any,
        *,
        findFirst: bool = True,
        dimensions: DimensionGraph | None = None,
    ) -> Iterable[tuple[DataCoordinate, DatasetRef]]:
        """Find datasets using the data IDs identified by this query, and
        return them along with the original data IDs.

        This is a variant of `findDatasets` that is often more useful when
        the target dataset type does not have all of the dimensions of the
        original data ID query, as is generally the case with calibration
        lookups.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for.  Must have
            dimensions that are a subset of ``self.graph``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof.  ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each data ID in ``self``, only yield one
            `DatasetRef`, from the first collection in which a dataset of
            that dataset type appears (according to the order of
            ``collections`` passed in).  If `True`, ``collections`` must not
            contain regular expressions and may not be ``...``.  Note that
            this is not the same as yielding one `DatasetRef` for each
            yielded data ID if ``dimensions`` is not `None`.
        dimensions : `DimensionGraph`, optional
            The dimensions of the data IDs returned.  Must be a subset of
            ``self.dimensions``.

        Returns
        -------
        pairs : `~collections.abc.Iterable` [ `tuple` [ `DataCoordinate`, \
                `DatasetRef` ] ]
            An iterable of (data ID, dataset reference) pairs.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
        """
        if dimensions is None:
            dimensions = self.graph
        # Components are not supported here, so only the parent type matters.
        parent_dataset_type, _ = self._query.backend.resolve_single_dataset_type_wildcard(
            datasetType, components=False, explicit_only=True
        )
        query = self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True)
        return query.iter_data_ids_and_dataset_refs(parent_dataset_type, dimensions)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count.  If `False`,
            the result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require
            running the full query and then throwing away the result rows
            after counting them.  If `False`, this is an error, as the user
            would usually be better off executing the query first to fetch
            its rows into a new query (or passing ``exact=False``).  Ignored
            if ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique
        rows returned, so even with ``exact=True`` it may provide only an
        upper bound on the number of *deduplicated* result rows.
        """
        return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no
            rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found.  If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments)
            yield result rows.  `False` if it definitely would not.
        """
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g.
            ``LIMIT 1``) of aspects of the tree to more precisely determine
            where rows were filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield
            any results.
        """
        return self._query.explain_no_results(execute=execute)

    def order_by(self, *args: str) -> DataCoordinateQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering.  Column name
            can be prefixed with minus (``-``) to use descending ordering.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            Returns ``self`` instance which is updated to return ordered
            result.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining.
        """
        clause = OrderByClause.parse_general(args, self._query.dimensions)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DataCoordinateQueryResults:
        """Make the iterator return limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records.  `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            Returns ``self`` instance which is updated to return limited set
            of records.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining.  Normally this method is used
        together with `order_by` method.
        """
        if offset is None:
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self

457 

458 

class DatasetQueryResults(Iterable[DatasetRef]):
    """An interface for objects representing the results of dataset
    queries.
    """

    @abstractmethod
    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        """Group these results by parent dataset type.

        Returns
        -------
        iter : `~collections.abc.Iterator` [ `ParentDatasetQueryResults` ]
            An iterator over `DatasetQueryResults` instances, each of which
            is responsible for a single parent dataset type (either just
            that dataset type, one or more of its component dataset types,
            or both).
        """
        raise NotImplementedError()

    @abstractmethod
    def materialize(self) -> AbstractContextManager[DatasetQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DatasetQueryResults` ]
            A context manager whose ``__enter__`` creates and populates the
            temporary table (returning a results object backed by it) and
            whose ``__exit__`` drops it.  If ``self`` is already
            materialized, the context manager may do nothing, since an outer
            context manager should already be responsible for cleanup.
        """
        raise NotImplementedError()

    @abstractmethod
    def expanded(self) -> DatasetQueryResults:
        """Return a `DatasetQueryResults` whose `DatasetRef` objects all have
        data IDs for which `DataCoordinate.hasRecords` returns `True`.

        Returns
        -------
        expanded : `DatasetQueryResults`
            A new `DatasetQueryResults` instance, or ``self`` when it is
            already expanded.

        Notes
        -----
        As with `DataCoordinateQueryResults.expanded`, calling `materialize`
        first may be more efficient for very large result sets.
        """
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and account for any post-query
            filtering in the count.  If `False`, the result may only be an
            upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even when that requires
            running the full query and discarding its rows after counting
            them.  If `False`, that situation is an error, as the caller
            would usually be better off executing the query to fetch its
            rows (or passing ``exact=False``).  Ignored when
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound
            when ``exact=False``.

        Notes
        -----
        This counts returned rows, not unique rows, so even with
        ``exact=True`` it may only bound the number of *deduplicated*
        result rows from above.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(
        self,
        *,
        execute: bool = True,
        exact: bool = True,
    ) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query when it cannot
            be determined before execution that the query would return no
            rows.
        exact : `bool`, optional
            If `True`, run the full query with post-query filtering until at
            least one result row is found.  If `False`, post-query filtering
            is not accounted for, so the result may be `True` even when
            every row would be filtered out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or, depending on arguments, might)
            yield result rows; `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the
        query yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g.
            ``LIMIT 1``) of aspects of the tree to determine more precisely
            where rows were filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages describing reasons the query might not yield
            any results.
        """
        raise NotImplementedError()

    def _iter_by_dataset_type(self) -> Iterator[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Group results by dataset type.

        This is a private hook for the interface defined by
        `DatasetRef.iter_by_type`, enabling much more efficient processing
        of heterogeneous `DatasetRef` iterables when they come directly from
        queries.
        """
        for by_parent in self.byParentDatasetType():
            parent_type = by_parent.parentDatasetType
            for component in by_parent.components:
                # A component of None stands for the parent type itself.
                dataset_type = (
                    parent_type
                    if component is None
                    else parent_type.makeComponentDatasetType(component)
                )
                yield dataset_type, by_parent.withComponents((component,))

607 

608 

class ParentDatasetQueryResults(DatasetQueryResults):
    """An object that represents results from a query for datasets with a
    single parent `DatasetType`.

    Parameters
    ----------
    query : `Query`
        Low-level query object that backs these results.
    dataset_type : `DatasetType`
        Parent dataset type for all datasets returned by this query.
    components : `~collections.abc.Sequence` [ `str` or `None` ], optional
        Names of components to include in iteration.  `None` may be included
        (at most once) to include the parent dataset type.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(
        self,
        query: Query,
        dataset_type: DatasetType,
        components: Sequence[str | None] = (None,),
    ):
        self._query = query
        self._dataset_type = dataset_type
        self._components = components

    __slots__ = ("_query", "_dataset_type", "_components")

    def __iter__(self) -> Iterator[DatasetRef]:
        return self._query.iter_dataset_refs(self._dataset_type, self._components)

    def __repr__(self) -> str:
        return f"<DatasetRef iterator for [components of] {self._dataset_type.name}>"

    @property
    def components(self) -> Sequence[str | None]:
        """The components of the parent dataset type included in these
        results (`~collections.abc.Sequence` [ `str` or `None` ]).
        """
        return self._components

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        # There is only one parent dataset type here, so the group is self.
        yield self

    @contextmanager
    def materialize(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with self._query.open_context():
            yield ParentDatasetQueryResults(self._query.materialized(), self._dataset_type, self._components)

    @property
    def parentDatasetType(self) -> DatasetType:
        """The parent dataset type for all datasets in this iterable
        (`DatasetType`).
        """
        return self._dataset_type

    @property
    def dataIds(self) -> DataCoordinateQueryResults:
        """A lazy-evaluation object representing a query for just the data
        IDs of the datasets that would be returned by this query
        (`DataCoordinateQueryResults`).

        The returned object is not in general `zip`-iterable with ``self``;
        it may be in a different order or have (or not have) duplicates.
        """
        return DataCoordinateQueryResults(self._query.projected(defer=True))

    def withComponents(self, components: Sequence[str | None]) -> ParentDatasetQueryResults:
        """Return a new query results object for the same parent datasets but
        different components.

        Parameters
        ----------
        components : `~collections.abc.Sequence` [ `str` or `None` ]
            Names of components to include in iteration.  `None` may be
            included (at most once) to include the parent dataset type.

        Returns
        -------
        results : `ParentDatasetQueryResults`
            A new results object backed by the same query, iterating over
            the given components.
        """
        return ParentDatasetQueryResults(self._query, self._dataset_type, components)

    def expanded(self) -> ParentDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ParentDatasetQueryResults(
            self._query.with_record_columns(defer=True), self._dataset_type, self._components
        )

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        # Each underlying query row yields one ref per requested component.
        return len(self._components) * self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)

711 

712 

class ChainedDatasetQueryResults(DatasetQueryResults):
    """A `DatasetQueryResults` implementation that simply chains together
    other results objects, each for a different parent dataset type.

    Parameters
    ----------
    chain : `~collections.abc.Sequence` [ `ParentDatasetQueryResults` ]
        The underlying results objects this object will chain together.
    doomed_by : `~collections.abc.Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it
        is executed.  Queries with a non-empty list will never be executed.
        Child results objects may also have their own list.
    """

    def __init__(self, chain: Sequence[ParentDatasetQueryResults], doomed_by: Iterable[str] = ()):
        self._chain = chain
        self._doomed_by = tuple(doomed_by)

    # Fix: "_doomed_by" was previously missing here even though __init__
    # assigns it, which silently defeated the purpose of declaring slots.
    __slots__ = ("_chain", "_doomed_by")

    def __iter__(self) -> Iterator[DatasetRef]:
        return itertools.chain.from_iterable(self._chain)

    def __repr__(self) -> str:
        return "<DatasetRef iterator for multiple dataset types>"

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        return iter(self._chain)

    @contextmanager
    def materialize(self) -> Iterator[ChainedDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        # Use an ExitStack so every child's temporary table is dropped when
        # this context exits, even if a later child's materialization fails.
        with ExitStack() as stack:
            yield ChainedDatasetQueryResults(
                [stack.enter_context(r.materialize()) for r in self._chain],
                # Fix: propagate the doom messages, as `expanded` already
                # does; previously they were silently dropped here.
                self._doomed_by,
            )

    def expanded(self) -> ChainedDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ChainedDatasetQueryResults([r.expanded() for r in self._chain], self._doomed_by)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return sum(r.count(exact=exact, discard=discard) for r in self._chain)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return any(r.any(execute=execute, exact=exact) for r in self._chain)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        # Start with this object's own doom messages, then append each
        # child's explanations.
        result = list(self._doomed_by)
        for r in self._chain:
            result.extend(r.explain_no_results(execute=execute))
        return result

768 

769 

class DimensionRecordQueryResults(Iterable[DimensionRecord]):
    """An interface for objects that represent the results of queries for
    dimension records.
    """

    @property
    @abstractmethod
    def element(self) -> DimensionElement:
        """The dimension element whose records this object returns
        (`DimensionElement`).
        """
        raise NotImplementedError()

    @abstractmethod
    def run(self) -> DimensionRecordQueryResults:
        """Execute the query and return a results object backed by the
        executed query.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            A new results object for the executed query.
        """
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count.  If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them.  If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``).  Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique rows
        returned, so even with ``exact=True`` it may provide only an upper
        bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found.  If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows.  `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        """Make the iterator return ordered result.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering.  Column name
            can be prefixed with minus (``-``) to use descending ordering.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            Returns ``self`` instance which is updated to return ordered
            result.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance.
        """
        raise NotImplementedError()

    @abstractmethod
    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        """Make the iterator return limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`
            The number of records to skip before returning at most ``limit``
            records.  `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            Returns ``self`` instance which is updated to return limited set of
            records.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance.  Normally this method is used together with `order_by`
        method.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
        """
        raise NotImplementedError()

908 

909 

class DatabaseDimensionRecordQueryResults(DimensionRecordQueryResults):
    """Implementation of DimensionRecordQueryResults using database query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.
    element : `DimensionElement`
        Element whose records this object returns.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(self, query: Query, element: DimensionElement):
        self._query = query
        self._element = element

    @property
    def element(self) -> DimensionElement:
        # Docstring inherited from base class.
        return self._element

    def __iter__(self) -> Iterator[DimensionRecord]:
        # Delegate record iteration entirely to the backing query.
        return self._query.iter_dimension_records(self._element)

    def run(self) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.  Wrap the executed query in a
        # fresh results object for the same element.
        executed = self._query.run()
        return DatabaseDimensionRecordQueryResults(executed, self._element)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited from base class.
        # NOTE(review): ``discard`` is accepted for interface compatibility
        # but not forwarded to the backing query — confirm whether
        # ``Query.count`` supports it.
        return self._query.count(exact=exact)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited from base class.
        return self._query.any(execute=execute, exact=exact)

    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        # Parse the user-supplied column names into ordering terms for this
        # element, then rebuild the (deferred) backing query around them.
        order_clause = OrderByClause.parse_element(args, self._element)
        self._query = self._query.sorted(order_clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        # ``None`` offset is treated as zero for backwards compatibility.
        start = 0 if offset is None else offset
        self._query = self._query.sliced(start, start + limit, defer=True)
        return self

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)