Coverage for python/lsst/daf/butler/registry/queries/_results.py: 57%

201 statements  

coverage.py v7.5.1, created at 2024-05-08 02:51 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = (
    "ChainedDatasetQueryResults",
    "DatabaseDimensionRecordQueryResults",
    "DataCoordinateQueryResults",
    "DatasetQueryResults",
    "DimensionRecordQueryResults",
    "ParentDatasetQueryResults",
)

import itertools
from abc import abstractmethod
from collections.abc import Iterable, Iterator, Sequence
from contextlib import AbstractContextManager, ExitStack, contextmanager
from typing import Any

from deprecated.sphinx import deprecated

from ..._dataset_ref import DatasetRef
from ..._dataset_type import DatasetType
from ..._exceptions_legacy import DatasetTypeError
from ...dimensions import (
    DataCoordinate,
    DataCoordinateIterable,
    DimensionElement,
    DimensionGraph,
    DimensionGroup,
    DimensionRecord,
)
from ._query import Query
from ._structs import OrderByClause


class DataCoordinateQueryResults(DataCoordinateIterable):
    """An enhanced implementation of `DataCoordinateIterable` that represents
    data IDs retrieved from a database query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
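
    These result objects are normally obtained from `Registry.queryDataIds`
    rather than constructed directly; a hypothetical sketch (``HSC`` is a
    placeholder instrument name)::

        results = registry.queryDataIds(["visit", "detector"], instrument="HSC")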

    """

    def __init__(self, query: Query):
        self._query = query

    __slots__ = ("_query",)

    def __iter__(self) -> Iterator[DataCoordinate]:
        return self._query.iter_data_ids()

    def __repr__(self) -> str:
        # Use the non-deprecated ``dimensions`` property here so that merely
        # printing a results object does not emit a FutureWarning.
        return f"<DataCoordinate iterator with dimensions={self.dimensions}>"

    @property
    @deprecated(
        "Deprecated in favor of .dimensions. Will be removed after v27.",
        version="v27",
        category=FutureWarning,
    )
    def graph(self) -> DimensionGraph:
        # Docstring inherited from DataCoordinateIterable.
        return self._query.dimensions._as_graph()

    @property
    def dimensions(self) -> DimensionGroup:
        """The dimensions of the data IDs returned by this query."""
        return self._query.dimensions

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinateIterable.
        return True

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinateIterable.
        return self._query.has_record_columns is True or not self.dimensions

    @contextmanager
    def materialize(self) -> Iterator[DataCoordinateQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DataCoordinateQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``. If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).

        Notes
        -----
        When using a very large result set to perform multiple queries (e.g.
        multiple calls to `subset` with different arguments, or even a single
        call to `expanded`), it may be much more efficient to start by
        materializing the query and only then performing the follow-up
        queries. It may also be less efficient, depending on how well the
        database engine's query optimizer can simplify those particular
        follow-up queries and how efficiently it caches query results even
        when they are not explicitly inserted into a temporary table. See
        `expanded` and `subset` for examples.
        """
        with self._query.open_context():
            yield DataCoordinateQueryResults(self._query.materialized())

    def expanded(self) -> DataCoordinateQueryResults:
        """Return a results object for which `hasRecords` returns `True`.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object for which `hasRecords` returns `True`. May be
            ``self`` if that is already the case.

        Notes
        -----
        For very large result sets, it may be much more efficient to call
        `materialize` before calling `expanded`, to avoid performing the
        original query multiple times (as a subquery) in the follow-up
        queries that fetch dimension records. For example::

            with registry.queryDataIds(...).materialize() as tempDataIds:
                dataIdsWithRecords = tempDataIds.expanded()
                for dataId in dataIdsWithRecords:
                    ...
        """
        return DataCoordinateQueryResults(self._query.with_record_columns(defer=True))

    def subset(
        self,
        dimensions: DimensionGroup | DimensionGraph | Iterable[str] | None = None,
        *,
        unique: bool = False,
    ) -> DataCoordinateQueryResults:
        """Return a results object containing a subset of the dimensions of
        this one, and/or a unique near-subset of its rows.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Parameters
        ----------
        dimensions : `DimensionGroup`, `DimensionGraph`, or \
                `~collections.abc.Iterable` [ `str` ], optional
            Dimensions to include in the new results object. If `None`,
            ``self.dimensions`` is used.
        unique : `bool`, optional
            If `True` (`False` is default), the query should only return
            unique data IDs. This is implemented in the database; to obtain
            unique results via Python-side processing (which may be more
            efficient in some cases), use `toSet` to construct a
            `DataCoordinateSet` from this results object instead.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object corresponding to the given criteria. May be
            ``self`` if it already qualifies.

        Raises
        ------
        ValueError
            Raised when ``dimensions`` is not a subset of the dimensions in
            this result.

        Notes
        -----
        This method can only return a "near-subset" of the original result
        rows in general because of subtleties in how spatial overlaps are
        implemented; see `Query.projected` for more information.

        When calling `subset` multiple times on the same very large result
        set, it may be much more efficient to call `materialize` first. For
        example::

            dimensions1 = DimensionGroup(...)
            dimensions2 = DimensionGroup(...)
            with registry.queryDataIds(...).materialize() as tempDataIds:
                for dataId1 in tempDataIds.subset(dimensions1, unique=True):
                    ...
                for dataId2 in tempDataIds.subset(dimensions2, unique=True):
                    ...
        """
        if dimensions is None:
            dimensions = self.dimensions
        else:
            dimensions = self.dimensions.universe.conform(dimensions)
            if not dimensions.issubset(self.dimensions):
                raise ValueError(f"{dimensions} is not a subset of {self.dimensions}")
        query = self._query.projected(dimensions.names, unique=unique, defer=True, drop_postprocessing=True)
        return DataCoordinateQueryResults(query)

    def findDatasets(
        self,
        datasetType: DatasetType | str,
        collections: Any,
        *,
        findFirst: bool = True,
        components: bool = False,
    ) -> ParentDatasetQueryResults:
        """Find datasets using the data IDs identified by this query.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for. Must have
            dimensions that are a subset of ``self.dimensions``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof. ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each result data ID, only yield one
            `DatasetRef`, from the first collection in which a dataset of that
            dataset type appears (according to the order of ``collections``
            passed in). If `True`, ``collections`` must not contain regular
            expressions and may not be ``...``.
        components : `bool`, optional
            Must be `False`. Provided only for backwards compatibility. After
            v27 this argument will be removed entirely.

        Returns
        -------
        datasets : `ParentDatasetQueryResults`
            A lazy-evaluation object representing dataset query results,
            iterable over `DatasetRef` objects. If ``self.hasRecords()``, all
            nested data IDs in those dataset references will have records as
            well.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
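
        Examples
        --------
        A hypothetical sketch, assuming a registered ``flat`` dataset type
        and a ``calib`` collection exist::

            data_ids = registry.queryDataIds(["detector", "physical_filter"])
            for ref in data_ids.findDatasets("flat", collections=["calib"]):
                print(ref.dataId, ref.run)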

        """
        if components is not False:
            raise DatasetTypeError(
                "Dataset component queries are no longer supported by Registry. Use "
                "DatasetType methods to obtain components from parent dataset types instead."
            )
        resolved_dataset_type = self._query.backend.resolve_single_dataset_type_wildcard(
            datasetType, explicit_only=True
        )
        return ParentDatasetQueryResults(
            self._query.find_datasets(resolved_dataset_type, collections, find_first=findFirst, defer=True),
            resolved_dataset_type,
            [None],
        )

    def findRelatedDatasets(
        self,
        datasetType: DatasetType | str,
        collections: Any,
        *,
        findFirst: bool = True,
        dimensions: DimensionGroup | DimensionGraph | Iterable[str] | None = None,
    ) -> Iterable[tuple[DataCoordinate, DatasetRef]]:
        """Find datasets using the data IDs identified by this query, and
        return them along with the original data IDs.

        This is a variant of `findDatasets` that is often more useful when
        the target dataset type does not have all of the dimensions of the
        original data ID query, as is generally the case with calibration
        lookups.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for. Must have
            dimensions that are a subset of ``self.dimensions``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof. ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each data ID in ``self``, only yield one
            `DatasetRef`, from the first collection in which a dataset of that
            dataset type appears (according to the order of ``collections``
            passed in). If `True`, ``collections`` must not contain regular
            expressions and may not be ``...``. Note that this is not the
            same as yielding one `DatasetRef` for each yielded data ID if
            ``dimensions`` is not `None`.
        dimensions : `DimensionGroup`, `DimensionGraph`, or \
                `~collections.abc.Iterable` [ `str` ], optional
            The dimensions of the data IDs returned. Must be a subset of
            ``self.dimensions``.

        Returns
        -------
        pairs : `~collections.abc.Iterable` [ `tuple` [ `DataCoordinate`, \
                `DatasetRef` ] ]
            An iterable of (data ID, dataset reference) pairs.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
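
        Examples
        --------
        A hypothetical sketch of a calibration lookup, assuming a registered
        ``bias`` dataset type and a ``calib`` collection::

            data_ids = registry.queryDataIds(["exposure", "detector"])
            for data_id, ref in data_ids.findRelatedDatasets("bias", ["calib"]):
                # Each (exposure, detector) data ID is paired with the bias
                # dataset that would be used to calibrate it.
                ...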

        """
        if dimensions is None:
            dimensions = self.dimensions
        else:
            dimensions = self.universe.conform(dimensions)
        parent_dataset_type = self._query.backend.resolve_single_dataset_type_wildcard(
            datasetType, explicit_only=True
        )
        query = self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True)
        return query.iter_data_ids_and_dataset_refs(parent_dataset_type, dimensions)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`,
            the result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require
            running the full query and then throwing away the result rows
            after counting them. If `False`, this is an error, as the user
            would usually be better off executing the query first to fetch
            its rows into a new query (or passing ``exact=False``). Ignored
            if ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique
        rows returned, so even with ``exact=True`` it may provide only an
        upper bound on the number of *deduplicated* result rows.
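
        For example, a cheap upper bound followed by an exact count::

            n_max = results.count(exact=False)
            n_exact = results.count(exact=True, discard=True)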

        """
        return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no
            rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments)
            yield result rows. `False` if it definitely would not.
        """
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT
            1``) of aspects of the tree to more precisely determine where
            rows were filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield
            any results.
        """
        return self._query.explain_no_results(execute=execute)

    def order_by(self, *args: str) -> DataCoordinateQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering. Column names
            can be prefixed with minus (``-``) to use descending ordering.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            Returns ``self``, updated to return ordered results.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining.
        """
        clause = OrderByClause.parse_general(args, self._query.dimensions)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DataCoordinateQueryResults:
        """Make the iterator return a limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records. `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            Returns ``self``, updated to return a limited set of records.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining. Normally this method is used
        together with the `order_by` method.
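
        For example, assuming a ``visit`` dimension, to fetch the ten data
        IDs with the highest visit IDs::

            results = registry.queryDataIds(["visit"]).order_by("-visit").limit(10)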

        """
        if offset is None:
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self


class DatasetQueryResults(Iterable[DatasetRef]):
    """An interface for objects that represent the results of queries for
    datasets.
    """

    @abstractmethod
    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        """Group results by parent dataset type.

        Returns
        -------
        iter : `~collections.abc.Iterator` [ `ParentDatasetQueryResults` ]
            An iterator over `DatasetQueryResults` instances that are each
            responsible for a single parent dataset type (either just that
            dataset type, one or more of its component dataset types, or
            both).
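
        Examples
        --------
        A minimal sketch of processing heterogeneous results one parent
        dataset type at a time (``results`` is any `DatasetQueryResults`)::

            for parent_results in results.byParentDatasetType():
                print(parent_results.parentDatasetType.name)
                for ref in parent_results:
                    ...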

        """
        raise NotImplementedError()

    @abstractmethod
    def materialize(self) -> AbstractContextManager[DatasetQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DatasetQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``. If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).
        """
        raise NotImplementedError()

    @abstractmethod
    def expanded(self) -> DatasetQueryResults:
        """Return a `DatasetQueryResults` for which `DataCoordinate.hasRecords`
        returns `True` for all data IDs in returned `DatasetRef` objects.

        Returns
        -------
        expanded : `DatasetQueryResults`
            Either a new `DatasetQueryResults` instance or ``self``, if it is
            already expanded.

        Notes
        -----
        As with `DataCoordinateQueryResults.expanded`, it may be more
        efficient to call `materialize` before expanding data IDs for very
        large result sets.
        """
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`,
            the result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require
            running the full query and then throwing away the result rows
            after counting them. If `False`, this is an error, as the user
            would usually be better off executing the query first to fetch
            its rows into a new query (or passing ``exact=False``). Ignored
            if ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique
        rows returned, so even with ``exact=True`` it may provide only an
        upper bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(
        self,
        *,
        execute: bool = True,
        exact: bool = True,
    ) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no
            rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments)
            yield result rows. `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT
            1``) of aspects of the tree to more precisely determine where
            rows were filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield
            any results.
        """
        raise NotImplementedError()

    def _iter_by_dataset_type(self) -> Iterator[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Group results by dataset type.

        This is a private hook for the interface defined by
        `DatasetRef.iter_by_type`, enabling much more efficient processing of
        heterogeneous `DatasetRef` iterables when they come directly from
        queries.
        """
        for parent_results in self.byParentDatasetType():
            for component in parent_results._components:
                dataset_type = parent_results.parentDatasetType
                if component is not None:
                    dataset_type = dataset_type.makeComponentDatasetType(component)
                if tuple(parent_results._components) == (component,):
                    # Usual case, and in the future (after component support
                    # has been fully removed) the only case.
                    yield dataset_type, parent_results
                else:
                    # General case that emits a deprecation warning.
                    yield (dataset_type, parent_results.withComponents((component,)))


class ParentDatasetQueryResults(DatasetQueryResults):
    """An object that represents results from a query for datasets with a
    single parent `DatasetType`.

    Parameters
    ----------
    query : `Query`
        Low-level query object that backs these results.
    dataset_type : `DatasetType`
        Parent dataset type for all datasets returned by this query.
    components : `~collections.abc.Sequence` [ `str` or `None` ], optional
        Names of components to include in iteration. `None` may be included
        (at most once) to include the parent dataset type.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(
        self,
        query: Query,
        dataset_type: DatasetType,
        components: Sequence[str | None] = (None,),
    ):
        self._query = query
        self._dataset_type = dataset_type
        self._components = components

    __slots__ = ("_query", "_dataset_type", "_components")

    def __iter__(self) -> Iterator[DatasetRef]:
        return self._query.iter_dataset_refs(self._dataset_type, self._components)

    def __repr__(self) -> str:
        return f"<DatasetRef iterator for [components of] {self._dataset_type.name}>"

    @property
    @deprecated("Deprecated, will be removed after v27.", version="v27", category=FutureWarning)
    def components(self) -> Sequence[str | None]:
        """The components of the parent dataset type included in these
        results (`~collections.abc.Sequence` [ `str` or `None` ]).
        """
        return self._components

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        yield self

    @contextmanager
    def materialize(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with self._query.open_context():
            yield ParentDatasetQueryResults(self._query.materialized(), self._dataset_type, self._components)

    @property
    def parentDatasetType(self) -> DatasetType:
        """The parent dataset type for all datasets in this iterable
        (`DatasetType`).
        """
        return self._dataset_type

    @property
    def dataIds(self) -> DataCoordinateQueryResults:
        """A lazy-evaluation object representing a query for just the data
        IDs of the datasets that would be returned by this query
        (`DataCoordinateQueryResults`).

        The returned object is not in general `zip`-iterable with ``self``;
        it may be in a different order or have (or not have) duplicates.
        """
        return DataCoordinateQueryResults(self._query.projected(defer=True))

    @deprecated("Deprecated, will be removed after v27.", version="v27", category=FutureWarning)
    def withComponents(self, components: Sequence[str | None]) -> ParentDatasetQueryResults:
        """Return a new query results object for the same parent datasets but
        different components.

        Parameters
        ----------
        components : `~collections.abc.Sequence` [ `str` or `None` ]
            Names of components to include in iteration. `None` may be
            included (at most once) to include the parent dataset type.
        """
        return ParentDatasetQueryResults(self._query, self._dataset_type, components)

    def expanded(self) -> ParentDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ParentDatasetQueryResults(
            self._query.with_record_columns(defer=True), self._dataset_type, self._components
        )

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return len(self._components) * self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)


class ChainedDatasetQueryResults(DatasetQueryResults):
    """A `DatasetQueryResults` implementation that simply chains together
    other results objects, each for a different parent dataset type.

    Parameters
    ----------
    chain : `~collections.abc.Sequence` [ `ParentDatasetQueryResults` ]
        The underlying results objects this object will chain together.
    doomed_by : `~collections.abc.Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed. Queries with a non-empty list will never be executed.
        Child results objects may also have their own list.
    """

    def __init__(self, chain: Sequence[ParentDatasetQueryResults], doomed_by: Iterable[str] = ()):
        self._chain = chain
        self._doomed_by = tuple(doomed_by)

    # Include ``_doomed_by`` so every attribute assigned in ``__init__`` is
    # covered by the slots declaration.
    __slots__ = ("_chain", "_doomed_by")

    def __iter__(self) -> Iterator[DatasetRef]:
        return itertools.chain.from_iterable(self._chain)

    def __repr__(self) -> str:
        return "<DatasetRef iterator for multiple dataset types>"

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        return iter(self._chain)

    @contextmanager
    def materialize(self) -> Iterator[ChainedDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with ExitStack() as stack:
            yield ChainedDatasetQueryResults([stack.enter_context(r.materialize()) for r in self._chain])

    def expanded(self) -> ChainedDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ChainedDatasetQueryResults([r.expanded() for r in self._chain], self._doomed_by)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return sum(r.count(exact=exact, discard=discard) for r in self._chain)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return any(r.any(execute=execute, exact=exact) for r in self._chain)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        result = list(self._doomed_by)
        for r in self._chain:
            result.extend(r.explain_no_results(execute=execute))
        return result


class DimensionRecordQueryResults(Iterable[DimensionRecord]):
    """An interface for objects that represent the results of queries for
    dimension records.
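
    For example, a hypothetical sketch using `Registry.queryDimensionRecords`
    (``HSC`` is a placeholder instrument name)::

        records = registry.queryDimensionRecords("detector", instrument="HSC")
        for record in records.order_by("id").limit(5):
            ...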

    """

    @property
    @abstractmethod
    def element(self) -> DimensionElement:
        raise NotImplementedError()

    @abstractmethod
    def run(self) -> DimensionRecordQueryResults:
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`,
            the result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require
            running the full query and then throwing away the result rows
            after counting them. If `False`, this is an error, as the user
            would usually be better off executing the query first to fetch
            its rows into a new query (or passing ``exact=False``). Ignored
            if ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique
        rows returned, so even with ``exact=True`` it may provide only an
        upper bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no
            rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments)
            yield result rows. `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering. Column names
            can be prefixed with minus (``-``) to use descending ordering.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            Returns ``self``, updated to return ordered results.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance.
        """
        raise NotImplementedError()

    @abstractmethod
    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        """Make the iterator return a limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records. `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            Returns ``self``, updated to return a limited set of records.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance. Normally this method is used together with the `order_by`
        method.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT
            1``) of aspects of the tree to more precisely determine where
            rows were filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield
            any results.
        """
        raise NotImplementedError()


class DatabaseDimensionRecordQueryResults(DimensionRecordQueryResults):
    """Implementation of `DimensionRecordQueryResults` using a database
    query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.
    element : `DimensionElement`
        Element whose records this object returns.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(self, query: Query, element: DimensionElement):
        self._query = query
        self._element = element

    @property
    def element(self) -> DimensionElement:
        return self._element

    def __iter__(self) -> Iterator[DimensionRecord]:
        return self._query.iter_dimension_records(self._element)

    def run(self) -> DimensionRecordQueryResults:
        return DatabaseDimensionRecordQueryResults(self._query.run(), self._element)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited from base class. Forward ``discard`` as well so
        # the documented behavior of that argument is honored.
        return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited from base class.
        return self._query.any(execute=execute, exact=exact)

    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        clause = OrderByClause.parse_element(args, self._element)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        if offset is None:
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)