Coverage for python/lsst/daf/butler/registry/queries/_results.py: 58%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = (
    "ChainedDatasetQueryResults",
    "DatabaseDimensionRecordQueryResults",
    "DataCoordinateQueryResults",
    "DatasetQueryResults",
    "DimensionRecordQueryResults",
    "ParentDatasetQueryResults",
)

import itertools
from abc import abstractmethod
from collections.abc import Iterable, Iterator, Sequence
from contextlib import AbstractContextManager, ExitStack, contextmanager
from typing import Any

from deprecated.sphinx import deprecated

from ..._dataset_ref import DatasetRef
from ..._dataset_type import DatasetType
from ...dimensions import (
    DataCoordinate,
    DataCoordinateIterable,
    DimensionElement,
    DimensionGraph,
    DimensionGroup,
    DimensionRecord,
)
from ._query import Query
from ._structs import OrderByClause


class DataCoordinateQueryResults(DataCoordinateIterable):
    """An enhanced implementation of `DataCoordinateIterable` that represents
    data IDs retrieved from a database query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(self, query: Query):
        self._query = query

    __slots__ = ("_query",)

    def __iter__(self) -> Iterator[DataCoordinate]:
        return self._query.iter_data_ids()

    def __repr__(self) -> str:
        # Use the non-deprecated ``dimensions`` property here so repr() does
        # not emit a FutureWarning via the deprecated ``graph`` property.
        return f"<DataCoordinate iterator with dimensions={self.dimensions}>"

    @property
    @deprecated(
        "Deprecated in favor of .dimensions. Will be removed after v27.",
        version="v27",
        category=FutureWarning,
    )
    def graph(self) -> DimensionGraph:
        # Docstring inherited from DataCoordinateIterable.
        return self._query.dimensions._as_graph()

    @property
    def dimensions(self) -> DimensionGroup:
        """The dimensions of the data IDs returned by this query."""
        return self._query.dimensions

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinateIterable.
        return True

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinateIterable.
        return self._query.has_record_columns is True or not self.dimensions

    @contextmanager
    def materialize(self) -> Iterator[DataCoordinateQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DataCoordinateQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``. If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).

        Notes
        -----
        When using a very large result set to perform multiple queries (e.g.
        multiple calls to `subset` with different arguments, or even a single
        call to `expanded`), it may be much more efficient to start by
        materializing the query and only then performing the follow-up
        queries. It may also be less efficient, depending on how well the
        database engine's query optimizer can simplify those particular
        follow-up queries and how efficiently it caches query results even
        when they are not explicitly inserted into a temporary table. See
        `expanded` and `subset` for examples.
        """
        with self._query.open_context():
            yield DataCoordinateQueryResults(self._query.materialized())

    def expanded(self) -> DataCoordinateQueryResults:
        """Return a results object for which `hasRecords` returns `True`.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object for which `hasRecords` returns `True`. May be
            ``self`` if that is already the case.

        Notes
        -----
        For very large result sets, it may be much more efficient to call
        `materialize` before calling `expanded`, to avoid performing the
        original query multiple times (as a subquery) in the follow-up queries
        that fetch dimension records. For example::

            with registry.queryDataIds(...).materialize() as tempDataIds:
                dataIdsWithRecords = tempDataIds.expanded()
                for dataId in dataIdsWithRecords:
                    ...
        """
        return DataCoordinateQueryResults(self._query.with_record_columns(defer=True))

    def subset(
        self,
        dimensions: DimensionGroup | DimensionGraph | Iterable[str] | None = None,
        *,
        unique: bool = False,
    ) -> DataCoordinateQueryResults:
        """Return a results object containing a subset of the dimensions of
        this one, and/or a unique near-subset of its rows.

        This method may involve actually executing database queries to fetch
        `DimensionRecord` objects.

        Parameters
        ----------
        dimensions : `DimensionGroup`, `DimensionGraph`, or \
                `~collections.abc.Iterable` [ `str` ], optional
            Dimensions to include in the new results object. If `None`,
            ``self.dimensions`` is used.
        unique : `bool`, optional
            If `True` (`False` is default), the query should only return unique
            data IDs. This is implemented in the database; to obtain unique
            results via Python-side processing (which may be more efficient in
            some cases), use `toSet` to construct a `DataCoordinateSet` from
            this results object instead.

        Returns
        -------
        results : `DataCoordinateQueryResults`
            A results object corresponding to the given criteria. May be
            ``self`` if it already qualifies.

        Raises
        ------
        ValueError
            Raised when ``dimensions`` is not a subset of the dimensions in
            this result.

        Notes
        -----
        This method can only return a "near-subset" of the original result rows
        in general because of subtleties in how spatial overlaps are
        implemented; see `Query.projected` for more information.

        When calling `subset` multiple times on the same very large result set,
        it may be much more efficient to call `materialize` first. For
        example::

            dimensions1 = DimensionGroup(...)
            dimensions2 = DimensionGroup(...)
            with registry.queryDataIds(...).materialize() as tempDataIds:
                for dataId1 in tempDataIds.subset(dimensions1, unique=True):
                    ...
                for dataId2 in tempDataIds.subset(dimensions2, unique=True):
                    ...
        """
        if dimensions is None:
            dimensions = self.dimensions
        else:
            dimensions = self.dimensions.universe.conform(dimensions)
            if not dimensions.issubset(self.dimensions):
                raise ValueError(f"{dimensions} is not a subset of {self.dimensions}")
        query = self._query.projected(dimensions.names, unique=unique, defer=True, drop_postprocessing=True)
        return DataCoordinateQueryResults(query)

    def findDatasets(
        self,
        datasetType: Any,
        collections: Any,
        *,
        findFirst: bool = True,
        components: bool | None = None,
    ) -> DatasetQueryResults:
        """Find datasets using the data IDs identified by this query.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for. Must have
            dimensions that are a subset of ``self.dimensions``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof. ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each result data ID, only yield one
            `DatasetRef`, from the first collection in which a dataset of that
            dataset type appears (according to the order of ``collections``
            passed in). If `True`, ``collections`` must not contain regular
            expressions and may not be ``...``.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their parent
            datasets were not matched by the expression. Fully-specified
            component datasets (`str` or `DatasetType` instances) are always
            included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.

        Returns
        -------
        datasets : `ParentDatasetQueryResults`
            A lazy-evaluation object representing dataset query results,
            iterable over `DatasetRef` objects. If ``self.hasRecords()``, all
            nested data IDs in those dataset references will have records as
            well.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
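
        Notes
        -----
        A minimal usage sketch (the ``registry`` object and the dataset type
        and collection names are illustrative, not part of this API)::

            data_ids = registry.queryDataIds(["visit", "detector"])
            refs = data_ids.findDatasets("calexp", collections=["HSC/runs/demo"])
            for ref in refs:
                print(ref.dataId, ref.run)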

        """
        parent_dataset_type, components_found = self._query.backend.resolve_single_dataset_type_wildcard(
            datasetType, components=components, explicit_only=True
        )
        return ParentDatasetQueryResults(
            self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True),
            parent_dataset_type,
            components_found,
        )

    def findRelatedDatasets(
        self,
        datasetType: DatasetType | str,
        collections: Any,
        *,
        findFirst: bool = True,
        dimensions: DimensionGroup | DimensionGraph | Iterable[str] | None = None,
    ) -> Iterable[tuple[DataCoordinate, DatasetRef]]:
        """Find datasets using the data IDs identified by this query, and
        return them along with the original data IDs.

        This is a variant of `findDatasets` that is often more useful when
        the target dataset type does not have all of the dimensions of the
        original data ID query, as is generally the case with calibration
        lookups.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            Dataset type or the name of one to search for. Must have
            dimensions that are a subset of ``self.dimensions``.
        collections : `Any`
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof. ``...`` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        findFirst : `bool`, optional
            If `True` (default), for each data ID in ``self``, only yield one
            `DatasetRef`, from the first collection in which a dataset of that
            dataset type appears (according to the order of ``collections``
            passed in). If `True`, ``collections`` must not contain regular
            expressions and may not be ``...``. Note that this is not the
            same as yielding one `DatasetRef` for each yielded data ID if
            ``dimensions`` is not `None`.
        dimensions : `DimensionGroup`, `DimensionGraph`, or \
                `~collections.abc.Iterable` [ `str` ], optional
            The dimensions of the data IDs returned. Must be a subset of
            ``self.dimensions``.

        Returns
        -------
        pairs : `~collections.abc.Iterable` [ `tuple` [ `DataCoordinate`, \
                `DatasetRef` ] ]
            An iterable of (data ID, dataset reference) pairs.

        Raises
        ------
        MissingDatasetTypeError
            Raised if the given dataset type is not registered.
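
        Notes
        -----
        A minimal sketch of a calibration lookup (the dataset type and
        collection names are illustrative)::

            data_ids = registry.queryDataIds(["exposure", "detector"])
            pairs = data_ids.findRelatedDatasets("bias", collections=["HSC/calib"])
            for data_id, ref in pairs:
                print(data_id, ref.dataId)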

        """
        if dimensions is None:
            dimensions = self.dimensions
        else:
            dimensions = self.dimensions.universe.conform(dimensions)
        parent_dataset_type, _ = self._query.backend.resolve_single_dataset_type_wildcard(
            datasetType, components=False, explicit_only=True
        )
        query = self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True)
        return query.iter_data_ids_and_dataset_refs(parent_dataset_type, dimensions)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them. If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``). Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique rows
        returned, so even with ``exact=True`` it may provide only an upper
        bound on the number of *deduplicated* result rows.
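
        A cheap upper bound can be checked before paying for an exact count;
        for example (``results`` is any instance of this class)::

            if results.count(exact=False) > 0:
                n = results.count(exact=True, discard=True)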

        """
        return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows. `False` if it definitely would not.
        """
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default), execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
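
        Notes
        -----
        A sketch of typical use when diagnosing an empty query (``results``
        is any instance of this class)::

            if not results.any():
                for message in results.explain_no_results():
                    print(message)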

        """
        return self._query.explain_no_results(execute=execute)

    def order_by(self, *args: str) -> DataCoordinateQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering. A column name
            can be prefixed with a minus (``-``) to use descending ordering.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            Returns the ``self`` instance, which is updated to return ordered
            results.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining.
        """
        clause = OrderByClause.parse_general(args, self._query.dimensions)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DataCoordinateQueryResults:
        """Make the iterator return a limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records. `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DataCoordinateQueryResults`
            Returns the ``self`` instance, which is updated to return a
            limited set of records.

        Notes
        -----
        This method modifies the iterator in place and returns the same
        instance to support method chaining. Normally this method is used
        together with the `order_by` method.
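
        For example, a sketch of fetching the ten highest-numbered visits
        (the ``registry`` object here is illustrative)::

            results = registry.queryDataIds(["visit"])
            for data_id in results.order_by("-visit").limit(10):
                ...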

        """
        if offset is None:
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self


class DatasetQueryResults(Iterable[DatasetRef]):
    """An interface for objects that represent the results of queries for
    datasets.
    """

    @abstractmethod
    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        """Group results by parent dataset type.

        Returns
        -------
        iter : `~collections.abc.Iterator` [ `ParentDatasetQueryResults` ]
            An iterator over `DatasetQueryResults` instances that are each
            responsible for a single parent dataset type (either just that
            dataset type, one or more of its component dataset types, or both).
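
        Notes
        -----
        A sketch of per-dataset-type processing (``results`` is any
        `DatasetQueryResults` instance)::

            for parent_results in results.byParentDatasetType():
                print(parent_results.parentDatasetType.name)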

        """
        raise NotImplementedError()

    @abstractmethod
    def materialize(self) -> AbstractContextManager[DatasetQueryResults]:
        """Insert this query's results into a temporary table.

        Returns
        -------
        context : `typing.ContextManager` [ `DatasetQueryResults` ]
            A context manager that ensures the temporary table is created and
            populated in ``__enter__`` (returning a results object backed by
            that table), and dropped in ``__exit__``. If ``self`` is already
            materialized, the context manager may do nothing (reflecting the
            fact that an outer context manager should already take care of
            everything else).
        """
        raise NotImplementedError()

    @abstractmethod
    def expanded(self) -> DatasetQueryResults:
        """Return a `DatasetQueryResults` for which `DataCoordinate.hasRecords`
        returns `True` for all data IDs in returned `DatasetRef` objects.

        Returns
        -------
        expanded : `DatasetQueryResults`
            Either a new `DatasetQueryResults` instance or ``self``, if it is
            already expanded.

        Notes
        -----
        As with `DataCoordinateQueryResults.expanded`, it may be more efficient
        to call `materialize` before expanding data IDs for very large result
        sets.
        """
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them. If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``). Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique rows
        returned, so even with ``exact=True`` it may provide only an upper
        bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(
        self,
        *,
        execute: bool = True,
        exact: bool = True,
    ) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows. `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default), execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
        """
        raise NotImplementedError()

    def _iter_by_dataset_type(self) -> Iterator[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Group results by dataset type.

        This is a private hook for the interface defined by
        `DatasetRef.iter_by_type`, enabling much more efficient processing of
        heterogeneous `DatasetRef` iterables when they come directly from
        queries.
        """
        for parent_results in self.byParentDatasetType():
            for component in parent_results.components:
                dataset_type = parent_results.parentDatasetType
                if component is not None:
                    dataset_type = dataset_type.makeComponentDatasetType(component)
                yield (dataset_type, parent_results.withComponents((component,)))


class ParentDatasetQueryResults(DatasetQueryResults):
    """An object that represents results from a query for datasets with a
    single parent `DatasetType`.

    Parameters
    ----------
    query : `Query`
        Low-level query object that backs these results.
    dataset_type : `DatasetType`
        Parent dataset type for all datasets returned by this query.
    components : `~collections.abc.Sequence` [ `str` or `None` ], optional
        Names of components to include in iteration. `None` may be included
        (at most once) to include the parent dataset type.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
    """

    def __init__(
        self,
        query: Query,
        dataset_type: DatasetType,
        components: Sequence[str | None] = (None,),
    ):
        self._query = query
        self._dataset_type = dataset_type
        self._components = components

    __slots__ = ("_query", "_dataset_type", "_components")

    def __iter__(self) -> Iterator[DatasetRef]:
        return self._query.iter_dataset_refs(self._dataset_type, self._components)

    def __repr__(self) -> str:
        return f"<DatasetRef iterator for [components of] {self._dataset_type.name}>"

    @property
    @deprecated("Deprecated, will be removed after v27.", version="v27", category=FutureWarning)
    def components(self) -> Sequence[str | None]:
        """The components of the parent dataset type included in these results
        (`~collections.abc.Sequence` [ `str` or `None` ]).
        """
        return self._components

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        yield self

    @contextmanager
    def materialize(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        with self._query.open_context():
            yield ParentDatasetQueryResults(self._query.materialized(), self._dataset_type, self._components)

    @property
    def parentDatasetType(self) -> DatasetType:
        """The parent dataset type for all datasets in this iterable
        (`DatasetType`).
        """
        return self._dataset_type

    @property
    def dataIds(self) -> DataCoordinateQueryResults:
        """A lazy-evaluation object representing a query for just the data
        IDs of the datasets that would be returned by this query
        (`DataCoordinateQueryResults`).

        The returned object is not in general `zip`-iterable with ``self``;
        it may be in a different order or have (or not have) duplicates.
        """
        return DataCoordinateQueryResults(self._query.projected(defer=True))

    def withComponents(self, components: Sequence[str | None]) -> ParentDatasetQueryResults:
        """Return a new query results object for the same parent datasets but
        different components.

        Parameters
        ----------
        components : `~collections.abc.Sequence` [ `str` or `None` ]
            Names of components to include in iteration. `None` may be
            included (at most once) to include the parent dataset type.
        """
        return ParentDatasetQueryResults(self._query, self._dataset_type, components)

    def expanded(self) -> ParentDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ParentDatasetQueryResults(
            self._query.with_record_columns(defer=True), self._dataset_type, self._components
        )

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return len(self._components) * self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return self._query.any(execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)


class ChainedDatasetQueryResults(DatasetQueryResults):
    """A `DatasetQueryResults` implementation that simply chains together
    other results objects, each for a different parent dataset type.

    Parameters
    ----------
    chain : `~collections.abc.Sequence` [ `ParentDatasetQueryResults` ]
        The underlying results objects this object will chain together.
    doomed_by : `~collections.abc.Iterable` [ `str` ], optional
        A list of messages (appropriate for e.g. logging or exceptions) that
        explain why the query is known to return no results even before it is
        executed. Queries with a non-empty list will never be executed.
        Child results objects may also have their own list.
    """

    def __init__(self, chain: Sequence[ParentDatasetQueryResults], doomed_by: Iterable[str] = ()):
        self._chain = chain
        self._doomed_by = tuple(doomed_by)

    # Include _doomed_by here as well; __init__ assigns it, so listing only
    # _chain would be inconsistent.
    __slots__ = ("_chain", "_doomed_by")

    def __iter__(self) -> Iterator[DatasetRef]:
        return itertools.chain.from_iterable(self._chain)

    def __repr__(self) -> str:
        return "<DatasetRef iterator for multiple dataset types>"

    def byParentDatasetType(self) -> Iterator[ParentDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        return iter(self._chain)

    @contextmanager
    def materialize(self) -> Iterator[ChainedDatasetQueryResults]:
        # Docstring inherited from DatasetQueryResults.
        # Propagate the doomed-by messages, as `expanded` does, so they are
        # not lost on the materialized results object.
        with ExitStack() as stack:
            yield ChainedDatasetQueryResults(
                [stack.enter_context(r.materialize()) for r in self._chain], self._doomed_by
            )

    def expanded(self) -> ChainedDatasetQueryResults:
        # Docstring inherited from DatasetQueryResults.
        return ChainedDatasetQueryResults([r.expanded() for r in self._chain], self._doomed_by)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return sum(r.count(exact=exact, discard=discard) for r in self._chain)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        return any(r.any(execute=execute, exact=exact) for r in self._chain)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        result = list(self._doomed_by)
        for r in self._chain:
            result.extend(r.explain_no_results(execute=execute))
        return result


class DimensionRecordQueryResults(Iterable[DimensionRecord]):
    """An interface for objects that represent the results of queries for
    dimension records.
    """

    @property
    @abstractmethod
    def element(self) -> DimensionElement:
        """The dimension element whose records this object returns
        (`DimensionElement`).
        """
        raise NotImplementedError()

    @abstractmethod
    def run(self) -> DimensionRecordQueryResults:
        """Execute the query, returning an equivalent results object backed
        by the fetched records.
        """
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count. If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them. If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``). Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.

        Notes
        -----
        This counts the number of rows returned, not the number of unique rows
        returned, so even with ``exact=True`` it may provide only an upper
        bound on the number of *deduplicated* result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether this query returns any results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows. `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        """Make the iterator return ordered results.

        Parameters
        ----------
        *args : `str`
            Names of the columns/dimensions to use for ordering. A column name
            can be prefixed with a minus (``-``) to use descending ordering.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            Returns the ``self`` instance, which is updated to return ordered
            results.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance.
        """
        raise NotImplementedError()

    @abstractmethod
    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        """Make the iterator return a limited number of records.

        Parameters
        ----------
        limit : `int`
            Upper limit on the number of returned records.
        offset : `int` or `None`, optional
            The number of records to skip before returning at most ``limit``
            records. `None` is interpreted the same as zero for backwards
            compatibility.

        Returns
        -------
        result : `DimensionRecordQueryResults`
            Returns the ``self`` instance, which is updated to return a limited
            set of records.

        Notes
        -----
        This method can modify the iterator in place and return the same
        instance. Normally this method is used together with the `order_by`
        method.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default), execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
        """
        raise NotImplementedError()


class DatabaseDimensionRecordQueryResults(DimensionRecordQueryResults):
    """Implementation of `DimensionRecordQueryResults` backed by a database
    query.

    Parameters
    ----------
    query : `Query`
        Query object that backs this class.
    element : `DimensionElement`
        Element whose records this object returns.

    Notes
    -----
    The `Query` class now implements essentially all of this class's
    functionality; "QueryResult" classes like this one now exist only to
    provide interface backwards compatibility and more specific iterator
    types.
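
    A sketch of typical use via ``Registry.queryDimensionRecords`` (the
    element, data ID values, and record fields here are illustrative)::

        records = registry.queryDimensionRecords("detector", instrument="HSC")
        for record in records.order_by("detector").limit(5):
            print(record.id, record.full_name)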

    """

    def __init__(self, query: Query, element: DimensionElement):
        self._query = query
        self._element = element

    @property
    def element(self) -> DimensionElement:
        # Docstring inherited from DimensionRecordQueryResults.
        return self._element

    def __iter__(self) -> Iterator[DimensionRecord]:
        return self._query.iter_dimension_records(self._element)

    def run(self) -> DimensionRecordQueryResults:
        # Docstring inherited from DimensionRecordQueryResults.
        return DatabaseDimensionRecordQueryResults(self._query.run(), self._element)

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited from base class.
        # Forward ``discard`` as well; dropping it would silently change the
        # documented behavior of this argument.
        return self._query.count(exact=exact, discard=discard)

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited from base class.
        return self._query.any(execute=execute, exact=exact)

    def order_by(self, *args: str) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        clause = OrderByClause.parse_element(args, self._element)
        self._query = self._query.sorted(clause.terms, defer=True)
        return self

    def limit(self, limit: int, offset: int | None = 0) -> DimensionRecordQueryResults:
        # Docstring inherited from base class.
        if offset is None:
            offset = 0
        self._query = self._query.sliced(offset, offset + limit, defer=True)
        return self

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        return self._query.explain_no_results(execute=execute)