Coverage for python/lsst/daf/butler/direct_query_driver/_driver.py: 15%

416 statements  

coverage.py v7.5.0, created at 2024-05-03 02:48 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30import uuid 

31 

32__all__ = ("DirectQueryDriver",) 

33 

34import dataclasses 

35import logging 

36import sys 

37from collections.abc import Iterable, Mapping, Set 

38from contextlib import ExitStack 

39from typing import TYPE_CHECKING, Any, cast, overload 

40 

41import sqlalchemy 

42 

43from .. import ddl 

44from .._dataset_type import DatasetType 

45from .._exceptions import InvalidQueryError 

46from ..dimensions import ( 

47 DataCoordinate, 

48 DataIdValue, 

49 DimensionGroup, 

50 DimensionRecordSet, 

51 DimensionUniverse, 

52 SkyPixDimension, 

53) 

54from ..name_shrinker import NameShrinker 

55from ..queries import tree as qt 

56from ..queries.driver import ( 

57 DataCoordinateResultPage, 

58 DatasetRefResultPage, 

59 DimensionRecordResultPage, 

60 GeneralResultPage, 

61 PageKey, 

62 QueryDriver, 

63 ResultPage, 

64) 

65from ..queries.result_specs import ( 

66 DataCoordinateResultSpec, 

67 DatasetRefResultSpec, 

68 DimensionRecordResultSpec, 

69 GeneralResultSpec, 

70 ResultSpec, 

71) 

72from ..registry import CollectionSummary, CollectionType, NoDefaultCollectionError 

73from ..registry.interfaces import ChainedCollectionRecord, CollectionRecord 

74from ..registry.managers import RegistryManagerInstances 

75from ._postprocessing import Postprocessing 

76from ._query_builder import QueryBuilder, QueryJoiner 

77from ._query_plan import ( 

78 QueryFindFirstPlan, 

79 QueryJoinsPlan, 

80 QueryPlan, 

81 QueryProjectionPlan, 

82 ResolvedDatasetSearch, 

83) 

84from ._sql_column_visitor import SqlColumnVisitor 

85 

86if TYPE_CHECKING: 

87 from ..registry.interfaces import Database 

88 

89 

90_LOG = logging.getLogger(__name__) 

91 

92 

93class DirectQueryDriver(QueryDriver): 

94 """The `QueryDriver` implementation for `DirectButler`. 

95 

96 Parameters 

97 ---------- 

98 db : `Database` 

99 Abstraction for the SQL database. 

100 universe : `DimensionUniverse` 

101 Definitions of all dimensions. 

102 managers : `RegistryManagerInstances` 

103 Struct of registry manager objects. 

104 default_collections : `~collections.abc.Iterable` [ `str` ] 

105 Default collection search path. 

106 default_data_id : `DataCoordinate` 

107 Default governor dimension values. 

108 raw_page_size : `int`, optional 

109 Number of database rows to fetch for each result page. The actual 

110 number of rows in a page may be smaller due to postprocessing. 

111 constant_rows_limit : `int`, optional 

112 Maximum number of uploaded rows to include in queries via 

113 `Database.constant_rows`; above this limit a temporary table is used 

114 instead. 

115 postprocessing_filter_factor : `int`, optional 

116 The number of database rows we expect to have to fetch to yield a 

117 single output row for queries that involve postprocessing. This is 

118 purely a performance tuning parameter that attempts to balance between 

119 fetching too much and requiring multiple fetches; the true value is 

120 highly dependent on the actual query. 

121 """ 
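# Illustrative usage sketch (not part of the original source; ``db``,
# ``universe``, ``managers``, and ``default_data_id`` are assumed to have
# been constructed elsewhere by DirectButler, and the collection name is
# hypothetical):
#
#     driver = DirectQueryDriver(
#         db, universe, managers,
#         default_collections=["some/chain"],
#         default_data_id=default_data_id,
#     )
#     with driver:  # __enter__ opens the transaction used for temp tables
#         page = driver.execute(result_spec, tree)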

122 

123 def __init__( 

124 self, 

125 db: Database, 

126 universe: DimensionUniverse, 

127 managers: RegistryManagerInstances, 

128 default_collections: Iterable[str], 

129 default_data_id: DataCoordinate, 

130 raw_page_size: int = 10000, 

131 constant_rows_limit: int = 1000, 

132 postprocessing_filter_factor: int = 10, 

133 ): 

134 self.db = db 

135 self.managers = managers 

136 self._universe = universe 

137 self._default_collections = tuple(default_collections) 

138 self._default_data_id = default_data_id 

139 self._materializations: dict[qt.MaterializationKey, _MaterializationState] = {} 

140 self._upload_tables: dict[qt.DataCoordinateUploadKey, sqlalchemy.FromClause] = {} 

141 self._exit_stack: ExitStack | None = None 

142 self._raw_page_size = raw_page_size 

143 self._postprocessing_filter_factor = postprocessing_filter_factor 

144 self._constant_rows_limit = constant_rows_limit 

145 self._cursors: dict[PageKey, _Cursor] = {} 

146 

147 def __enter__(self) -> None: 

148 self._exit_stack = ExitStack() 

149 # It might be nice to defer opening a transaction here until first use 

150 # to reduce the time spent in transactions. But it's worth noting that 

151 # this is the default low-level behavior of the Python SQLite driver, 

152 # and it makes it incredibly prone to deadlocks. We might be okay 

153 # here, because Query doesn't do true write operations - just temp 

154 # table writes - but I'm not confident that's enough to make delayed 

155 # transaction starts safe against deadlocks, and it'd be more 

156 # complicated to implement anyway. 

157 # 

158 # We start a transaction rather than just opening a connection to make 

159 # temp table and cursors work with pg_bouncer transaction affinity. 

160 self._exit_stack.enter_context(self.db.transaction(for_temp_tables=True)) 

161 

162 def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: 

163 assert self._exit_stack is not None 

164 self._materializations.clear() 

165 self._upload_tables.clear() 

166 while self._cursors: 

167 _, cursor = self._cursors.popitem() 

168 cursor.close(exc_type, exc_value, traceback) 

169 self._exit_stack.__exit__(exc_type, exc_value, traceback) 

170 self._exit_stack = None 

171 

172 @property 

173 def universe(self) -> DimensionUniverse: 

174 return self._universe 

175 

176 @overload 

177 def execute( 

178 self, result_spec: DataCoordinateResultSpec, tree: qt.QueryTree 

179 ) -> DataCoordinateResultPage: ... 

180 

181 @overload 

182 def execute( 

183 self, result_spec: DimensionRecordResultSpec, tree: qt.QueryTree 

184 ) -> DimensionRecordResultPage: ... 

185 

186 @overload 

187 def execute(self, result_spec: DatasetRefResultSpec, tree: qt.QueryTree) -> DatasetRefResultPage: ... 

188 

189 @overload 

190 def execute(self, result_spec: GeneralResultSpec, tree: qt.QueryTree) -> GeneralResultPage: ... 

191 

192 def execute(self, result_spec: ResultSpec, tree: qt.QueryTree) -> ResultPage: 

193 # Docstring inherited. 

194 if self._exit_stack is None: 

195 raise RuntimeError("QueryDriver context must be entered before queries can be executed.") 

196 _, builder = self.build_query( 

197 tree, 

198 final_columns=result_spec.get_result_columns(), 

199 order_by=result_spec.order_by, 

200 find_first_dataset=result_spec.find_first_dataset, 

201 ) 

202 sql_select = builder.select() 

203 if result_spec.order_by: 

204 visitor = SqlColumnVisitor(builder.joiner, self) 

205 sql_select = sql_select.order_by(*[visitor.expect_scalar(term) for term in result_spec.order_by]) 

206 if result_spec.limit is not None: 

207 if builder.postprocessing: 

208 builder.postprocessing.limit = result_spec.limit 

209 else: 

210 sql_select = sql_select.limit(result_spec.limit) 

211 if builder.postprocessing.limit is not None: 

212 # We might want to fetch many fewer rows than the default page 

213 # size if we have to implement limit in postprocessing. 

214 raw_page_size = min( 

215 self._postprocessing_filter_factor * builder.postprocessing.limit, 

216 self._raw_page_size, 

217 ) 

218 else: 

219 raw_page_size = self._raw_page_size 
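# (Illustrative arithmetic, assuming the default constructor values: with
# postprocessing.limit == 5 and postprocessing_filter_factor == 10, we fetch
# at most min(10 * 5, 10000) == 50 raw rows per page.)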

220 # Execute the query by initializing a _Cursor object that manages the 

221 # lifetime of the result. 

222 cursor = _Cursor( 

223 self.db, 

224 sql_select, 

225 result_spec=result_spec, 

226 name_shrinker=builder.joiner.name_shrinker, 

227 postprocessing=builder.postprocessing, 

228 raw_page_size=raw_page_size, 

229 ) 

230 result_page = cursor.next() 

231 if result_page.next_key is not None: 

232 # Cursor has not been exhausted; add it to the driver for use by 

233 # fetch_next_page. 

234 self._cursors[result_page.next_key] = cursor 

235 return result_page 

236 

237 @overload 

238 def fetch_next_page( 

239 self, result_spec: DataCoordinateResultSpec, key: PageKey 

240 ) -> DataCoordinateResultPage: ... 

241 

242 @overload 

243 def fetch_next_page( 

244 self, result_spec: DimensionRecordResultSpec, key: PageKey 

245 ) -> DimensionRecordResultPage: ... 

246 

247 @overload 

248 def fetch_next_page(self, result_spec: DatasetRefResultSpec, key: PageKey) -> DatasetRefResultPage: ... 

249 

250 @overload 

251 def fetch_next_page(self, result_spec: GeneralResultSpec, key: PageKey) -> GeneralResultPage: ... 

252 

253 def fetch_next_page(self, result_spec: ResultSpec, key: PageKey) -> ResultPage: 

254 # Docstring inherited. 

255 try: 

256 cursor = self._cursors.pop(key) 

257 except KeyError: 

258 raise RuntimeError("Cannot continue query result iteration after the query context has closed.") 

259 result_page = cursor.next() 

260 if result_page.next_key is not None: 

261 self._cursors[result_page.next_key] = cursor 

262 return result_page 

263 

264 def materialize( 

265 self, 

266 tree: qt.QueryTree, 

267 dimensions: DimensionGroup, 

268 datasets: frozenset[str], 

269 key: qt.MaterializationKey | None = None, 

270 ) -> qt.MaterializationKey: 

271 # Docstring inherited. 

272 if self._exit_stack is None: 

273 raise RuntimeError("QueryDriver context must be entered before 'materialize' is called.") 

274 _, builder = self.build_query(tree, qt.ColumnSet(dimensions)) 

275 # Current implementation ignores 'datasets' aside from remembering 

276 # them, because figuring out what to put in the temporary table for 

277 # them is tricky, especially if calibration collections are involved. 

278 # That's okay because: 

279 # 

280 # - the query whose results we materialize includes the dataset 

281 # searches as constraints; 

282 # 

283 # - we still (in Query.materialize) join the dataset searches back in 

284 # anyway, and given materialized data IDs the join to the dataset 

285 # search is straightforward and definitely well-indexed, and not much 

286 # (if at all) worse than joining back in on a materialized UUID. 

287 # 

288 sql_select = builder.select() 

289 table = self._exit_stack.enter_context(self.db.temporary_table(builder.make_table_spec())) 

290 self.db.insert(table, select=sql_select) 

291 if key is None: 

292 key = uuid.uuid4() 

293 self._materializations[key] = _MaterializationState(table, datasets, builder.postprocessing) 

294 return key 
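# Illustrative sketch (hypothetical dataset type name): within the driver
# context,
#
#     key = driver.materialize(tree, dimensions, frozenset({"raw"}))
#
# runs the query once into a temporary table; later queries whose trees carry
# this key join against that table via _join_materialization instead of
# re-executing the original joins.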

295 

296 def upload_data_coordinates( 

297 self, 

298 dimensions: DimensionGroup, 

299 rows: Iterable[tuple[DataIdValue, ...]], 

300 key: qt.DataCoordinateUploadKey | None = None, 

301 ) -> qt.DataCoordinateUploadKey: 

302 # Docstring inherited. 

303 if self._exit_stack is None: 

304 raise RuntimeError( 

305 "QueryDriver context must be entered before 'upload_data_coordinates' is called." 

306 ) 

307 columns = qt.ColumnSet(dimensions).drop_implied_dimension_keys() 

308 table_spec = ddl.TableSpec( 

309 [columns.get_column_spec(logical_table, field).to_sql_spec() for logical_table, field in columns] 

310 ) 

311 dict_rows: list[dict[str, Any]] 

312 if not columns: 

313 table_spec.fields.add( 

314 ddl.FieldSpec( 

315 QueryBuilder.EMPTY_COLUMNS_NAME, dtype=QueryBuilder.EMPTY_COLUMNS_TYPE, nullable=True 

316 ) 

317 ) 

318 dict_rows = [{QueryBuilder.EMPTY_COLUMNS_NAME: None}] 

319 else: 

320 dict_rows = [dict(zip(dimensions.required, values)) for values in rows] 

321 from_clause: sqlalchemy.FromClause 

322 if len(dict_rows) > self._constant_rows_limit: 

323 from_clause = self._exit_stack.enter_context(self.db.temporary_table(table_spec)) 

324 self.db.insert(from_clause, *dict_rows) 

325 else: 

326 from_clause = self.db.constant_rows(table_spec.fields, *dict_rows) 

327 if key is None: 

328 key = uuid.uuid4() 

329 self._upload_tables[key] = from_clause 

330 return key 
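# Illustrative example (hypothetical dimension values): for a dimension group
# whose required dimensions are ("instrument", "detector"), a row ("HSC", 5)
# becomes the dict {"instrument": "HSC", "detector": 5} before being uploaded
# via constant_rows() or a temporary table.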

331 

332 def count( 

333 self, 

334 tree: qt.QueryTree, 

335 result_spec: ResultSpec, 

336 *, 

337 exact: bool, 

338 discard: bool, 

339 ) -> int: 

340 # Docstring inherited. 

341 columns = result_spec.get_result_columns() 

342 plan, builder = self.build_query(tree, columns, find_first_dataset=result_spec.find_first_dataset) 

343 if not all(d.collection_records for d in plan.joins.datasets.values()): 

344 return 0 

345 if not exact: 

346 builder.postprocessing = Postprocessing() 

347 if builder.postprocessing: 

348 if not discard: 

349 raise InvalidQueryError("Cannot count query rows exactly without discarding them.") 

350 sql_select = builder.select() 

351 builder.postprocessing.limit = result_spec.limit 

352 n = 0 

353 with self.db.query(sql_select.execution_options(yield_per=self._raw_page_size)) as results: 

354 for _ in builder.postprocessing.apply(results): 

355 n += 1 

356 return n 

357 # If the query has DISTINCT or GROUP BY, nest it in a subquery so we 

358 # count deduplicated rows. 

359 builder = builder.nested() 

360 # Replace the columns of the query with just COUNT(*). 

361 builder.columns = qt.ColumnSet(self._universe.empty.as_group()) 

362 count_func: sqlalchemy.ColumnElement[int] = sqlalchemy.func.count() 

363 builder.joiner.special["_ROWCOUNT"] = count_func 

364 # Render and run the query. 

365 sql_select = builder.select() 

366 with self.db.query(sql_select) as result: 

367 count = cast(int, result.scalar()) 

368 if result_spec.limit is not None: 

369 count = min(count, result_spec.limit) 

370 return count 
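# The non-postprocessing branch above effectively renders SQL of the form
#
#     SELECT count(*) AS _ROWCOUNT FROM (<deduplicated query>) ...
#
# (illustrative shape only; the actual statement comes from
# QueryBuilder.select() after nesting).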

371 

372 def any(self, tree: qt.QueryTree, *, execute: bool, exact: bool) -> bool: 

373 # Docstring inherited. 

374 plan, builder = self.build_query(tree, qt.ColumnSet(tree.dimensions)) 

375 if not all(d.collection_records for d in plan.joins.datasets.values()): 

376 return False 

377 if not execute: 

378 if exact: 

379 raise InvalidQueryError("Cannot obtain exact result for 'any' without executing.") 

380 return True 

381 if builder.postprocessing and exact: 

382 sql_select = builder.select() 

383 with self.db.query( 

384 sql_select.execution_options(yield_per=self._postprocessing_filter_factor) 

385 ) as result: 

386 for _ in builder.postprocessing.apply(result): 

387 return True 

388 return False 

389 sql_select = builder.select().limit(1) 

390 with self.db.query(sql_select) as result: 

391 return result.first() is not None 

392 

393 def explain_no_results(self, tree: qt.QueryTree, execute: bool) -> Iterable[str]: 

394 # Docstring inherited. 

395 plan, _ = self.analyze_query(tree, qt.ColumnSet(tree.dimensions)) 

396 if plan.joins.messages or not execute: 

397 return plan.joins.messages 

398 # TODO: guess at ways to split up query that might fail or succeed if 

399 # run separately, execute them with LIMIT 1 and report the results. 

400 return [] 

401 

402 def get_dataset_type(self, name: str) -> DatasetType: 

403 # Docstring inherited 

404 return self.managers.datasets[name].datasetType 

405 

406 def get_default_collections(self) -> tuple[str, ...]: 

407 # Docstring inherited. 

408 if not self._default_collections: 

409 raise NoDefaultCollectionError("No collections provided and no default collections.") 

410 return self._default_collections 

411 

412 def build_query( 

413 self, 

414 tree: qt.QueryTree, 

415 final_columns: qt.ColumnSet, 

416 order_by: Iterable[qt.OrderExpression] = (), 

417 find_first_dataset: str | None = None, 

418 ) -> tuple[QueryPlan, QueryBuilder]: 

419 """Convert a query description into a mostly-completed `QueryBuilder`. 

420 

421 Parameters 

422 ---------- 

423 tree : `.queries.tree.QueryTree` 

424 Description of the joins and row filters in the query. 

425 final_columns : `.queries.tree.ColumnSet` 

426 Final output columns that should be emitted by the SQL query. 

427 order_by : `~collections.abc.Iterable` [ \ 

428 `.queries.tree.OrderExpression` ], optional 

429 Column expressions to sort by. 

430 find_first_dataset : `str` or `None`, optional 

431 Name of a dataset type for which only one result row for each data 

432 ID should be returned, with the collections searched in order. 

433 

434 Returns 

435 ------- 

436 plan : `QueryPlan` 

437 Plan used to transform the query into SQL, including some 

438 information (e.g. diagnostics about doomed-to-fail dataset 

439 searches) that isn't transferred into the builder itself. 

440 builder : `QueryBuilder` 

441 Builder object that can be used to create a SQL SELECT via its 

442 `~QueryBuilder.select` method. We return this instead of a 

443 `sqlalchemy.Select` object itself to allow different methods to 

444 customize the SELECT clause itself (e.g. `count` can replace the 

445 columns selected with ``COUNT(*)``). 

446 """ 

447 # See the QueryPlan docs for an overview of what these stages of query 

448 # construction do. 

449 plan, builder = self.analyze_query(tree, final_columns, order_by, find_first_dataset) 

450 self.apply_query_joins(plan.joins, builder.joiner) 

451 self.apply_query_projection(plan.projection, builder) 

452 builder = self.apply_query_find_first(plan.find_first, builder) 

453 builder.columns = plan.final_columns 

454 return plan, builder 

455 

456 def analyze_query( 

457 self, 

458 tree: qt.QueryTree, 

459 final_columns: qt.ColumnSet, 

460 order_by: Iterable[qt.OrderExpression] = (), 

461 find_first_dataset: str | None = None, 

462 ) -> tuple[QueryPlan, QueryBuilder]: 

463 """Construct a plan for building a query and initialize a builder. 

464 

465 Parameters 

466 ---------- 

467 tree : `.queries.tree.QueryTree` 

468 Description of the joins and row filters in the query. 

469 final_columns : `.queries.tree.ColumnSet` 

470 Final output columns that should be emitted by the SQL query. 

471 order_by : `~collections.abc.Iterable` [ \ 

472 `.queries.tree.OrderExpression` ], optional 

473 Column expressions to sort by. 

474 find_first_dataset : `str` or `None`, optional 

475 Name of a dataset type for which only one result row for each data 

476 ID should be returned, with the collections searched in order. 

477 

478 Returns 

479 ------- 

480 plan : `QueryPlan` 

481 Plan used to transform the query into SQL, including some 

482 information (e.g. diagnostics about doomed-to-fail dataset 

483 searches) that isn't transferred into the builder itself. 

484 builder : `QueryBuilder` 

485 Builder object initialized with overlap joins and constraints 

486 potentially included, with the remainder still present in 

487 `QueryJoinsPlan.predicate`. 

488 """ 

489 # The fact that this method returns both a QueryPlan and an initial 

490 # QueryBuilder (rather than just a QueryPlan) is a tradeoff that lets 

491 # DimensionRecordStorageManager.process_query_overlaps (which is called 

492 # by the `_analyze_query_tree` call below) pull out overlap expressions 

493 # from the predicate at the same time it turns them into SQL table 

494 # joins (in the builder). 

495 joins_plan, builder = self._analyze_query_tree(tree) 

496 

497 # The "projection" columns differ from the final columns by not 

498 # omitting any dimension keys (this keeps queries for different result 

499 # types more similar during construction), including any columns needed 

500 # only by order_by terms, and including the collection key if we need 

501 # it for GROUP BY or DISTINCT. 

502 projection_plan = QueryProjectionPlan( 

503 final_columns.copy(), joins_plan.datasets, find_first_dataset=find_first_dataset 

504 ) 

505 projection_plan.columns.restore_dimension_keys() 

506 for term in order_by: 

507 term.gather_required_columns(projection_plan.columns) 

508 # The projection gets interesting if it does not have all of the 

509 # dimension keys or dataset fields of the "joins" stage, because that 

510 # means it needs to do a GROUP BY or DISTINCT ON to get unique rows. 

511 if projection_plan.columns.dimensions != joins_plan.columns.dimensions: 

512 assert projection_plan.columns.dimensions.issubset(joins_plan.columns.dimensions) 

513 # We're going from a larger set of dimensions to a smaller set, 

514 # that means we'll be doing a SELECT DISTINCT [ON] or GROUP BY. 

515 projection_plan.needs_dimension_distinct = True 

516 for dataset_type, fields_for_dataset in joins_plan.columns.dataset_fields.items(): 

517 if not projection_plan.columns.dataset_fields[dataset_type]: 

518 # The "joins"-stage query has one row for each collection for 

519 # each data ID, but the projection-stage query just wants 

520 # one row for each data ID. 

521 if len(joins_plan.datasets[dataset_type].collection_records) > 1: 

522 projection_plan.needs_dataset_distinct = True 

523 break 

524 # If there are any dataset fields being propagated through that 

525 # projection and there is more than one collection, we need to 

526 # include the collection_key column so we can use that as one of 

527 # the DISTINCT or GROUP BY columns. 

528 for dataset_type, fields_for_dataset in projection_plan.columns.dataset_fields.items(): 

529 if len(joins_plan.datasets[dataset_type].collection_records) > 1: 

530 fields_for_dataset.add("collection_key") 

531 if projection_plan: 

532 # If there's a projection and we're doing postprocessing, we might 

533 # be collapsing the dimensions of the postprocessing regions. When 

534 # that happens, we want to apply an aggregate function to them that 

535 # computes the union of the regions that are grouped together. 

536 for element in builder.postprocessing.iter_missing(projection_plan.columns): 

537 if element.name not in projection_plan.columns.dimensions.elements: 

538 projection_plan.region_aggregates.append(element) 

539 

540 # The joins-stage query also needs to include all columns needed by the 

541 # downstream projection query. Note that this: 

542 # - never adds new dimensions to the joins stage (since those are 

543 # always a superset of the projection-stage dimensions); 

544 # - does not affect our determination of 

545 # projection_plan.needs_dataset_distinct, because any dataset fields 

546 # being added to the joins stage here are already in the projection. 

547 joins_plan.columns.update(projection_plan.columns) 

548 

549 find_first_plan = None 

550 if find_first_dataset is not None: 

551 find_first_plan = QueryFindFirstPlan(joins_plan.datasets[find_first_dataset]) 

552 # If we're doing a find-first search and there's a calibration 

553 # collection in play, we need to make sure the rows coming out of 

554 # the base query have only one timespan for each data ID + 

555 # collection, and we can only do that with a GROUP BY and COUNT 

556 # that we inspect in postprocessing. 

557 if find_first_plan.search.is_calibration_search: 

558 builder.postprocessing.check_validity_match_count = True 

559 plan = QueryPlan( 

560 joins=joins_plan, 

561 projection=projection_plan, 

562 find_first=find_first_plan, 

563 final_columns=final_columns, 

564 ) 

565 return plan, builder 

566 

567 def apply_query_joins(self, plan: QueryJoinsPlan, joiner: QueryJoiner) -> None: 

568 """Modify a `QueryJoiner` to include all tables and other FROM and 

569 WHERE clause terms needed. 

570 

571 Parameters 

572 ---------- 

573 plan : `QueryJoinsPlan` 

574 Component of a `QueryPlan` relevant for the "joins" stage. 

575 joiner : `QueryJoiner` 

576 Component of a `QueryBuilder` that holds the FROM and WHERE 

577 clauses. This is expected to be initialized by `analyze_query` 

578 and will be modified in-place on return. 

579 """ 

580 # Process data coordinate upload joins. 

581 for upload_key, upload_dimensions in plan.data_coordinate_uploads.items(): 

582 joiner.join( 

583 QueryJoiner(self.db, self._upload_tables[upload_key]).extract_dimensions( 

584 upload_dimensions.required 

585 ) 

586 ) 

587 # Process materialization joins. We maintain a set of dataset types 

588 # that were included in a materialization; searches for these datasets 

589 # can be dropped if they are only present to provide a constraint on 

590 # data IDs, since that's already embedded in a materialization. 

591 materialized_datasets: set[str] = set() 

592 for materialization_key, materialization_dimensions in plan.materializations.items(): 

593 materialized_datasets.update( 

594 self._join_materialization(joiner, materialization_key, materialization_dimensions) 

595 ) 

596 # Process dataset joins. 

597 for dataset_search in plan.datasets.values(): 

598 self._join_dataset_search( 

599 joiner, 

600 dataset_search, 

601 plan.columns.dataset_fields[dataset_search.name], 

602 ) 

603 # Join in dimension element tables that we know we need relationships 

604 # or columns from. 

605 for element in plan.iter_mandatory(): 

606 joiner.join( 

607 self.managers.dimensions.make_query_joiner( 

608 element, plan.columns.dimension_fields[element.name] 

609 ) 

610 ) 

611 # See if any dimension keys are still missing, and if so join in their 

612 # tables. Note that we know there are no fields needed from these. 

613 while not (joiner.dimension_keys.keys() >= plan.columns.dimensions.names): 

614 # Look for opportunities to join in multiple dimensions via single 

615 # table, to reduce the total number of tables joined in. 

616 missing_dimension_names = plan.columns.dimensions.names - joiner.dimension_keys.keys() 

617 best = self._universe[ 

618 max( 

619 missing_dimension_names, 

620 key=lambda name: len(self._universe[name].dimensions.names & missing_dimension_names), 

621 ) 

622 ] 

623 joiner.join(self.managers.dimensions.make_query_joiner(best, frozenset())) 

624 # Add the WHERE clause to the joiner. 

625 joiner.where(plan.predicate.visit(SqlColumnVisitor(joiner, self))) 

626 

627 def apply_query_projection(self, plan: QueryProjectionPlan, builder: QueryBuilder) -> None: 

628 """Modify `QueryBuilder` to reflect the "projection" stage of query 

629 construction, which can involve a GROUP BY or DISTINCT [ON] clause 

630 that enforces uniqueness. 

631 

632 Parameters 

633 ---------- 

634 plan : `QueryProjectionPlan` 

635 Component of a `QueryPlan` relevant for the "projection" stage. 

636 builder : `QueryBuilder` 

637 Builder object that will be modified in place. Expected to be 

638 initialized by `analyze_query` and further modified by 

639 `apply_query_joins`. 

640 """ 

641 builder.columns = plan.columns 

642 if not plan and not builder.postprocessing.check_validity_match_count: 

643 # Rows are already unique; nothing else to do in this method. 

644 return 

645 # This method generates either a SELECT DISTINCT [ON] or a SELECT with 

646 # GROUP BY. We'll work out which as we go. 

647 have_aggregates: bool = False 

648 # Dimension key columns form at least most of our GROUP BY or DISTINCT 

649 # ON clause. 

650 unique_keys: list[sqlalchemy.ColumnElement[Any]] = [ 

651 builder.joiner.dimension_keys[k][0] for k in plan.columns.dimensions.data_coordinate_keys 

652 ] 

653 # There are two reasons we might need an aggregate function: 

654 # - to make sure temporal constraints and joins have resulted in at 

655 # most one validity range match for each data ID and collection, 

656 # when we're doing a find-first query. 

657 # - to compute the unions of regions we need for postprocessing, when 

658 # the data IDs for those regions are not wholly included in the 

659 # results (i.e. we need to postprocess on 

660 # visit_detector_region.region, but the output rows don't have 

661 # detector, just visit - so we compute the union of the 

662 # visit_detector region over all matched detectors). 

663 if builder.postprocessing.check_validity_match_count: 

664 builder.joiner.special[builder.postprocessing.VALIDITY_MATCH_COUNT] = ( 

665 sqlalchemy.func.count().label(builder.postprocessing.VALIDITY_MATCH_COUNT) 

666 ) 

667 have_aggregates = True 

668 for element in plan.region_aggregates: 

669 builder.joiner.fields[element.name]["region"] = ddl.Base64Region.union_aggregate( 

670 builder.joiner.fields[element.name]["region"] 

671 ) 

672 have_aggregates = True 

673 # Many of our fields derive their uniqueness from the unique_key 

674 fields: if rows are unique over the 'unique_key' fields, then they're 

675 # automatically unique over these 'derived_fields'. We just remember 

676 # these as pairs of (logical_table, field) for now. 

677 derived_fields: list[tuple[str, str]] = [] 

678 # All dimension record fields are derived fields. 

679 for element_name, fields_for_element in plan.columns.dimension_fields.items(): 

680 for element_field in fields_for_element: 

681 derived_fields.append((element_name, element_field)) 

682 # Some dataset fields are derived fields and some are unique keys, and 

683 # it depends on the kinds of collection(s) we're searching and whether 

684 # it's a find-first query. 

685 for dataset_type, fields_for_dataset in plan.columns.dataset_fields.items(): 

686 for dataset_field in fields_for_dataset: 

687 if dataset_field == "collection_key": 

688 # If the collection_key field is present, it's needed for 

689 # uniqueness if we're looking in more than one collection. 

690 # If not, it's a derived field. 

691 if len(plan.datasets[dataset_type].collection_records) > 1: 

692 unique_keys.append(builder.joiner.fields[dataset_type]["collection_key"]) 

693 else: 

694 derived_fields.append((dataset_type, "collection_key")) 

695 elif dataset_field == "timespan" and plan.datasets[dataset_type].is_calibration_search: 

696 # If we're doing a non-find-first query against a 

697 # CALIBRATION collection, the timespan is also a unique 

698 # key... 

699 if dataset_type == plan.find_first_dataset: 

700 # ...unless we're doing a find-first search on this 

701 # dataset, in which case we need to use ANY_VALUE on 

702 # the timespan and check that _VALIDITY_MATCH_COUNT 

703 # (added earlier) is one, indicating that there was 

704 # indeed only one timespan for each data ID in each 

705 # collection that survived the base query's WHERE 

706 # clauses and JOINs. 

707 if not self.db.has_any_aggregate: 

708 raise NotImplementedError( 

709 f"Cannot generate query that returns {dataset_type}.timespan after a " 

710 "find-first search, because this database does not support the ANY_VALUE " 

711 "aggregate function (or equivalent)." 

712 ) 

713 builder.joiner.timespans[dataset_type] = builder.joiner.timespans[ 

714 dataset_type 

715 ].apply_any_aggregate(self.db.apply_any_aggregate) 

716 else: 

717 unique_keys.extend(builder.joiner.timespans[dataset_type].flatten()) 

718 else: 

719 # Other dataset fields derive their uniqueness from key 

720 # fields. 

721 derived_fields.append((dataset_type, dataset_field)) 

722 if not have_aggregates and not derived_fields: 

723 # SELECT DISTINCT is sufficient. 

724 builder.distinct = True 

725 elif not have_aggregates and self.db.has_distinct_on: 

726 # SELECT DISTINCT ON is sufficient and supported by this database. 

727 builder.distinct = unique_keys 

728 else: 

729 # GROUP BY is the only option. 

730 if derived_fields: 

731 if self.db.has_any_aggregate: 

732 for logical_table, field in derived_fields: 

733 if field == "timespan": 

734 builder.joiner.timespans[logical_table] = builder.joiner.timespans[ 

735 logical_table 

736 ].apply_any_aggregate(self.db.apply_any_aggregate) 

737 else: 

738 builder.joiner.fields[logical_table][field] = self.db.apply_any_aggregate( 

739 builder.joiner.fields[logical_table][field] 

740 ) 

741 else: 

742 _LOG.warning( 

743 "Adding %d fields to GROUP BY because this database backend does not support the " 

744 "ANY_VALUE aggregate function (or equivalent). This may result in a poor query " 

745 "plan. Materializing the query first sometimes avoids this problem.", 

746 len(derived_fields), 

747 ) 

748 for logical_table, field in derived_fields: 

749 if field == "timespan": 

750 unique_keys.extend(builder.joiner.timespans[logical_table].flatten()) 

751 else: 

752 unique_keys.append(builder.joiner.fields[logical_table][field]) 

753 builder.group_by = unique_keys 
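# Summary of the uniqueness strategy chosen above (descriptive only):
#   - no aggregates and no derived fields    -> SELECT DISTINCT
#   - no aggregates, DISTINCT ON supported   -> SELECT DISTINCT ON (<keys>)
#   - otherwise                              -> GROUP BY <keys>, with derived
#     fields wrapped in ANY_VALUE when available, or appended to the GROUP BY
#     (with a warning) when it is not.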

754 

755 def apply_query_find_first(self, plan: QueryFindFirstPlan | None, builder: QueryBuilder) -> QueryBuilder: 

756 """Modify an under-construction SQL query to return only one row for 

757 each data ID, searching collections in order. 

758 

759 Parameters 

760 ---------- 

761 plan : `QueryFindFirstPlan` or `None` 

762 Component of a `QueryPlan` relevant for the "find first" stage. 

763 builder : `QueryBuilder` 

764 Builder object as produced by `apply_query_projection`. This 

765 object should be considered to be consumed by this method - the 

766 same instance may or may not be returned, and if it is not 

767 returned, its state is not defined. 

768 

769 Returns 

770 ------- 

771 builder : `QueryBuilder` 

772 Modified query builder that includes the find-first resolution, if 

773 one was needed. 

774 """ 

775 if not plan: 

776 return builder 

777 # The query we're building looks like this: 

778 # 

779 # WITH {dst}_base AS ( 

780 # {target} 

781 # ... 

782 # ) 

783 # SELECT 

784 # {dst}_window.*, 

785 # FROM ( 

786 # SELECT 

787 # {dst}_base.*, 

788 # ROW_NUMBER() OVER ( 

789 # PARTITION BY {dst_base}.{dimensions} 

790 # ORDER BY {rank} 

791 # ) AS rownum 

792 # ) {dst}_window 

793 # WHERE 

794 # {dst}_window.rownum = 1; 

795 # 

796 # The outermost SELECT will be represented by the QueryBuilder we 

797 # return. The QueryBuilder we're given corresponds to the Common Table 

798 # Expression (CTE) at the top. 

799 # 

800 # For SQLite only, we could use a much simpler GROUP BY instead, 

801 # because it extends the standard to do exactly what we want when MIN 

802 # or MAX appears once and a column does not have an aggregate function 

803 # (https://www.sqlite.org/quirks.html). But since that doesn't work 

804 # with PostgreSQL it doesn't help us. 

805 # 

806 builder = builder.nested(cte=True, force=True) 

807 # We start by filling out the "window" SELECT statement... 

808 partition_by = [builder.joiner.dimension_keys[d][0] for d in builder.columns.dimensions.required] 

809 rank_sql_column = sqlalchemy.case( 

810 {record.key: n for n, record in enumerate(plan.search.collection_records)}, 

811 value=builder.joiner.fields[plan.dataset_type]["collection_key"], 

812 ) 

813 if partition_by: 

814 builder.joiner.special["_ROWNUM"] = sqlalchemy.sql.func.row_number().over( 

815 partition_by=partition_by, order_by=rank_sql_column 

816 ) 

817 else: 

818 builder.joiner.special["_ROWNUM"] = sqlalchemy.sql.func.row_number().over( 

819 order_by=rank_sql_column 

820 ) 

821 # ... and then turn that into a subquery with a constraint on rownum. 

822 builder = builder.nested(force=True) 

823 # We can now add the WHERE constraint on rownum into the outer query. 

824 builder.joiner.where(builder.joiner.special["_ROWNUM"] == 1) 

825 # Don't propagate _ROWNUM into downstream queries. 

826 del builder.joiner.special["_ROWNUM"] 

827 return builder 

828 

829 def _analyze_query_tree(self, tree: qt.QueryTree) -> tuple[QueryJoinsPlan, QueryBuilder]: 

830 """Start constructing a plan for building a query from a 

831 `.queries.tree.QueryTree`. 

832 

833 Parameters 

834 ---------- 

835 tree : `.queries.tree.QueryTree` 

836 Description of the joins and row filters in the query. 

837 

838 Returns 

839 ------- 

840 plan : `QueryJoinsPlan` 

841 Initial component of the plan relevant for the "joins" stage, 

842 including all joins and columns needed by ``tree``. Additional 

843 columns will be added to this plan later. 

844 builder : `QueryBuilder` 

845 Builder object initialized with overlap joins and constraints 

846 potentially included, with the remainder still present in 

847 `QueryJoinsPlan.predicate`. 

848 """ 

849 # Delegate to the dimensions manager to rewrite the predicate and start 

850 # a QueryBuilder to cover any spatial overlap joins or constraints. 

851 # We'll return that QueryBuilder at the end. 

852 ( 

853 predicate, 

854 builder, 

855 ) = self.managers.dimensions.process_query_overlaps( 

856 tree.dimensions, 

857 tree.predicate, 

858 tree.get_joined_dimension_groups(), 

859 ) 

860 result = QueryJoinsPlan(predicate=predicate, columns=builder.columns) 

861 # Add columns required by postprocessing. 

862 builder.postprocessing.gather_columns_required(result.columns) 

863 # We also check that the predicate doesn't reference any dimensions 

864 # without constraining their governor dimensions, since that's a 

865 # particularly easy mistake to make and it's almost never intentional. 

866 # We also allow the registry data ID values to provide governor values. 

867 where_columns = qt.ColumnSet(self.universe.empty.as_group()) 

868 result.predicate.gather_required_columns(where_columns) 

869 for governor in where_columns.dimensions.governors: 

870 if governor not in result.constraint_data_id: 

871 if governor in self._default_data_id.dimensions: 

872 result.constraint_data_id[governor] = self._default_data_id[governor] 

873 else: 

874 raise InvalidQueryError( 

875 f"Query 'where' expression references a dimension dependent on {governor} without " 

876 "constraining it directly." 

877 ) 
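# For example (dimension names from a hypothetical default universe): a
# 'where' predicate that references only ``visit`` has ``instrument`` as a
# governor; its value is taken from the default data ID if present, otherwise
# the InvalidQueryError above is raised.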

878 # Add materializations, which can also bring in more postprocessing. 

879 for m_key, m_dimensions in tree.materializations.items(): 

880 m_state = self._materializations[m_key] 

881 result.materializations[m_key] = m_dimensions 

882 # When a query is materialized, the new tree has an empty 

883 # (trivially true) predicate because the original was used to make 

884 # the materialized rows. But the original postprocessing isn't 

885 # executed when the materialization happens, so we have to include 

886 # it here. 

887 builder.postprocessing.spatial_join_filtering.extend( 

888 m_state.postprocessing.spatial_join_filtering 

889 ) 

890 builder.postprocessing.spatial_where_filtering.extend( 

891 m_state.postprocessing.spatial_where_filtering 

892 ) 

893 # Add data coordinate uploads. 

894 result.data_coordinate_uploads.update(tree.data_coordinate_uploads) 

895 # Add dataset_searches and filter out collections that don't have the 

896 # right dataset type or governor dimensions. 

897 for dataset_type_name, dataset_search in tree.datasets.items(): 

898 resolved_dataset_search = self._resolve_dataset_search( 

899 dataset_type_name, dataset_search, result.constraint_data_id 

900 ) 

901 result.datasets[dataset_type_name] = resolved_dataset_search 

902 if not resolved_dataset_search.collection_records: 

903 result.messages.append(f"Search for dataset type {dataset_type_name!r} is doomed to fail.") 

904 result.messages.extend(resolved_dataset_search.messages) 

905 return result, builder 

906 

907 def _resolve_dataset_search( 

908 self, 

909 dataset_type_name: str, 

910 dataset_search: qt.DatasetSearch, 

911 constraint_data_id: Mapping[str, DataIdValue], 

912 ) -> ResolvedDatasetSearch: 

913 """Resolve the collections that should actually be searched for 

914 datasets of a particular type. 

915 

916 Parameters 

917 ---------- 

918 dataset_type_name : `str` 

919 Name of the dataset being searched for. 

920 dataset_search : `.queries.tree.DatasetSearch` 

921 Struct holding the dimensions and original collection search path. 

922 constraint_data_id : `~collections.abc.Mapping` 

923 Data ID mapping derived from the query predicate that may be used 

924 to filter out some collections based on their governor dimensions. 

925 

926 Returns 

927 ------- 

928 resolved : `ResolvedDatasetSearch` 

929 Struct that extends ``dataset_search`` with the dataset type name 

930 and resolved collection records. 

931 """ 

932 result = ResolvedDatasetSearch(dataset_type_name, dataset_search.dimensions) 

933 for collection_record, collection_summary in self._resolve_collection_path( 

934 dataset_search.collections 

935 ): 

936 rejected: bool = False 

937 if result.name not in collection_summary.dataset_types.names: 

938 result.messages.append( 

939 f"No datasets of type {result.name!r} in collection {collection_record.name!r}." 

940 ) 

941 rejected = True 

942 for governor in constraint_data_id.keys() & collection_summary.governors.keys(): 

943 if constraint_data_id[governor] not in collection_summary.governors[governor]: 

944 result.messages.append( 

945 f"No datasets with {governor}={constraint_data_id[governor]!r} " 

946 f"in collection {collection_record.name!r}." 

947 ) 

948 rejected = True 

949 if not rejected: 

950 if collection_record.type is CollectionType.CALIBRATION: 

951 result.is_calibration_search = True 

952 result.collection_records.append(collection_record) 

953 if result.dimensions != self.get_dataset_type(dataset_type_name).dimensions.as_group(): 

954 # This is really for server-side defensiveness; it's hard to 

955 # imagine the query getting different dimensions for a dataset 

956 # type in two calls to the same query driver. 

957 raise InvalidQueryError( 

958 f"Incorrect dimensions {result.dimensions} for dataset {dataset_type_name} " 

959 f"in query (vs. {self.get_dataset_type(dataset_type_name).dimensions.as_group()})." 

960 ) 

961 return result 

962 

963 def _resolve_collection_path( 

964 self, collections: Iterable[str] 

965 ) -> list[tuple[CollectionRecord, CollectionSummary]]: 

966 """Expand an ordered iterable of collection names into a list of 

967 collection records and summaries. 

968 

969 Parameters 

970 ---------- 

971 collections : `~collections.abc.Iterable` [ `str` ] 

972 Ordered iterable of collections. 

973 

974 Returns 

975 ------- 

976 resolved : `list` [ `tuple` [ `.registry.interfaces.CollectionRecord`,\ 

977 `.registry.CollectionSummary` ] ] 

978 Tuples of collection record and summary. `~CollectionType.CHAINED` 

979 collections are flattened out and not included. 

980 """ 

981 result: list[tuple[CollectionRecord, CollectionSummary]] = [] 

982 done: set[str] = set() 

983 

984 # Eventually we really want this recursive Python code to be replaced 

985 # by a recursive SQL query, especially if we extend this method to 

986 # support collection glob patterns to support public APIs we don't yet 

987 # have in the new query system (but will need to add). 

988 

989 def recurse(collection_names: Iterable[str]) -> None: 

990 for collection_name in collection_names: 

991 if collection_name not in done: 

992 done.add(collection_name) 

993 record = self.managers.collections.find(collection_name) 

994 

995 if record.type is CollectionType.CHAINED: 

996 recurse(cast(ChainedCollectionRecord, record).children) 

997 else: 

998 result.append((record, self.managers.datasets.getCollectionSummary(record))) 

999 

1000 recurse(collections) 

1001 

1002 return result 
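# Illustrative flattening (hypothetical collection names): if "defaults" is a
# CHAINED collection whose children are ["calib", "runs/2024"], then
# _resolve_collection_path(["defaults"]) returns records and summaries for
# "calib" and "runs/2024" in that order, with the CHAINED collection itself
# omitted and each collection visited only once.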

1003 

1004 def _join_materialization( 

1005 self, 

1006 joiner: QueryJoiner, 

1007 key: qt.MaterializationKey, 

1008 dimensions: DimensionGroup, 

1009 ) -> frozenset[str]: 

1010 """Join a materialization into an under-construction query. 

1011 

1012 Parameters 

1013 ---------- 

1014 joiner : `QueryJoiner` 

1015 Component of a `QueryBuilder` that holds the FROM and WHERE 

1016 clauses. This will be modified in-place on return. 

1017 key : `.queries.tree.MaterializationKey` 

1018 Unique identifier created for this materialization when it was 

1019 created. 

1020 dimensions : `DimensionGroup` 

1021 Dimensions of the materialization. 

1022 

1023 Returns 

1024 ------- 

1025 datasets : `frozenset` [ `str` ] 

1026 Dataset types that were included as constraints when this 

1027 materialization was created. 

1028 """ 

1029 columns = qt.ColumnSet(dimensions) 

1030 m_state = self._materializations[key] 

1031 joiner.join(QueryJoiner(self.db, m_state.table).extract_columns(columns, m_state.postprocessing)) 

1032 return m_state.datasets 

1033 

1034 def _join_dataset_search( 

1035 self, 

1036 joiner: QueryJoiner, 

1037 resolved_search: ResolvedDatasetSearch, 

1038 fields: Set[str], 

1039 ) -> None: 

1040 """Join a dataset search into an under-construction query. 

1041 

1042 Parameters 

1043 ---------- 

1044 joiner : `QueryJoiner` 

1045 Component of a `QueryBuilder` that holds the FROM and WHERE 

1046 clauses. This will be modified in-place on return. 

1047 resolved_search : `ResolvedDatasetSearch` 

1048 Struct that describes the dataset type and collections. 

1049 fields : `~collections.abc.Set` [ `str` ] 

1050 Dataset fields to include. 

1051 """ 

1052 storage = self.managers.datasets[resolved_search.name] 

1053 # The next two asserts will need to be dropped (and the implications 

1054 # dealt with instead) if materializations start having dataset fields. 

1055 assert ( 

1056 resolved_search.name not in joiner.fields 

1057 ), "Dataset fields have unexpectedly already been joined in." 

1058 assert ( 

1059 resolved_search.name not in joiner.timespans 

1060 ), "Dataset timespan has unexpectedly already been joined in." 

1061 joiner.join(storage.make_query_joiner(resolved_search.collection_records, fields)) 

1062 

1063 

1064@dataclasses.dataclass 

1065class _MaterializationState: 

1066 table: sqlalchemy.Table 

1067 datasets: frozenset[str] 

1068 postprocessing: Postprocessing 

1069 

1070 

1071class _Cursor: 

1072 """A helper class for managing paged query results and cursor lifetimes. 

1073 

1074 This class holds a context manager for the SQLAlchemy cursor object but is 

1075 not itself a context manager. It always cleans up (i.e. calls its `close` 

1076 method) when it raises an exception or exhausts the cursor, but external 

1077 code is responsible for calling `close` when the cursor is abandoned before 

1078 it is exhausted, including when that happens due to an external exception. 

1079 

1080 Parameters 

1081 ---------- 

1082 db : `.registry.interfaces.Database` 

1083 Database to run the query against. 

1084 sql : `sqlalchemy.Executable` 

1085 SQL query to execute. 

1086 result_spec : `ResultSpec` 

1087 Specification of the result type. 

1088 name_shrinker : `NameShrinker` or `None` 

1089 Object that was used to shrink dataset column names to fit within the 

1090 database identifier limit. 

1091 postprocessing : `Postprocessing` 

1092 Post-query filtering and checks to perform. 

1093 raw_page_size : `int` 

1094 Maximum number of SQL result rows to return in each page, before 

1095 postprocessing. 

1096 """ 

1097 

1098 def __init__( 

1099 self, 

1100 db: Database, 

1101 sql: sqlalchemy.Executable, 

1102 result_spec: ResultSpec, 

1103 name_shrinker: NameShrinker | None, 

1104 postprocessing: Postprocessing, 

1105 raw_page_size: int, 

1106 ): 

1107 self._result_spec = result_spec 

1108 self._name_shrinker = name_shrinker 

1109 self._raw_page_size = raw_page_size 

1110 self._postprocessing = postprocessing 

1111 self._timespan_repr_cls = db.getTimespanRepresentation() 

1112 self._context = db.query(sql, execution_options=dict(yield_per=raw_page_size)) 

1113 cursor = self._context.__enter__() 

1114 try: 

1115 self._iterator = cursor.partitions() 

1116 except: # noqa: E722 

1117 self._context.__exit__(*sys.exc_info()) 

1118 raise 

1119 

1120 def close(self, exc_type: Any = None, exc_value: Any = None, traceback: Any = None) -> None: 

1121 """Close this cursor. 

1122 

1123 Parameters 

1124 ---------- 

1125 exc_type : `type` 

1126 Exception type as obtained from `sys.exc_info`, or `None` if there 

1127 was no error. 

1128 exc_value : `BaseException` or `None` 

1129 Exception instance as obtained from `sys.exc_info`, or `None` if 

1130 there was no error. 

1131 traceback : `object` 

1132 Traceback as obtained from `sys.exc_info`, or `None` if there was 

1133 no error. 

1134 """ 

1135 self._context.__exit__(exc_type, exc_value, traceback) 

1136 

1137 def next(self) -> ResultPage: 

1138 """Return the next result page from this query. 

1139 

1140 When there are no more results after this result page, the `next_key` 

1141 attribute of the returned object is `None` and the cursor will be 

1142 closed. The cursor is also closed if this method raises an exception. 

1143 """ 

1144 try: 

1145 raw_page = next(self._iterator, tuple()) 

1146 if len(raw_page) == self._raw_page_size: 

1147 # There's some chance we got unlucky and this page exactly 

1148 # finishes off the query, and we won't know the next page does 

1149 # not exist until we try to fetch it. But that's better than 

1150 # always fetching the next page up front. 

1151 next_key = uuid.uuid4() 

1152 else: 

1153 next_key = None 

1154 self.close() 

1155 

1156 postprocessed_rows = self._postprocessing.apply(raw_page) 

1157 match self._result_spec: 

1158 case DimensionRecordResultSpec(): 

1159 return self._convert_dimension_record_results(postprocessed_rows, next_key) 

1160 case _: 

1161 raise NotImplementedError("TODO") 

1162 except: # noqa: E722 

1163 self._context.__exit__(*sys.exc_info()) 

1164 raise 

1165 

1166 def _convert_dimension_record_results( 

1167 self, 

1168 raw_rows: Iterable[sqlalchemy.Row], 

1169 next_key: PageKey | None, 

1170 ) -> DimensionRecordResultPage: 

1171 """Convert a raw SQL result iterable into a page of `DimensionRecord` 

1172 query results. 

1173 

1174 Parameters 

1175 ---------- 

1176 raw_rows : `~collections.abc.Iterable` [ `sqlalchemy.Row` ] 

1177 Iterable of SQLAlchemy rows, with `Postprocessing` filters already 

1178 applied. 

1179 next_key : `PageKey` or `None` 

1180 Key for the next page to add into the returned page object. 

1181 

1182 Returns 

1183 ------- 

1184 result_page : `DimensionRecordResultPage` 

1185 Page object that holds a `DimensionRecord` container. 

1186 """ 

1187 result_spec = cast(DimensionRecordResultSpec, self._result_spec) 

1188 record_set = DimensionRecordSet(result_spec.element) 

1189 record_cls = result_spec.element.RecordClass 

1190 if isinstance(result_spec.element, SkyPixDimension): 

1191 pixelization = result_spec.element.pixelization 

1192 id_qualified_name = qt.ColumnSet.get_qualified_name(result_spec.element.name, None) 

1193 for raw_row in raw_rows: 

1194 pixel_id = raw_row._mapping[id_qualified_name] 

1195 record_set.add(record_cls(id=pixel_id, region=pixelization.pixel(pixel_id))) 

1196 else: 

1197 # Mapping from DimensionRecord attribute name to qualified column 

1198 # name, but as a list of tuples since we'd just iterate over items 

1199 # anyway. 

1200 column_map = list( 

1201 zip( 

1202 result_spec.element.schema.dimensions.names, 

1203 result_spec.element.dimensions.names, 

1204 ) 

1205 ) 

1206 for field in result_spec.element.schema.remainder.names: 

1207 if field != "timespan": 

1208 column_map.append( 

1209 (field, qt.ColumnSet.get_qualified_name(result_spec.element.name, field)) 

1210 ) 

1211 if result_spec.element.temporal: 

1212 timespan_qualified_name = qt.ColumnSet.get_qualified_name( 

1213 result_spec.element.name, "timespan" 

1214 ) 

1215 else: 

1216 timespan_qualified_name = None 

1217 for raw_row in raw_rows: 

1218 m = raw_row._mapping 

1219 d = {k: m[v] for k, v in column_map} 

1220 if timespan_qualified_name is not None: 

1221 d["timespan"] = self._timespan_repr_cls.extract(m, name=timespan_qualified_name) 

1222 record_set.add(record_cls(**d)) 

1223 return DimensionRecordResultPage(spec=result_spec, next_key=next_key, rows=record_set)
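# Illustrative mapping (hypothetical element): for a non-skypix element such
# as "visit", the dimension-key part of column_map pairs record attribute
# names with dimension names (e.g. ("instrument", "instrument"),
# ("id", "visit")), while non-timespan record fields are looked up by their
# qualified column names and timespans are decoded with the database's
# timespan representation class.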