Coverage for python/lsst/daf/butler/direct_query_driver/_driver.py: 15%
412 statements
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30import uuid
32__all__ = ("DirectQueryDriver",)
34import dataclasses
35import logging
36import sys
37from collections.abc import Iterable, Mapping, Set
38from contextlib import ExitStack
39from typing import TYPE_CHECKING, Any, cast, overload
41import sqlalchemy
43from .. import ddl
44from .._dataset_type import DatasetType
45from ..dimensions import DataIdValue, DimensionGroup, DimensionRecordSet, DimensionUniverse, SkyPixDimension
46from ..name_shrinker import NameShrinker
47from ..queries import tree as qt
48from ..queries.driver import (
49 DataCoordinateResultPage,
50 DatasetRefResultPage,
51 DimensionRecordResultPage,
52 GeneralResultPage,
53 PageKey,
54 QueryDriver,
55 ResultPage,
56)
57from ..queries.result_specs import (
58 DataCoordinateResultSpec,
59 DatasetRefResultSpec,
60 DimensionRecordResultSpec,
61 GeneralResultSpec,
62 ResultSpec,
63)
64from ..registry import CollectionSummary, CollectionType, NoDefaultCollectionError, RegistryDefaults
65from ..registry.interfaces import ChainedCollectionRecord, CollectionRecord
66from ..registry.managers import RegistryManagerInstances
67from ._postprocessing import Postprocessing
68from ._query_builder import QueryBuilder, QueryJoiner
69from ._query_plan import (
70 QueryFindFirstPlan,
71 QueryJoinsPlan,
72 QueryPlan,
73 QueryProjectionPlan,
74 ResolvedDatasetSearch,
75)
76from ._sql_column_visitor import SqlColumnVisitor
78if TYPE_CHECKING:
79 from ..registry.interfaces import Database
82_LOG = logging.getLogger(__name__)
85class DirectQueryDriver(QueryDriver):
86 """The `QueryDriver` implementation for `DirectButler`.
88 Parameters
89 ----------
90 db : `Database`
91 Abstraction for the SQL database.
92 universe : `DimensionUniverse`
93 Definitions of all dimensions.
94 managers : `RegistryManagerInstances`
95 Struct of registry manager objects.
96 defaults : `RegistryDefaults`
97 Struct holding the default collection search path and governor
98 dimensions.
99 raw_page_size : `int`, optional
100 Number of database rows to fetch for each result page. The actual
101 number of rows in a page may be smaller due to postprocessing.
102 constant_rows_limit : `int`, optional
103 Maximum number of uploaded rows to include in queries via
104 `Database.constant_rows`; above this limit a temporary table is used
105 instead.
106 postprocessing_filter_factor : `int`, optional
107 The number of database rows we expect to have to fetch to yield a
108 single output row for queries that involve postprocessing. This is
109 purely a performance tuning parameter that attempts to balance between
110 fetching too much and requiring multiple fetches; the true value is
111 highly dependent on the actual query.
112 """
114 def __init__(
115 self,
116 db: Database,
117 universe: DimensionUniverse,
118 managers: RegistryManagerInstances,
119 defaults: RegistryDefaults,
120 raw_page_size: int = 10000,
121 constant_rows_limit: int = 1000,
122 postprocessing_filter_factor: int = 10,
123 ):
124 self.db = db
125 self.managers = managers
126 self._universe = universe
127 self._defaults = defaults
128 self._materializations: dict[qt.MaterializationKey, _MaterializationState] = {}
129 self._upload_tables: dict[qt.DataCoordinateUploadKey, sqlalchemy.FromClause] = {}
130 self._exit_stack: ExitStack | None = None
131 self._raw_page_size = raw_page_size
132 self._postprocessing_filter_factor = postprocessing_filter_factor
133 self._constant_rows_limit = constant_rows_limit
134 self._cursors: dict[PageKey, _Cursor] = {}
136 def __enter__(self) -> None:
137 self._exit_stack = ExitStack()
138 # It might be nice to defer opening a transaction here until first use
139 # to reduce the time spent in transactions. But it's worth noting that
140 # this is the default low-level behavior of the Python SQLite driver,
141 # and it makes it incredibly prone to deadlocks. We might be okay
142 # here, because Query doesn't do true write operations - just temp
143 # table writes - but I'm not confident that's enough to make delayed
144 # transaction starts safe against deadlocks, and it'd be more
145 # complicated to implement anyway.
146 #
147 # We start a transaction rather than just opening a connection to make
148 # temp tables and cursors work with pg_bouncer transaction affinity.
149 self._exit_stack.enter_context(self.db.transaction(for_temp_tables=True))
151 def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
152 assert self._exit_stack is not None
153 self._materializations.clear()
154 self._upload_tables.clear()
155 while self._cursors:
156 _, cursor = self._cursors.popitem()
157 cursor.close(exc_type, exc_value, traceback)
158 self._exit_stack.__exit__(exc_type, exc_value, traceback)
159 self._exit_stack = None
161 @property
162 def universe(self) -> DimensionUniverse:
163 return self._universe
165 @overload
166 def execute(
167 self, result_spec: DataCoordinateResultSpec, tree: qt.QueryTree
168 ) -> DataCoordinateResultPage: ...
170 @overload
171 def execute(
172 self, result_spec: DimensionRecordResultSpec, tree: qt.QueryTree
173 ) -> DimensionRecordResultPage: ...
175 @overload
176 def execute(self, result_spec: DatasetRefResultSpec, tree: qt.QueryTree) -> DatasetRefResultPage: ...
178 @overload
179 def execute(self, result_spec: GeneralResultSpec, tree: qt.QueryTree) -> GeneralResultPage: ...
181 def execute(self, result_spec: ResultSpec, tree: qt.QueryTree) -> ResultPage:
182 # Docstring inherited.
183 if self._exit_stack is None:
184 raise RuntimeError("QueryDriver context must be entered before queries can be executed.")
185 _, builder = self.build_query(
186 tree,
187 final_columns=result_spec.get_result_columns(),
188 order_by=result_spec.order_by,
189 find_first_dataset=result_spec.find_first_dataset,
190 )
191 sql_select = builder.select()
192 if result_spec.order_by:
193 visitor = SqlColumnVisitor(builder.joiner, self)
194 sql_select = sql_select.order_by(*[visitor.expect_scalar(term) for term in result_spec.order_by])
195 if result_spec.limit is not None:
196 if builder.postprocessing:
197 builder.postprocessing.limit = result_spec.limit
198 else:
199 sql_select = sql_select.limit(result_spec.limit)
200 if builder.postprocessing.limit is not None:
201 # We might want to fetch many fewer rows than the default page
202 # size if we have to implement limit in postprocessing.
203 raw_page_size = min(
204 self._postprocessing_filter_factor * builder.postprocessing.limit,
205 self._raw_page_size,
206 )
207 else:
208 raw_page_size = self._raw_page_size
209 # Execute the query by initializing a _Cursor object that manages the
210 # lifetime of the result.
211 cursor = _Cursor(
212 self.db,
213 sql_select,
214 result_spec=result_spec,
215 name_shrinker=builder.joiner.name_shrinker,
216 postprocessing=builder.postprocessing,
217 raw_page_size=raw_page_size,
218 )
219 result_page = cursor.next()
220 if result_page.next_key is not None:
221 # Cursor has not been exhausted; add it to the driver for use by
222 # fetch_next_page.
223 self._cursors[result_page.next_key] = cursor
224 return result_page
226 @overload
227 def fetch_next_page(
228 self, result_spec: DataCoordinateResultSpec, key: PageKey
229 ) -> DataCoordinateResultPage: ...
231 @overload
232 def fetch_next_page(
233 self, result_spec: DimensionRecordResultSpec, key: PageKey
234 ) -> DimensionRecordResultPage: ...
236 @overload
237 def fetch_next_page(self, result_spec: DatasetRefResultSpec, key: PageKey) -> DatasetRefResultPage: ...
239 @overload
240 def fetch_next_page(self, result_spec: GeneralResultSpec, key: PageKey) -> GeneralResultPage: ...
242 def fetch_next_page(self, result_spec: ResultSpec, key: PageKey) -> ResultPage:
243 # Docstring inherited.
244 try:
245 cursor = self._cursors.pop(key)
246 except KeyError:
247 raise RuntimeError("Cannot continue query result iteration after the query context has closed.")
248 result_page = cursor.next()
249 if result_page.next_key is not None:
250 self._cursors[result_page.next_key] = cursor
251 return result_page
253 def materialize(
254 self,
255 tree: qt.QueryTree,
256 dimensions: DimensionGroup,
257 datasets: frozenset[str],
258 ) -> qt.MaterializationKey:
259 # Docstring inherited.
260 if self._exit_stack is None:
261 raise RuntimeError("QueryDriver context must be entered before 'materialize' is called.")
262 _, builder = self.build_query(tree, qt.ColumnSet(dimensions))
263 # Current implementation ignores 'datasets' aside from remembering
264 # them, because figuring out what to put in the temporary table for
265 # them is tricky, especially if calibration collections are involved.
266 # That's okay because:
267 #
268 # - the query whose results we materialize includes the dataset
269 # searches as constraints;
270 #
271 # - we still (in Query.materialize) join the dataset searches back in
272 # anyway, and given materialized data IDs the join to the dataset
273 # search is straightforward and definitely well-indexed, and not much
274 # (if at all) worse than joining back in on a materialized UUID.
275 #
276 sql_select = builder.select()
277 table = self._exit_stack.enter_context(self.db.temporary_table(builder.make_table_spec()))
278 self.db.insert(table, select=sql_select)
279 key = uuid.uuid4()
280 self._materializations[key] = _MaterializationState(table, datasets, builder.postprocessing)
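        # The returned key is how later query trees refer back to this
        # materialization: _analyze_query_tree looks it up in
        # self._materializations, and _join_materialization joins the saved
        # temporary table (and its postprocessing) back into a new query.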
281 return key
283 def upload_data_coordinates(
284 self, dimensions: DimensionGroup, rows: Iterable[tuple[DataIdValue, ...]]
285 ) -> qt.DataCoordinateUploadKey:
286 # Docstring inherited.
287 if self._exit_stack is None:
288 raise RuntimeError(
289 "QueryDriver context must be entered before 'upload_data_coordinates' is called."
290 )
291 columns = qt.ColumnSet(dimensions).drop_implied_dimension_keys()
292 table_spec = ddl.TableSpec(
293 [columns.get_column_spec(logical_table, field).to_sql_spec() for logical_table, field in columns]
294 )
295 dict_rows: list[dict[str, Any]]
296 if not columns:
297 table_spec.fields.add(
298 ddl.FieldSpec(
299 QueryBuilder.EMPTY_COLUMNS_NAME, dtype=QueryBuilder.EMPTY_COLUMNS_TYPE, nullable=True
300 )
301 )
302 dict_rows = [{QueryBuilder.EMPTY_COLUMNS_NAME: None}]
303 else:
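            # Each uploaded tuple is positional, in the order of
            # dimensions.required (e.g., hypothetically, ("HSC", 903342) for a
            # dimension group whose required dimensions are instrument and
            # exposure).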
304 dict_rows = [dict(zip(dimensions.required, values)) for values in rows]
305 from_clause: sqlalchemy.FromClause
306 if len(dict_rows) > self._constant_rows_limit:
307 from_clause = self._exit_stack.enter_context(self.db.temporary_table(table_spec))
308 self.db.insert(from_clause, *dict_rows)
309 else:
310 from_clause = self.db.constant_rows(table_spec.fields, *dict_rows)
311 key = uuid.uuid4()
312 self._upload_tables[key] = from_clause
313 return key
315 def count(
316 self,
317 tree: qt.QueryTree,
318 result_spec: ResultSpec,
319 *,
320 exact: bool,
321 discard: bool,
322 ) -> int:
323 # Docstring inherited.
324 columns = result_spec.get_result_columns()
325 plan, builder = self.build_query(tree, columns, find_first_dataset=result_spec.find_first_dataset)
326 if not all(d.collection_records for d in plan.joins.datasets.values()):
327 return 0
328 if not exact:
329 builder.postprocessing = Postprocessing()
330 if builder.postprocessing:
331 if not discard:
332 raise RuntimeError("Cannot count query rows exactly without discarding them.")
333 sql_select = builder.select()
334 builder.postprocessing.limit = result_spec.limit
335 n = 0
336 with self.db.query(sql_select.execution_options(yield_per=self._raw_page_size)) as results:
337 for _ in builder.postprocessing.apply(results):
338 n += 1
339 return n
340 # If the query has DISTINCT or GROUP BY, nest it in a subquery so we
341 # count deduplicated rows.
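        # For illustration only (the exact SQL depends on the builder state),
        # that case renders roughly as
        #
        #     SELECT COUNT(*) FROM (SELECT DISTINCT ...) AS subquery
        #
        # so rows collapsed by the inner query are not double-counted.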
342 builder = builder.nested()
343 # Replace the columns of the query with just COUNT(*).
344 builder.columns = qt.ColumnSet(self._universe.empty.as_group())
345 count_func: sqlalchemy.ColumnElement[int] = sqlalchemy.func.count()
346 builder.joiner.special["_ROWCOUNT"] = count_func
347 # Render and run the query.
348 sql_select = builder.select()
349 with self.db.query(sql_select) as result:
350 count = cast(int, result.scalar())
351 if result_spec.limit is not None:
352 count = min(count, result_spec.limit)
353 return count
355 def any(self, tree: qt.QueryTree, *, execute: bool, exact: bool) -> bool:
356 # Docstring inherited.
357 plan, builder = self.build_query(tree, qt.ColumnSet(tree.dimensions))
358 if not all(d.collection_records for d in plan.joins.datasets.values()):
359 return False
360 if not execute:
361 if exact:
362 raise RuntimeError("Cannot obtain exact result for 'any' without executing.")
363 return True
364 if builder.postprocessing and exact:
365 sql_select = builder.select()
366 with self.db.query(
367 sql_select.execution_options(yield_per=self._postprocessing_filter_factor)
368 ) as result:
369 for _ in builder.postprocessing.apply(result):
370 return True
371 return False
372 sql_select = builder.select().limit(1)
373 with self.db.query(sql_select) as result:
374 return result.first() is not None
376 def explain_no_results(self, tree: qt.QueryTree, execute: bool) -> Iterable[str]:
377 # Docstring inherited.
378 plan, _ = self.analyze_query(tree, qt.ColumnSet(tree.dimensions))
379 if plan.joins.messages or not execute:
380 return plan.joins.messages
381 # TODO: guess at ways to split up query that might fail or succeed if
382 # run separately, execute them with LIMIT 1 and report the results.
383 return []
385 def get_dataset_type(self, name: str) -> DatasetType:
386 # Docstring inherited
387 return self.managers.datasets[name].datasetType
389 def get_default_collections(self) -> tuple[str, ...]:
390 # Docstring inherited.
391 if not self._defaults.collections:
392 raise NoDefaultCollectionError("No collections provided and no default collections.")
393 return tuple(self._defaults.collections)
395 def build_query(
396 self,
397 tree: qt.QueryTree,
398 final_columns: qt.ColumnSet,
399 order_by: Iterable[qt.OrderExpression] = (),
400 find_first_dataset: str | None = None,
401 ) -> tuple[QueryPlan, QueryBuilder]:
402 """Convert a query description into a mostly-completed `QueryBuilder`.
404 Parameters
405 ----------
406 tree : `.queries.tree.QueryTree`
407 Description of the joins and row filters in the query.
408 final_columns : `.queries.tree.ColumnSet`
409 Final output columns that should be emitted by the SQL query.
410 order_by : `~collections.abc.Iterable` [ \
411 `.queries.tree.OrderExpression` ], optional
412 Column expressions to sort by.
413 find_first_dataset : `str` or `None`, optional
414 Name of a dataset type for which only one result row for each data
414 ID should be returned, with the collections searched in order.
417 Returns
418 -------
419 plan : `QueryPlan`
420 Plan used to transform the query into SQL, including some
421 information (e.g. diagnostics about doomed-to-fail dataset
422 searches) that isn't transferred into the builder itself.
423 builder : `QueryBuilder`
424 Builder object that can be used to create a SQL SELECT via its
425 `~QueryBuilder.select` method. We return this instead of a
426 `sqlalchemy.Select` object to allow different methods to
427 customize the SELECT clause (e.g. `count` can replace the
428 columns selected with ``COUNT(*)``).
429 """
430 # See the QueryPlan docs for an overview of what these stages of query
431 # construction do.
432 plan, builder = self.analyze_query(tree, final_columns, order_by, find_first_dataset)
433 self.apply_query_joins(plan.joins, builder.joiner)
434 self.apply_query_projection(plan.projection, builder)
435 builder = self.apply_query_find_first(plan.find_first, builder)
436 builder.columns = plan.final_columns
437 return plan, builder
439 def analyze_query(
440 self,
441 tree: qt.QueryTree,
442 final_columns: qt.ColumnSet,
443 order_by: Iterable[qt.OrderExpression] = (),
444 find_first_dataset: str | None = None,
445 ) -> tuple[QueryPlan, QueryBuilder]:
446 """Construct a plan for building a query and initialize a builder.
448 Parameters
449 ----------
450 tree : `.queries.tree.QueryTree`
451 Description of the joins and row filters in the query.
452 final_columns : `.queries.tree.ColumnSet`
453 Final output columns that should be emitted by the SQL query.
454 order_by : `~collections.abc.Iterable` [ \
455 `.queries.tree.OrderExpression` ], optional
456 Column expressions to sort by.
457 find_first_dataset : `str` or `None`, optional
458 Name of a dataset type for which only one result row for each data
459 ID should be returned, with the collections searched in order.
461 Returns
462 -------
463 plan : `QueryPlan`
464 Plan used to transform the query into SQL, including some
465 information (e.g. diagnostics about doomed-to-fail dataset
466 searches) that isn't transferred into the builder itself.
467 builder : `QueryBuilder`
468 Builder object initialized with overlap joins and constraints
469 potentially included, with the remainder still present in
470 `QueryJoinsPlan.predicate`.
471 """
472 # The fact that this method returns both a QueryPlan and an initial
473 # QueryBuilder (rather than just a QueryPlan) is a tradeoff that lets
474 # DimensionRecordStorageManager.process_query_overlaps (which is called
475 # by the `_analyze_query_tree` call below) pull out overlap expressions
476 # from the predicate at the same time it turns them into SQL table
477 # joins (in the builder).
478 joins_plan, builder = self._analyze_query_tree(tree)
480 # The "projection" columns differ from the final columns by not
481 # omitting any dimension keys (this keeps queries for different result
482 # types more similar during construction), including any columns needed
483 # only by order_by terms, and including the collection key if we need
484 # it for GROUP BY or DISTINCT.
485 projection_plan = QueryProjectionPlan(
486 final_columns.copy(), joins_plan.datasets, find_first_dataset=find_first_dataset
487 )
488 projection_plan.columns.restore_dimension_keys()
489 for term in order_by:
490 term.gather_required_columns(projection_plan.columns)
491 # The projection gets interesting if it does not have all of the
492 # dimension keys or dataset fields of the "joins" stage, because that
493 # means it needs to do a GROUP BY or DISTINCT ON to get unique rows.
494 if projection_plan.columns.dimensions != joins_plan.columns.dimensions:
495 assert projection_plan.columns.dimensions.issubset(joins_plan.columns.dimensions)
496 # We're going from a larger set of dimensions to a smaller set,
497 # that means we'll be doing a SELECT DISTINCT [ON] or GROUP BY.
498 projection_plan.needs_dimension_distinct = True
499 for dataset_type, fields_for_dataset in joins_plan.columns.dataset_fields.items():
500 if not projection_plan.columns.dataset_fields[dataset_type]:
501 # The "joins"-stage query has one row for each collection for
502 # each data ID, but the projection-stage query just wants
503 # one row for each data ID.
504 if len(joins_plan.datasets[dataset_type].collection_records) > 1:
505 projection_plan.needs_dataset_distinct = True
506 break
507 # If there are any dataset fields being propagated through that
508 # projection and there is more than one collection, we need to
509 # include the collection_key column so we can use that as one of
510 # the DISTINCT or GROUP BY columns.
511 for dataset_type, fields_for_dataset in projection_plan.columns.dataset_fields.items():
512 if len(joins_plan.datasets[dataset_type].collection_records) > 1:
513 fields_for_dataset.add("collection_key")
514 if projection_plan:
515 # If there's a projection and we're doing postprocessing, we might
516 # be collapsing the dimensions of the postprocessing regions. When
517 # that happens, we want to apply an aggregate function to them that
518 # computes the union of the regions that are grouped together.
519 for element in builder.postprocessing.iter_missing(projection_plan.columns):
520 if element.name not in projection_plan.columns.dimensions.elements:
521 projection_plan.region_aggregates.append(element)
523 # The joins-stage query also needs to include all columns needed by the
524 # downstream projection query. Note that this:
525 # - never adds new dimensions to the joins stage (since those are
526 # always a superset of the projection-stage dimensions);
527 # - does not affect our determination of
528 # projection_plan.needs_dataset_distinct, because any dataset fields
529 # being added to the joins stage here are already in the projection.
530 joins_plan.columns.update(projection_plan.columns)
532 find_first_plan = None
533 if find_first_dataset is not None:
534 find_first_plan = QueryFindFirstPlan(joins_plan.datasets[find_first_dataset])
535 # If we're doing a find-first search and there's a calibration
536 # collection in play, we need to make sure the rows coming out of
537 # the base query have only one timespan for each data ID +
538 # collection, and we can only do that with a GROUP BY and COUNT
539 # that we inspect in postprocessing.
540 if find_first_plan.search.is_calibration_search:
541 builder.postprocessing.check_validity_match_count = True
542 plan = QueryPlan(
543 joins=joins_plan,
544 projection=projection_plan,
545 find_first=find_first_plan,
546 final_columns=final_columns,
547 )
548 return plan, builder
550 def apply_query_joins(self, plan: QueryJoinsPlan, joiner: QueryJoiner) -> None:
551 """Modify a `QueryJoiner` to include all tables and other FROM and
552 WHERE clause terms needed.
554 Parameters
555 ----------
556 plan : `QueryJoinsPlan`
557 Component of a `QueryPlan` relevant for the "joins" stage.
558 joiner : `QueryJoiner`
559 Component of a `QueryBuilder` that holds the FROM and WHERE
560 clauses. This is expected to be initialized by `analyze_query`
561 and will be modified in-place on return.
562 """
563 # Process data coordinate upload joins.
564 for upload_key, upload_dimensions in plan.data_coordinate_uploads.items():
565 joiner.join(
566 QueryJoiner(self.db, self._upload_tables[upload_key]).extract_dimensions(
567 upload_dimensions.required
568 )
569 )
570 # Process materialization joins. We maintain a set of dataset types
571 # that were included in a materialization; searches for these datasets
572 # can be dropped if they are only present to provide a constraint on
573 # data IDs, since that's already embedded in a materialization.
574 materialized_datasets: set[str] = set()
575 for materialization_key, materialization_dimensions in plan.materializations.items():
576 materialized_datasets.update(
577 self._join_materialization(joiner, materialization_key, materialization_dimensions)
578 )
579 # Process dataset joins.
580 for dataset_search in plan.datasets.values():
581 self._join_dataset_search(
582 joiner,
583 dataset_search,
584 plan.columns.dataset_fields[dataset_search.name],
585 )
586 # Join in dimension element tables that we know we need relationships
587 # or columns from.
588 for element in plan.iter_mandatory():
589 joiner.join(
590 self.managers.dimensions.make_query_joiner(
591 element, plan.columns.dimension_fields[element.name]
592 )
593 )
594 # See if any dimension keys are still missing, and if so join in their
595 # tables. Note that we know there are no fields needed from these.
596 while not (joiner.dimension_keys.keys() >= plan.columns.dimensions.names):
597 # Look for opportunities to join in multiple dimensions via single
598 # table, to reduce the total number of tables joined in.
599 missing_dimension_names = plan.columns.dimensions.names - joiner.dimension_keys.keys()
600 best = self._universe[
601 max(
602 missing_dimension_names,
603 key=lambda name: len(self._universe[name].dimensions.names & missing_dimension_names),
604 )
605 ]
606 joiner.join(self.managers.dimensions.make_query_joiner(best, frozenset()))
607 # Add the WHERE clause to the joiner.
608 joiner.where(plan.predicate.visit(SqlColumnVisitor(joiner, self)))
610 def apply_query_projection(self, plan: QueryProjectionPlan, builder: QueryBuilder) -> None:
611 """Modify `QueryBuilder` to reflect the "projection" stage of query
612 construction, which can involve a GROUP BY or DISTINCT [ON] clause
613 that enforces uniqueness.
615 Parameters
616 ----------
617 plan : `QueryProjectionPlan`
618 Component of a `QueryPlan` relevant for the "projection" stage.
619 builder : `QueryBuilder`
620 Builder object that will be modified in place. Expected to be
621 initialized by `analyze_query` and further modified by
622 `apply_query_joins`.
623 """
624 builder.columns = plan.columns
625 if not plan and not builder.postprocessing.check_validity_match_count:
626 # Rows are already unique; nothing else to do in this method.
627 return
628 # This method generates either a SELECT DISTINCT [ON] or a SELECT with
629 # GROUP BY. We'll work out which as we go.
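        # For illustration only (the exact SQL depends on the builder state),
        # the possible shapes are roughly
        #
        #     SELECT DISTINCT ...
        #     SELECT DISTINCT ON (visit, detector) ...
        #     SELECT ..., ANY_VALUE(...) ... GROUP BY visit, detector
        #
        # where 'visit' and 'detector' stand in for whatever dimension keys
        # (and possibly collection keys) end up in the unique-key list below.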
630 have_aggregates: bool = False
631 # Dimension key columns form at least most of our GROUP BY or DISTINCT
632 # ON clause.
633 unique_keys: list[sqlalchemy.ColumnElement[Any]] = [
634 builder.joiner.dimension_keys[k][0] for k in plan.columns.dimensions.data_coordinate_keys
635 ]
636 # There are two reasons we might need an aggregate function:
637 # - to make sure temporal constraints and joins have resulted in at
638 # most one validity range match for each data ID and collection,
639 # when we're doing a find-first query.
640 # - to compute the unions of regions we need for postprocessing, when
641 # the data IDs for those regions are not wholly included in the
642 # results (i.e. we need to postprocess on
643 # visit_detector_region.region, but the output rows don't have
644 # detector, just visit - so we compute the union of the
645 # visit_detector region over all matched detectors).
646 if builder.postprocessing.check_validity_match_count:
647 builder.joiner.special[builder.postprocessing.VALIDITY_MATCH_COUNT] = (
648 sqlalchemy.func.count().label(builder.postprocessing.VALIDITY_MATCH_COUNT)
649 )
650 have_aggregates = True
651 for element in plan.region_aggregates:
652 builder.joiner.fields[element.name]["region"] = ddl.Base64Region.union_aggregate(
653 builder.joiner.fields[element.name]["region"]
654 )
655 have_aggregates = True
656 # Many of our fields derive their uniqueness from the unique_key
657 fields: if rows are unique over the 'unique_key' fields, then they're
658 # automatically unique over these 'derived_fields'. We just remember
659 # these as pairs of (logical_table, field) for now.
660 derived_fields: list[tuple[str, str]] = []
661 # All dimension record fields are derived fields.
662 for element_name, fields_for_element in plan.columns.dimension_fields.items():
663 for element_field in fields_for_element:
664 derived_fields.append((element_name, element_field))
665 # Some dataset fields are derived fields and some are unique keys, and
666 # it depends on the kinds of collection(s) we're searching and whether
667 # it's a find-first query.
668 for dataset_type, fields_for_dataset in plan.columns.dataset_fields.items():
669 for dataset_field in fields_for_dataset:
670 if dataset_field == "collection_key":
671 # If the collection_key field is present, it's needed for
672 # uniqueness if we're looking in more than one collection.
673 # If not, it's a derived field.
674 if len(plan.datasets[dataset_type].collection_records) > 1:
675 unique_keys.append(builder.joiner.fields[dataset_type]["collection_key"])
676 else:
677 derived_fields.append((dataset_type, "collection_key"))
678 elif dataset_field == "timespan" and plan.datasets[dataset_type].is_calibration_search:
679 # If we're doing a non-find-first query against a
680 # CALIBRATION collection, the timespan is also a unique
681 # key...
682 if dataset_type == plan.find_first_dataset:
683 # ...unless we're doing a find-first search on this
684 # dataset, in which case we need to use ANY_VALUE on
685 # the timespan and check that _VALIDITY_MATCH_COUNT
686 # (added earlier) is one, indicating that there was
687 # indeed only one timespan for each data ID in each
688 # collection that survived the base query's WHERE
689 # clauses and JOINs.
690 if not self.db.has_any_aggregate:
691 raise NotImplementedError(
692 f"Cannot generate query that returns {dataset_type}.timespan after a "
693 "find-first search, because this a database does not support the ANY_VALUE "
694 "aggregate function (or equivalent)."
695 )
696 builder.joiner.timespans[dataset_type] = builder.joiner.timespans[
697 dataset_type
698 ].apply_any_aggregate(self.db.apply_any_aggregate)
699 else:
700 unique_keys.extend(builder.joiner.timespans[dataset_type].flatten())
701 else:
702 # Other dataset fields derive their uniqueness from key
703 # fields.
704 derived_fields.append((dataset_type, dataset_field))
705 if not have_aggregates and not derived_fields:
706 # SELECT DISTINCT is sufficient.
707 builder.distinct = True
708 elif not have_aggregates and self.db.has_distinct_on:
709 # SELECT DISTINCT ON is sufficient and supported by this database.
710 builder.distinct = unique_keys
711 else:
712 # GROUP BY is the only option.
713 if derived_fields:
714 if self.db.has_any_aggregate:
715 for logical_table, field in derived_fields:
716 if field == "timespan":
717 builder.joiner.timespans[logical_table] = builder.joiner.timespans[
718 logical_table
719 ].apply_any_aggregate(self.db.apply_any_aggregate)
720 else:
721 builder.joiner.fields[logical_table][field] = self.db.apply_any_aggregate(
722 builder.joiner.fields[logical_table][field]
723 )
724 else:
725 _LOG.warning(
726 "Adding %d fields to GROUP BY because this database backend does not support the "
727 "ANY_VALUE aggregate function (or equivalent). This may result in a poor query "
728 "plan. Materializing the query first sometimes avoids this problem.",
729 len(derived_fields),
730 )
731 for logical_table, field in derived_fields:
732 if field == "timespan":
733 unique_keys.extend(builder.joiner.timespans[logical_table].flatten())
734 else:
735 unique_keys.append(builder.joiner.fields[logical_table][field])
736 builder.group_by = unique_keys
738 def apply_query_find_first(self, plan: QueryFindFirstPlan | None, builder: QueryBuilder) -> QueryBuilder:
739 """Modify an under-construction SQL query to return only one row for
740 each data ID, searching collections in order.
742 Parameters
743 ----------
744 plan : `QueryFindFirstPlan` or `None`
745 Component of a `QueryPlan` relevant for the "find first" stage.
746 builder : `QueryBuilder`
747 Builder object as produced by `apply_query_projection`. This
748 object should be considered to be consumed by this method - the
749 same instance may or may not be returned, and if it is not
750 returned, its state is not defined.
752 Returns
753 -------
754 builder : `QueryBuilder`
755 Modified query builder that includes the find-first resolution, if
756 one was needed.
757 """
758 if not plan:
759 return builder
760 # The query we're building looks like this:
761 #
762 # WITH {dst}_base AS (
763 # {target}
764 # ...
765 # )
766 # SELECT
767 # {dst}_window.*,
768 # FROM (
769 # SELECT
770 # {dst}_base.*,
771 # ROW_NUMBER() OVER (
772 # PARTITION BY {dst_base}.{dimensions}
773 # ORDER BY {rank}
774 # ) AS rownum
775 # ) {dst}_window
776 # WHERE
777 # {dst}_window.rownum = 1;
778 #
779 # The outermost SELECT will be represented by the QueryBuilder we
780 # return. The QueryBuilder we're given corresponds to the Common Table
781 # Expression (CTE) at the top.
782 #
783 # For SQLite only, we could use a much simpler GROUP BY instead,
784 # because it extends the standard to do exactly what we want when MIN
785 # or MAX appears once and a column does not have an aggregate function
786 # (https://www.sqlite.org/quirks.html). But since that doesn't work
787 # with PostgreSQL it doesn't help us.
788 #
789 builder = builder.nested(cte=True, force=True)
790 # We start by filling out the "window" SELECT statement...
791 partition_by = [builder.joiner.dimension_keys[d][0] for d in builder.columns.dimensions.required]
792 rank_sql_column = sqlalchemy.case(
793 {record.key: n for n, record in enumerate(plan.search.collection_records)},
794 value=builder.joiner.fields[plan.dataset_type]["collection_key"],
795 )
796 if partition_by:
797 builder.joiner.special["_ROWNUM"] = sqlalchemy.sql.func.row_number().over(
798 partition_by=partition_by, order_by=rank_sql_column
799 )
800 else:
801 builder.joiner.special["_ROWNUM"] = sqlalchemy.sql.func.row_number().over(
802 order_by=rank_sql_column
803 )
804 # ... and then turn that into a subquery with a constraint on rownum.
805 builder = builder.nested(force=True)
806 # We can now add the WHERE constraint on rownum into the outer query.
807 builder.joiner.where(builder.joiner.special["_ROWNUM"] == 1)
808 # Don't propagate _ROWNUM into downstream queries.
809 del builder.joiner.special["_ROWNUM"]
810 return builder
812 def _analyze_query_tree(self, tree: qt.QueryTree) -> tuple[QueryJoinsPlan, QueryBuilder]:
813 """Start constructing a plan for building a query from a
814 `.queries.tree.QueryTree`.
816 Parameters
817 ----------
818 tree : `.queries.tree.QueryTree`
819 Description of the joins and row filters in the query.
821 Returns
822 -------
823 plan : `QueryJoinsPlan`
824 Initial component of the plan relevant for the "joins" stage,
825 including all joins and columns needed by ``tree``. Additional
826 columns will be added to this plan later.
827 builder : `QueryBuilder`
828 Builder object initialized with overlap joins and constraints
829 potentially included, with the remainder still present in
830 `QueryJoinsPlan.predicate`.
831 """
832 # Delegate to the dimensions manager to rewrite the predicate and start
833 # a QueryBuilder to cover any spatial overlap joins or constraints.
834 # We'll return that QueryBuilder at the end.
835 (
836 predicate,
837 builder,
838 ) = self.managers.dimensions.process_query_overlaps(
839 tree.dimensions,
840 tree.predicate,
841 tree.get_joined_dimension_groups(),
842 )
843 result = QueryJoinsPlan(predicate=predicate, columns=builder.columns)
844 # Add columns required by postprocessing.
845 builder.postprocessing.gather_columns_required(result.columns)
846 # We also check that the predicate doesn't reference any dimensions
847 # without constraining their governor dimensions, since that's a
848 # particularly easy mistake to make and it's almost never intentional.
849 # We also allow the registry data ID values to provide governor values.
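        # (Illustrative example with hypothetical dimension names: a
        # ``where visit = 903342`` constraint would be rejected unless
        # ``instrument`` is also constrained, either in the expression or via
        # the default data ID.)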
850 where_columns = qt.ColumnSet(self.universe.empty.as_group())
851 result.predicate.gather_required_columns(where_columns)
852 for governor in where_columns.dimensions.governors:
853 if governor not in result.constraint_data_id:
854 if governor in self._defaults.dataId.dimensions:
855 result.constraint_data_id[governor] = self._defaults.dataId[governor]
856 else:
857 raise qt.InvalidQueryError(
858 f"Query 'where' expression references a dimension dependent on {governor} without "
859 "constraining it directly."
860 )
861 # Add materializations, which can also bring in more postprocessing.
862 for m_key, m_dimensions in tree.materializations.items():
863 m_state = self._materializations[m_key]
864 result.materializations[m_key] = m_dimensions
865 # When a query is materialized, the new tree has an empty
866 # (trivially true) predicate because the original was used to make
867 # the materialized rows. But the original postprocessing isn't
868 # executed when the materialization happens, so we have to include
869 # it here.
870 builder.postprocessing.spatial_join_filtering.extend(
871 m_state.postprocessing.spatial_join_filtering
872 )
873 builder.postprocessing.spatial_where_filtering.extend(
874 m_state.postprocessing.spatial_where_filtering
875 )
876 # Add data coordinate uploads.
877 result.data_coordinate_uploads.update(tree.data_coordinate_uploads)
878 # Add dataset_searches and filter out collections that don't have the
879 # right dataset type or governor dimensions.
880 for dataset_type_name, dataset_search in tree.datasets.items():
881 resolved_dataset_search = self._resolve_dataset_search(
882 dataset_type_name, dataset_search, result.constraint_data_id
883 )
884 result.datasets[dataset_type_name] = resolved_dataset_search
885 if not resolved_dataset_search.collection_records:
886 result.messages.append(f"Search for dataset type {dataset_type_name!r} is doomed to fail.")
887 result.messages.extend(resolved_dataset_search.messages)
888 return result, builder
890 def _resolve_dataset_search(
891 self,
892 dataset_type_name: str,
893 dataset_search: qt.DatasetSearch,
894 constraint_data_id: Mapping[str, DataIdValue],
895 ) -> ResolvedDatasetSearch:
896 """Resolve the collections that should actually be searched for
897 datasets of a particular type.
899 Parameters
900 ----------
901 dataset_type_name : `str`
902 Name of the dataset being searched for.
903 dataset_search : `.queries.tree.DatasetSearch`
904 Struct holding the dimensions and original collection search path.
905 constraint_data_id : `~collections.abc.Mapping`
906 Data ID mapping derived from the query predicate that may be used
907 to filter out some collections based on their governor dimensions.
909 Returns
910 -------
911 resolved : `ResolvedDatasetSearch`
912 Struct that extends ``dataset_search`` with the dataset type name
913 and resolved collection records.
914 """
915 result = ResolvedDatasetSearch(dataset_type_name, dataset_search.dimensions)
916 for collection_record, collection_summary in self._resolve_collection_path(
917 dataset_search.collections
918 ):
919 rejected: bool = False
920 if result.name not in collection_summary.dataset_types.names:
921 result.messages.append(
922 f"No datasets of type {result.name!r} in collection {collection_record.name!r}."
923 )
924 rejected = True
925 for governor in constraint_data_id.keys() & collection_summary.governors.keys():
926 if constraint_data_id[governor] not in collection_summary.governors[governor]:
927 result.messages.append(
928 f"No datasets with {governor}={constraint_data_id[governor]!r} "
929 f"in collection {collection_record.name!r}."
930 )
931 rejected = True
932 if not rejected:
933 if collection_record.type is CollectionType.CALIBRATION:
934 result.is_calibration_search = True
935 result.collection_records.append(collection_record)
936 if result.dimensions != self.get_dataset_type(dataset_type_name).dimensions.as_group():
937 # This is really for server-side defensiveness; it's hard to
938 # imagine the query getting different dimensions for a dataset
939 # type in two calls to the same query driver.
940 raise qt.InvalidQueryError(
941 f"Incorrect dimensions {result.dimensions} for dataset {dataset_type_name} "
942 f"in query (vs. {self.get_dataset_type(dataset_type_name).dimensions.as_group()})."
943 )
944 return result
946 def _resolve_collection_path(
947 self, collections: Iterable[str]
948 ) -> list[tuple[CollectionRecord, CollectionSummary]]:
949 """Expand an ordered iterable of collection names into a list of
950 collection records and summaries.
952 Parameters
953 ----------
954 collections : `~collections.abc.Iterable` [ `str` ]
955 Ordered iterable of collections.
957 Returns
958 -------
959 resolved : `list` [ `tuple` [ `.registry.interfaces.CollectionRecord`,\
960 `.registry.CollectionSummary` ] ]
961 Tuples of collection record and summary. `~CollectionType.CHAINED`
962 collections are flattened out and not included.
963 """
964 result: list[tuple[CollectionRecord, CollectionSummary]] = []
965 done: set[str] = set()
967 # Eventually we really want this recursive Python code to be replaced
968 # by a recursive SQL query, especially if we extend this method to
969 # support collection glob patterns for public APIs we don't yet
970 # have in the new query system (but will need to add).
972 def recurse(collection_names: Iterable[str]) -> None:
973 for collection_name in collection_names:
974 if collection_name not in done:
975 done.add(collection_name)
976 record = self.managers.collections.find(collection_name)
978 if record.type is CollectionType.CHAINED:
979 recurse(cast(ChainedCollectionRecord, record).children)
980 else:
981 result.append((record, self.managers.datasets.getCollectionSummary(record)))
983 recurse(collections)
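        # At this point ``result`` holds one (record, summary) pair for each
        # non-CHAINED collection, in search order, with CHAINED collections
        # flattened and each collection visited at most once.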
985 return result
987 def _join_materialization(
988 self,
989 joiner: QueryJoiner,
990 key: qt.MaterializationKey,
991 dimensions: DimensionGroup,
992 ) -> frozenset[str]:
993 """Join a materialization into an under-construction query.
995 Parameters
996 ----------
997 joiner : `QueryJoiner`
998 Component of a `QueryBuilder` that holds the FROM and WHERE
999 clauses. This will be modified in-place on return.
1000 key : `.queries.tree.MaterializationKey`
1001 Unique identifier created for this materialization when it was
1002 created.
1003 dimensions : `DimensionGroup`
1004 Dimensions of the materialization.
1006 Returns
1007 -------
1008 datasets : `frozenset` [ `str` ]
1009 Dataset types that were included as constraints when this
1010 materialization was created.
1011 """
1012 columns = qt.ColumnSet(dimensions)
1013 m_state = self._materializations[key]
1014 joiner.join(QueryJoiner(self.db, m_state.table).extract_columns(columns, m_state.postprocessing))
1015 return m_state.datasets
1017 def _join_dataset_search(
1018 self,
1019 joiner: QueryJoiner,
1020 resolved_search: ResolvedDatasetSearch,
1021 fields: Set[str],
1022 ) -> None:
1023 """Join a dataset search into an under-construction query.
1025 Parameters
1026 ----------
1027 joiner : `QueryJoiner`
1028 Component of a `QueryBuilder` that holds the FROM and WHERE
1029 clauses. This will be modified in-place on return.
1030 resolved_search : `ResolvedDatasetSearch`
1031 Struct that describes the dataset type and collections.
1032 fields : `~collections.abc.Set` [ `str` ]
1033 Dataset fields to include.
1034 """
1035 storage = self.managers.datasets[resolved_search.name]
1036 # The next two asserts will need to be dropped (and the implications
1037 # dealt with instead) if materializations start having dataset fields.
1038 assert (
1039 resolved_search.name not in joiner.fields
1040 ), "Dataset fields have unexpectedly already been joined in."
1041 assert (
1042 resolved_search.name not in joiner.timespans
1043 ), "Dataset timespan has unexpectedly already been joined in."
1044 joiner.join(storage.make_query_joiner(resolved_search.collection_records, fields))
1047@dataclasses.dataclass
1048class _MaterializationState:
1049 table: sqlalchemy.Table
1050 datasets: frozenset[str]
1051 postprocessing: Postprocessing
1054class _Cursor:
1055 """A helper class for managing paged query results and cursor lifetimes.
1057 This class holds a context manager for the SQLAlchemy cursor object but is
1058 not itself a context manager. It always cleans up (i.e. calls its `close`
1059 method) when it raises an exception or exhausts the cursor, but external
1060 code is responsible for calling `close` when the cursor is abandoned before
1061 it is exhausted, including when that happens due to an external exception.
1063 Parameters
1064 ----------
1065 db : `.registry.interfaces.Database`
1066 Database to run the query against.
1067 sql : `sqlalchemy.Executable`
1068 SQL query to execute.
1069 result_spec : `ResultSpec`
1070 Specification of the result type.
1071 name_shrinker : `NameShrinker` or `None`
1072 Object that was used to shrink dataset column names to fit within the
1073 database identifier limit.
1074 postprocessing : `Postprocessing`
1075 Post-query filtering and checks to perform.
1076 raw_page_size : `int`
1077 Maximum number of SQL result rows to return in each page, before
1078 postprocessing.
1079 """
1081 def __init__(
1082 self,
1083 db: Database,
1084 sql: sqlalchemy.Executable,
1085 result_spec: ResultSpec,
1086 name_shrinker: NameShrinker | None,
1087 postprocessing: Postprocessing,
1088 raw_page_size: int,
1089 ):
1090 self._result_spec = result_spec
1091 self._name_shrinker = name_shrinker
1092 self._raw_page_size = raw_page_size
1093 self._postprocessing = postprocessing
1094 self._timespan_repr_cls = db.getTimespanRepresentation()
1095 self._context = db.query(sql, execution_options=dict(yield_per=raw_page_size))
1096 cursor = self._context.__enter__()
1097 try:
1098 self._iterator = cursor.partitions()
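            # With the yield_per execution option set above, partitions()
            # yields batches of at most raw_page_size rows; each batch becomes
            # one result page.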
1099 except: # noqa: E722
1100 self._context.__exit__(*sys.exc_info())
1101 raise
1103 def close(self, exc_type: Any = None, exc_value: Any = None, traceback: Any = None) -> None:
1104 """Close this cursor.
1106 Parameters
1107 ----------
1108 exc_type : `type`
1109 Exception type as obtained from `sys.exc_info`, or `None` if there
1110 was no error.
1111 exc_value : `BaseException` or `None`
1112 Exception instance as obtained from `sys.exc_info`, or `None` if
1113 there was no error.
1114 traceback : `object`
1115 Traceback as obtained from `sys.exc_info`, or `None` if there was
1116 no error.
1117 """
1118 self._context.__exit__(exc_type, exc_value, traceback)
1120 def next(self) -> ResultPage:
1121 """Return the next result page from this query.
1123 When there are no more results after this result page, the `next_key`
1124 attribute of the returned object is `None` and the cursor will be
1125 closed. The cursor is also closed if this method raises an exception.
1126 """
1127 try:
1128 raw_page = next(self._iterator, tuple())
1129 if len(raw_page) == self._raw_page_size:
1130 # There's some chance we got unlucky and this page exactly
1131 # finishes off the query, and we won't know the next page does
1132 # not exist until we try to fetch it. But that's better than
1133 # always fetching the next page up front.
1134 next_key = uuid.uuid4()
1135 else:
1136 next_key = None
1137 self.close()
1139 postprocessed_rows = self._postprocessing.apply(raw_page)
1140 match self._result_spec:
1141 case DimensionRecordResultSpec():
1142 return self._convert_dimension_record_results(postprocessed_rows, next_key)
1143 case _:
1144 raise NotImplementedError("TODO")
1145 except: # noqa: E722
1146 self._context.__exit__(*sys.exc_info())
1147 raise
1149 def _convert_dimension_record_results(
1150 self,
1151 raw_rows: Iterable[sqlalchemy.Row],
1152 next_key: PageKey | None,
1153 ) -> DimensionRecordResultPage:
1154 """Convert a raw SQL result iterable into a page of `DimensionRecord`
1155 query results.
1157 Parameters
1158 ----------
1159 raw_rows : `~collections.abc.Iterable` [ `sqlalchemy.Row` ]
1160 Iterable of SQLAlchemy rows, with `Postprocessing` filters already
1161 applied.
1162 next_key : `PageKey` or `None`
1163 Key for the next page to add into the returned page object.
1165 Returns
1166 -------
1167 result_page : `DimensionRecordResultPage`
1168 Page object that holds a `DimensionRecord` container.
1169 """
1170 result_spec = cast(DimensionRecordResultSpec, self._result_spec)
1171 record_set = DimensionRecordSet(result_spec.element)
1172 record_cls = result_spec.element.RecordClass
1173 if isinstance(result_spec.element, SkyPixDimension):
1174 pixelization = result_spec.element.pixelization
1175 id_qualified_name = qt.ColumnSet.get_qualified_name(result_spec.element.name, None)
1176 for raw_row in raw_rows:
1177 pixel_id = raw_row._mapping[id_qualified_name]
1178 record_set.add(record_cls(id=pixel_id, region=pixelization.pixel(pixel_id)))
1179 else:
1180 # Mapping from DimensionRecord attribute name to qualified column
1181 # name, but as a list of tuples since we'd just iterate over items
1182 # anyway.
1183 column_map = list(
1184 zip(
1185 result_spec.element.schema.dimensions.names,
1186 result_spec.element.dimensions.names,
1187 )
1188 )
1189 for field in result_spec.element.schema.remainder.names:
1190 if field != "timespan":
1191 column_map.append(
1192 (field, qt.ColumnSet.get_qualified_name(result_spec.element.name, field))
1193 )
1194 if result_spec.element.temporal:
1195 timespan_qualified_name = qt.ColumnSet.get_qualified_name(
1196 result_spec.element.name, "timespan"
1197 )
1198 else:
1199 timespan_qualified_name = None
1200 for raw_row in raw_rows:
1201 m = raw_row._mapping
1202 d = {k: m[v] for k, v in column_map}
1203 if timespan_qualified_name is not None:
1204 d["timespan"] = self._timespan_repr_cls.extract(m, name=timespan_qualified_name)
1205 record_set.add(record_cls(**d))
1206 return DimensionRecordResultPage(spec=result_spec, next_key=next_key, rows=record_set)