Coverage for python/lsst/daf/butler/direct_query_driver/_driver.py: 15%
416 statements
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30import uuid
32__all__ = ("DirectQueryDriver",)
34import dataclasses
35import logging
36import sys
37from collections.abc import Iterable, Mapping, Set
38from contextlib import ExitStack
39from typing import TYPE_CHECKING, Any, cast, overload
41import sqlalchemy
43from .. import ddl
44from .._dataset_type import DatasetType
45from .._exceptions import InvalidQueryError
46from ..dimensions import (
47 DataCoordinate,
48 DataIdValue,
49 DimensionGroup,
50 DimensionRecordSet,
51 DimensionUniverse,
52 SkyPixDimension,
53)
54from ..name_shrinker import NameShrinker
55from ..queries import tree as qt
56from ..queries.driver import (
57 DataCoordinateResultPage,
58 DatasetRefResultPage,
59 DimensionRecordResultPage,
60 GeneralResultPage,
61 PageKey,
62 QueryDriver,
63 ResultPage,
64)
65from ..queries.result_specs import (
66 DataCoordinateResultSpec,
67 DatasetRefResultSpec,
68 DimensionRecordResultSpec,
69 GeneralResultSpec,
70 ResultSpec,
71)
72from ..registry import CollectionSummary, CollectionType, NoDefaultCollectionError
73from ..registry.interfaces import ChainedCollectionRecord, CollectionRecord
74from ..registry.managers import RegistryManagerInstances
75from ._postprocessing import Postprocessing
76from ._query_builder import QueryBuilder, QueryJoiner
77from ._query_plan import (
78 QueryFindFirstPlan,
79 QueryJoinsPlan,
80 QueryPlan,
81 QueryProjectionPlan,
82 ResolvedDatasetSearch,
83)
84from ._sql_column_visitor import SqlColumnVisitor
86if TYPE_CHECKING:
87 from ..registry.interfaces import Database
90_LOG = logging.getLogger(__name__)
93class DirectQueryDriver(QueryDriver):
94 """The `QueryDriver` implementation for `DirectButler`.
96 Parameters
97 ----------
98 db : `Database`
99 Abstraction for the SQL database.
100 universe : `DimensionUniverse`
101 Definitions of all dimensions.
102 managers : `RegistryManagerInstances`
103 Struct of registry manager objects.
104 default_collections : `Sequence` [ `str` ]
105 Default collection search path.
106 default_data_id : `DataCoordinate`
107 Default governor dimension values.
108 raw_page_size : `int`, optional
109 Number of database rows to fetch for each result page. The actual
110 number of rows in a page may be smaller due to postprocessing.
111 constant_rows_limit : `int`, optional
112 Maximum number of uploaded rows to include in queries via
113 `Database.constant_rows`; above this limit a temporary table is used
114 instead.
115 postprocessing_filter_factor : `int`, optional
116 The number of database rows we expect to have to fetch to yield a
117 single output row for queries that involve postprocessing. This is
118 purely a performance tuning parameter that attempts to balance between
119 fetching too much and requiring multiple fetches; the true value is
120 highly dependent on the actual query.
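
 Notes
 -----
 A minimal usage sketch, assuming ``db``, ``universe``, ``managers``, and
 ``default_data_id`` have been constructed elsewhere and that ``tree`` and
 ``result_spec`` describe a query::

     driver = DirectQueryDriver(db, universe, managers, [], default_data_id)
     with driver:
         page = driver.execute(result_spec, tree)

 `execute` raises `RuntimeError` if it is called before the driver's context
 is entered.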
121 """
123 def __init__(
124 self,
125 db: Database,
126 universe: DimensionUniverse,
127 managers: RegistryManagerInstances,
128 default_collections: Iterable[str],
129 default_data_id: DataCoordinate,
130 raw_page_size: int = 10000,
131 constant_rows_limit: int = 1000,
132 postprocessing_filter_factor: int = 10,
133 ):
134 self.db = db
135 self.managers = managers
136 self._universe = universe
137 self._default_collections = tuple(default_collections)
138 self._default_data_id = default_data_id
139 self._materializations: dict[qt.MaterializationKey, _MaterializationState] = {}
140 self._upload_tables: dict[qt.DataCoordinateUploadKey, sqlalchemy.FromClause] = {}
141 self._exit_stack: ExitStack | None = None
142 self._raw_page_size = raw_page_size
143 self._postprocessing_filter_factor = postprocessing_filter_factor
144 self._constant_rows_limit = constant_rows_limit
145 self._cursors: dict[PageKey, _Cursor] = {}
147 def __enter__(self) -> None:
148 self._exit_stack = ExitStack()
149 # It might be nice to defer opening a transaction here until first use
150 # to reduce the time spent in transactions. But it's worth noting that
151 # this is the default low-level behavior of the Python SQLite driver,
152 # and it makes it incredibly prone to deadlocks. We might be okay
153 # here, because Query doesn't do true write operations - just temp
154 # table writes - but I'm not confident that's enough to make delayed
155 # transaction starts safe against deadlocks, and it'd be more
156 # complicated to implement anyway.
157 #
158 # We start a transaction rather than just opening a connection to make
159 # temp table and cursors work with pg_bouncer transaction affinity.
160 self._exit_stack.enter_context(self.db.transaction(for_temp_tables=True))
162 def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
163 assert self._exit_stack is not None
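 # Drop per-query state and close any cursors left open by unexhausted
 # result iterators before the transaction entered in __enter__ ends.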
164 self._materializations.clear()
165 self._upload_tables.clear()
166 while self._cursors:
167 _, cursor = self._cursors.popitem()
168 cursor.close(exc_type, exc_value, traceback)
169 self._exit_stack.__exit__(exc_type, exc_value, traceback)
170 self._exit_stack = None
172 @property
173 def universe(self) -> DimensionUniverse:
174 return self._universe
176 @overload
 177 def execute(
178 self, result_spec: DataCoordinateResultSpec, tree: qt.QueryTree
179 ) -> DataCoordinateResultPage: ...
181 @overload
 182 def execute(
183 self, result_spec: DimensionRecordResultSpec, tree: qt.QueryTree
184 ) -> DimensionRecordResultPage: ...
186 @overload
 187 def execute(self, result_spec: DatasetRefResultSpec, tree: qt.QueryTree) -> DatasetRefResultPage: ...
189 @overload
 190 def execute(self, result_spec: GeneralResultSpec, tree: qt.QueryTree) -> GeneralResultPage: ...
192 def execute(self, result_spec: ResultSpec, tree: qt.QueryTree) -> ResultPage:
193 # Docstring inherited.
194 if self._exit_stack is None:
195 raise RuntimeError("QueryDriver context must be entered before queries can be executed.")
196 _, builder = self.build_query(
197 tree,
198 final_columns=result_spec.get_result_columns(),
199 order_by=result_spec.order_by,
200 find_first_dataset=result_spec.find_first_dataset,
201 )
202 sql_select = builder.select()
203 if result_spec.order_by:
204 visitor = SqlColumnVisitor(builder.joiner, self)
205 sql_select = sql_select.order_by(*[visitor.expect_scalar(term) for term in result_spec.order_by])
206 if result_spec.limit is not None:
207 if builder.postprocessing:
208 builder.postprocessing.limit = result_spec.limit
209 else:
210 sql_select = sql_select.limit(result_spec.limit)
211 if builder.postprocessing.limit is not None:
212 # We might want to fetch many fewer rows than the default page
213 # size if we have to implement limit in postprocessing.
214 raw_page_size = min(
215 self._postprocessing_filter_factor * builder.postprocessing.limit,
216 self._raw_page_size,
217 )
218 else:
219 raw_page_size = self._raw_page_size
220 # Execute the query by initializing a _Cursor object that manages the
221 # lifetime of the result.
222 cursor = _Cursor(
223 self.db,
224 sql_select,
225 result_spec=result_spec,
226 name_shrinker=builder.joiner.name_shrinker,
227 postprocessing=builder.postprocessing,
228 raw_page_size=raw_page_size,
229 )
230 result_page = cursor.next()
231 if result_page.next_key is not None:
232 # Cursor has not been exhausted; add it to the driver for use by
233 # fetch_next_page.
234 self._cursors[result_page.next_key] = cursor
235 return result_page
237 @overload
 238 def fetch_next_page(
239 self, result_spec: DataCoordinateResultSpec, key: PageKey
240 ) -> DataCoordinateResultPage: ...
242 @overload
 243 def fetch_next_page(
244 self, result_spec: DimensionRecordResultSpec, key: PageKey
245 ) -> DimensionRecordResultPage: ...
247 @overload
 248 def fetch_next_page(self, result_spec: DatasetRefResultSpec, key: PageKey) -> DatasetRefResultPage: ...
250 @overload
 251 def fetch_next_page(self, result_spec: GeneralResultSpec, key: PageKey) -> GeneralResultPage: ...
253 def fetch_next_page(self, result_spec: ResultSpec, key: PageKey) -> ResultPage:
254 # Docstring inherited.
255 try:
256 cursor = self._cursors.pop(key)
257 except KeyError:
258 raise RuntimeError("Cannot continue query result iteration after the query context has closed.")
259 result_page = cursor.next()
260 if result_page.next_key is not None:
261 self._cursors[result_page.next_key] = cursor
262 return result_page
264 def materialize(
265 self,
266 tree: qt.QueryTree,
267 dimensions: DimensionGroup,
268 datasets: frozenset[str],
269 key: qt.MaterializationKey | None = None,
270 ) -> qt.MaterializationKey:
271 # Docstring inherited.
272 if self._exit_stack is None:
273 raise RuntimeError("QueryDriver context must be entered before 'materialize' is called.")
274 _, builder = self.build_query(tree, qt.ColumnSet(dimensions))
275 # Current implementation ignores 'datasets' aside from remembering
276 # them, because figuring out what to put in the temporary table for
277 # them is tricky, especially if calibration collections are involved.
278 # That's okay because:
279 #
280 # - the query whose results we materialize includes the dataset
281 # searches as constraints;
282 #
283 # - we still (in Query.materialize) join the dataset searches back in
284 # anyway, and given materialized data IDs the join to the dataset
285 # search is straightforward and definitely well-indexed, and not much
286 # (if at all) worse than joining back in on a materialized UUID.
287 #
288 sql_select = builder.select()
289 table = self._exit_stack.enter_context(self.db.temporary_table(builder.make_table_spec()))
290 self.db.insert(table, select=sql_select)
291 if key is None:
292 key = uuid.uuid4()
293 self._materializations[key] = _MaterializationState(table, datasets, builder.postprocessing)
294 return key
296 def upload_data_coordinates(
297 self,
298 dimensions: DimensionGroup,
299 rows: Iterable[tuple[DataIdValue, ...]],
300 key: qt.DataCoordinateUploadKey | None = None,
301 ) -> qt.DataCoordinateUploadKey:
302 # Docstring inherited.
303 if self._exit_stack is None:
304 raise RuntimeError(
305 "QueryDriver context must be entered before 'upload_data_coordinates' is called."
306 )
307 columns = qt.ColumnSet(dimensions).drop_implied_dimension_keys()
308 table_spec = ddl.TableSpec(
309 [columns.get_column_spec(logical_table, field).to_sql_spec() for logical_table, field in columns]
310 )
311 dict_rows: list[dict[str, Any]]
312 if not columns:
313 table_spec.fields.add(
314 ddl.FieldSpec(
315 QueryBuilder.EMPTY_COLUMNS_NAME, dtype=QueryBuilder.EMPTY_COLUMNS_TYPE, nullable=True
316 )
317 )
318 dict_rows = [{QueryBuilder.EMPTY_COLUMNS_NAME: None}]
319 else:
320 dict_rows = [dict(zip(dimensions.required, values)) for values in rows]
321 from_clause: sqlalchemy.FromClause
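 # Small uploads can be inlined directly into the query via
 # Database.constant_rows; larger ones (above constant_rows_limit) are
 # written to a temporary table instead.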
322 if len(dict_rows) > self._constant_rows_limit:
323 from_clause = self._exit_stack.enter_context(self.db.temporary_table(table_spec))
324 self.db.insert(from_clause, *dict_rows)
325 else:
326 from_clause = self.db.constant_rows(table_spec.fields, *dict_rows)
327 if key is None:
328 key = uuid.uuid4()
329 self._upload_tables[key] = from_clause
330 return key
332 def count(
333 self,
334 tree: qt.QueryTree,
335 result_spec: ResultSpec,
336 *,
337 exact: bool,
338 discard: bool,
339 ) -> int:
340 # Docstring inherited.
341 columns = result_spec.get_result_columns()
342 plan, builder = self.build_query(tree, columns, find_first_dataset=result_spec.find_first_dataset)
343 if not all(d.collection_records for d in plan.joins.datasets.values()):
344 return 0
345 if not exact:
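 # An inexact count is allowed to skip the row filtering normally done
 # in postprocessing, so replace it with an empty Postprocessing.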
346 builder.postprocessing = Postprocessing()
347 if builder.postprocessing:
348 if not discard:
349 raise InvalidQueryError("Cannot count query rows exactly without discarding them.")
350 sql_select = builder.select()
351 builder.postprocessing.limit = result_spec.limit
352 n = 0
353 with self.db.query(sql_select.execution_options(yield_per=self._raw_page_size)) as results:
354 for _ in builder.postprocessing.apply(results):
355 n += 1
356 return n
357 # If the query has DISTINCT or GROUP BY, nest it in a subquery so we
358 # count deduplicated rows.
359 builder = builder.nested()
360 # Replace the columns of the query with just COUNT(*).
361 builder.columns = qt.ColumnSet(self._universe.empty.as_group())
362 count_func: sqlalchemy.ColumnElement[int] = sqlalchemy.func.count()
363 builder.joiner.special["_ROWCOUNT"] = count_func
364 # Render and run the query.
365 sql_select = builder.select()
366 with self.db.query(sql_select) as result:
367 count = cast(int, result.scalar())
368 if result_spec.limit is not None:
369 count = min(count, result_spec.limit)
370 return count
372 def any(self, tree: qt.QueryTree, *, execute: bool, exact: bool) -> bool:
373 # Docstring inherited.
374 plan, builder = self.build_query(tree, qt.ColumnSet(tree.dimensions))
375 if not all(d.collection_records for d in plan.joins.datasets.values()):
376 return False
377 if not execute:
378 if exact:
379 raise InvalidQueryError("Cannot obtain exact result for 'any' without executing.")
380 return True
381 if builder.postprocessing and exact:
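 # With postprocessing in play, an exact answer requires streaming rows
 # through the postprocessing filters until one survives (or none do).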
382 sql_select = builder.select()
383 with self.db.query(
384 sql_select.execution_options(yield_per=self._postprocessing_filter_factor)
385 ) as result:
386 for _ in builder.postprocessing.apply(result):
387 return True
388 return False
389 sql_select = builder.select().limit(1)
390 with self.db.query(sql_select) as result:
391 return result.first() is not None
393 def explain_no_results(self, tree: qt.QueryTree, execute: bool) -> Iterable[str]:
394 # Docstring inherited.
395 plan, _ = self.analyze_query(tree, qt.ColumnSet(tree.dimensions))
396 if plan.joins.messages or not execute:
397 return plan.joins.messages
398 # TODO: guess at ways to split up query that might fail or succeed if
399 # run separately, execute them with LIMIT 1 and report the results.
400 return []
402 def get_dataset_type(self, name: str) -> DatasetType:
403 # Docstring inherited
404 return self.managers.datasets[name].datasetType
406 def get_default_collections(self) -> tuple[str, ...]:
407 # Docstring inherited.
408 if not self._default_collections:
409 raise NoDefaultCollectionError("No collections provided and no default collections.")
410 return self._default_collections
412 def build_query(
413 self,
414 tree: qt.QueryTree,
415 final_columns: qt.ColumnSet,
416 order_by: Iterable[qt.OrderExpression] = (),
417 find_first_dataset: str | None = None,
418 ) -> tuple[QueryPlan, QueryBuilder]:
419 """Convert a query description into a mostly-completed `QueryBuilder`.
421 Parameters
422 ----------
423 tree : `.queries.tree.QueryTree`
424 Description of the joins and row filters in the query.
425 final_columns : `.queries.tree.ColumnSet`
426 Final output columns that should be emitted by the SQL query.
427 order_by : `~collections.abc.Iterable` [ \
428 `.queries.tree.OrderExpression` ], optional
429 Column expressions to sort by.
430 find_first_dataset : `str` or `None`, optional
431 Name of a dataset type for which only one result row for each data
 432 ID should be returned, with the collections searched in order.
434 Returns
435 -------
436 plan : `QueryPlan`
437 Plan used to transform the query into SQL, including some
438 information (e.g. diagnostics about doomed-to-fail dataset
439 searches) that isn't transferred into the builder itself.
440 builder : `QueryBuilder`
441 Builder object that can be used to create a SQL SELECT via its
442 `~QueryBuilder.select` method. We return this instead of a
443 `sqlalchemy.Select` object itself to allow different methods to
 444 customize the SELECT clause (e.g. `count` can replace the
445 columns selected with ``COUNT(*)``).
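
 Notes
 -----
 A sketch of the typical call pattern, mirroring `execute` and `count`
 (``driver`` is an instance of this class; ``tree`` and ``columns`` are
 assumed to have been built already)::

     plan, builder = driver.build_query(tree, final_columns=columns)
     sql_select = builder.select()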
446 """
447 # See the QueryPlan docs for an overview of what these stages of query
448 # construction do.
449 plan, builder = self.analyze_query(tree, final_columns, order_by, find_first_dataset)
450 self.apply_query_joins(plan.joins, builder.joiner)
451 self.apply_query_projection(plan.projection, builder)
452 builder = self.apply_query_find_first(plan.find_first, builder)
453 builder.columns = plan.final_columns
454 return plan, builder
456 def analyze_query(
457 self,
458 tree: qt.QueryTree,
459 final_columns: qt.ColumnSet,
460 order_by: Iterable[qt.OrderExpression] = (),
461 find_first_dataset: str | None = None,
462 ) -> tuple[QueryPlan, QueryBuilder]:
463 """Construct a plan for building a query and initialize a builder.
465 Parameters
466 ----------
467 tree : `.queries.tree.QueryTree`
468 Description of the joins and row filters in the query.
469 final_columns : `.queries.tree.ColumnSet`
470 Final output columns that should be emitted by the SQL query.
471 order_by : `~collections.abc.Iterable` [ \
472 `.queries.tree.OrderExpression` ], optional
473 Column expressions to sort by.
474 find_first_dataset : `str` or `None`, optional
475 Name of a dataset type for which only one result row for each data
 476 ID should be returned, with the collections searched in order.
478 Returns
479 -------
480 plan : `QueryPlan`
481 Plan used to transform the query into SQL, including some
482 information (e.g. diagnostics about doomed-to-fail dataset
483 searches) that isn't transferred into the builder itself.
484 builder : `QueryBuilder`
485 Builder object initialized with overlap joins and constraints
486 potentially included, with the remainder still present in
 487 `QueryJoinsPlan.predicate`.
488 """
489 # The fact that this method returns both a QueryPlan and an initial
490 # QueryBuilder (rather than just a QueryPlan) is a tradeoff that lets
491 # DimensionRecordStorageManager.process_query_overlaps (which is called
492 # by the `_analyze_query_tree` call below) pull out overlap expressions
493 # from the predicate at the same time it turns them into SQL table
494 # joins (in the builder).
495 joins_plan, builder = self._analyze_query_tree(tree)
497 # The "projection" columns differ from the final columns by not
498 # omitting any dimension keys (this keeps queries for different result
499 # types more similar during construction), including any columns needed
500 # only by order_by terms, and including the collection key if we need
501 # it for GROUP BY or DISTINCT.
502 projection_plan = QueryProjectionPlan(
503 final_columns.copy(), joins_plan.datasets, find_first_dataset=find_first_dataset
504 )
505 projection_plan.columns.restore_dimension_keys()
506 for term in order_by:
507 term.gather_required_columns(projection_plan.columns)
508 # The projection gets interesting if it does not have all of the
509 # dimension keys or dataset fields of the "joins" stage, because that
510 # means it needs to do a GROUP BY or DISTINCT ON to get unique rows.
511 if projection_plan.columns.dimensions != joins_plan.columns.dimensions:
512 assert projection_plan.columns.dimensions.issubset(joins_plan.columns.dimensions)
513 # We're going from a larger set of dimensions to a smaller set,
 514 # which means we'll be doing a SELECT DISTINCT [ON] or GROUP BY.
515 projection_plan.needs_dimension_distinct = True
516 for dataset_type, fields_for_dataset in joins_plan.columns.dataset_fields.items():
517 if not projection_plan.columns.dataset_fields[dataset_type]:
518 # The "joins"-stage query has one row for each collection for
519 # each data ID, but the projection-stage query just wants
520 # one row for each data ID.
521 if len(joins_plan.datasets[dataset_type].collection_records) > 1:
522 projection_plan.needs_dataset_distinct = True
523 break
524 # If there are any dataset fields being propagated through that
525 # projection and there is more than one collection, we need to
526 # include the collection_key column so we can use that as one of
527 # the DISTINCT or GROUP BY columns.
528 for dataset_type, fields_for_dataset in projection_plan.columns.dataset_fields.items():
529 if len(joins_plan.datasets[dataset_type].collection_records) > 1:
530 fields_for_dataset.add("collection_key")
531 if projection_plan:
532 # If there's a projection and we're doing postprocessing, we might
533 # be collapsing the dimensions of the postprocessing regions. When
534 # that happens, we want to apply an aggregate function to them that
535 # computes the union of the regions that are grouped together.
536 for element in builder.postprocessing.iter_missing(projection_plan.columns):
537 if element.name not in projection_plan.columns.dimensions.elements:
538 projection_plan.region_aggregates.append(element)
540 # The joins-stage query also needs to include all columns needed by the
541 # downstream projection query. Note that this:
542 # - never adds new dimensions to the joins stage (since those are
543 # always a superset of the projection-stage dimensions);
544 # - does not affect our determination of
545 # projection_plan.needs_dataset_distinct, because any dataset fields
546 # being added to the joins stage here are already in the projection.
547 joins_plan.columns.update(projection_plan.columns)
549 find_first_plan = None
550 if find_first_dataset is not None:
551 find_first_plan = QueryFindFirstPlan(joins_plan.datasets[find_first_dataset])
552 # If we're doing a find-first search and there's a calibration
553 # collection in play, we need to make sure the rows coming out of
554 # the base query have only one timespan for each data ID +
555 # collection, and we can only do that with a GROUP BY and COUNT
556 # that we inspect in postprocessing.
557 if find_first_plan.search.is_calibration_search:
558 builder.postprocessing.check_validity_match_count = True
559 plan = QueryPlan(
560 joins=joins_plan,
561 projection=projection_plan,
562 find_first=find_first_plan,
563 final_columns=final_columns,
564 )
565 return plan, builder
567 def apply_query_joins(self, plan: QueryJoinsPlan, joiner: QueryJoiner) -> None:
568 """Modify a `QueryJoiner` to include all tables and other FROM and
569 WHERE clause terms needed.
571 Parameters
572 ----------
 573 plan : `QueryJoinsPlan`
574 Component of a `QueryPlan` relevant for the "joins" stage.
575 joiner : `QueryJoiner`
576 Component of a `QueryBuilder` that holds the FROM and WHERE
577 clauses. This is expected to be initialized by `analyze_query`
578 and will be modified in-place on return.
579 """
580 # Process data coordinate upload joins.
581 for upload_key, upload_dimensions in plan.data_coordinate_uploads.items():
582 joiner.join(
583 QueryJoiner(self.db, self._upload_tables[upload_key]).extract_dimensions(
584 upload_dimensions.required
585 )
586 )
587 # Process materialization joins. We maintain a set of dataset types
588 # that were included in a materialization; searches for these datasets
589 # can be dropped if they are only present to provide a constraint on
590 # data IDs, since that's already embedded in a materialization.
591 materialized_datasets: set[str] = set()
592 for materialization_key, materialization_dimensions in plan.materializations.items():
593 materialized_datasets.update(
594 self._join_materialization(joiner, materialization_key, materialization_dimensions)
595 )
596 # Process dataset joins.
597 for dataset_search in plan.datasets.values():
598 self._join_dataset_search(
599 joiner,
600 dataset_search,
601 plan.columns.dataset_fields[dataset_search.name],
602 )
603 # Join in dimension element tables that we know we need relationships
604 # or columns from.
605 for element in plan.iter_mandatory():
606 joiner.join(
607 self.managers.dimensions.make_query_joiner(
608 element, plan.columns.dimension_fields[element.name]
609 )
610 )
611 # See if any dimension keys are still missing, and if so join in their
612 # tables. Note that we know there are no fields needed from these.
613 while not (joiner.dimension_keys.keys() >= plan.columns.dimensions.names):
614 # Look for opportunities to join in multiple dimensions via single
615 # table, to reduce the total number of tables joined in.
616 missing_dimension_names = plan.columns.dimensions.names - joiner.dimension_keys.keys()
617 best = self._universe[
618 max(
619 missing_dimension_names,
620 key=lambda name: len(self._universe[name].dimensions.names & missing_dimension_names),
621 )
622 ]
623 joiner.join(self.managers.dimensions.make_query_joiner(best, frozenset()))
624 # Add the WHERE clause to the joiner.
625 joiner.where(plan.predicate.visit(SqlColumnVisitor(joiner, self)))
627 def apply_query_projection(self, plan: QueryProjectionPlan, builder: QueryBuilder) -> None:
628 """Modify `QueryBuilder` to reflect the "projection" stage of query
629 construction, which can involve a GROUP BY or DISTINCT [ON] clause
630 that enforces uniqueness.
632 Parameters
633 ----------
634 plan : `QueryProjectionPlan`
635 Component of a `QueryPlan` relevant for the "projection" stage.
636 builder : `QueryBuilder`
637 Builder object that will be modified in place. Expected to be
638 initialized by `analyze_query` and further modified by
639 `apply_query_joins`.
640 """
641 builder.columns = plan.columns
642 if not plan and not builder.postprocessing.check_validity_match_count:
643 # Rows are already unique; nothing else to do in this method.
644 return
645 # This method generates either a SELECT DISTINCT [ON] or a SELECT with
646 # GROUP BY. We'll work out which as we go.
647 have_aggregates: bool = False
648 # Dimension key columns form at least most of our GROUP BY or DISTINCT
649 # ON clause.
650 unique_keys: list[sqlalchemy.ColumnElement[Any]] = [
651 builder.joiner.dimension_keys[k][0] for k in plan.columns.dimensions.data_coordinate_keys
652 ]
653 # There are two reasons we might need an aggregate function:
654 # - to make sure temporal constraints and joins have resulted in at
655 # most one validity range match for each data ID and collection,
656 # when we're doing a find-first query.
657 # - to compute the unions of regions we need for postprocessing, when
658 # the data IDs for those regions are not wholly included in the
659 # results (i.e. we need to postprocess on
660 # visit_detector_region.region, but the output rows don't have
661 # detector, just visit - so we compute the union of the
662 # visit_detector region over all matched detectors).
663 if builder.postprocessing.check_validity_match_count:
664 builder.joiner.special[builder.postprocessing.VALIDITY_MATCH_COUNT] = (
665 sqlalchemy.func.count().label(builder.postprocessing.VALIDITY_MATCH_COUNT)
666 )
667 have_aggregates = True
668 for element in plan.region_aggregates:
669 builder.joiner.fields[element.name]["region"] = ddl.Base64Region.union_aggregate(
670 builder.joiner.fields[element.name]["region"]
671 )
672 have_aggregates = True
673 # Many of our fields derive their uniqueness from the unique_key
 674 fields: if rows are unique over the 'unique_key' fields, then they're
675 # automatically unique over these 'derived_fields'. We just remember
676 # these as pairs of (logical_table, field) for now.
677 derived_fields: list[tuple[str, str]] = []
678 # All dimension record fields are derived fields.
679 for element_name, fields_for_element in plan.columns.dimension_fields.items():
680 for element_field in fields_for_element:
681 derived_fields.append((element_name, element_field))
682 # Some dataset fields are derived fields and some are unique keys, and
683 # it depends on the kinds of collection(s) we're searching and whether
684 # it's a find-first query.
685 for dataset_type, fields_for_dataset in plan.columns.dataset_fields.items():
686 for dataset_field in fields_for_dataset:
687 if dataset_field == "collection_key":
688 # If the collection_key field is present, it's needed for
689 # uniqueness if we're looking in more than one collection.
690 # If not, it's a derived field.
691 if len(plan.datasets[dataset_type].collection_records) > 1:
692 unique_keys.append(builder.joiner.fields[dataset_type]["collection_key"])
693 else:
694 derived_fields.append((dataset_type, "collection_key"))
695 elif dataset_field == "timespan" and plan.datasets[dataset_type].is_calibration_search:
696 # If we're doing a non-find-first query against a
697 # CALIBRATION collection, the timespan is also a unique
698 # key...
699 if dataset_type == plan.find_first_dataset:
700 # ...unless we're doing a find-first search on this
701 # dataset, in which case we need to use ANY_VALUE on
702 # the timespan and check that _VALIDITY_MATCH_COUNT
703 # (added earlier) is one, indicating that there was
704 # indeed only one timespan for each data ID in each
705 # collection that survived the base query's WHERE
706 # clauses and JOINs.
707 if not self.db.has_any_aggregate:
708 raise NotImplementedError(
709 f"Cannot generate query that returns {dataset_type}.timespan after a "
710 "find-first search, because this a database does not support the ANY_VALUE "
711 "aggregate function (or equivalent)."
712 )
713 builder.joiner.timespans[dataset_type] = builder.joiner.timespans[
714 dataset_type
715 ].apply_any_aggregate(self.db.apply_any_aggregate)
716 else:
717 unique_keys.extend(builder.joiner.timespans[dataset_type].flatten())
718 else:
719 # Other dataset fields derive their uniqueness from key
720 # fields.
721 derived_fields.append((dataset_type, dataset_field))
722 if not have_aggregates and not derived_fields:
723 # SELECT DISTINCT is sufficient.
724 builder.distinct = True
725 elif not have_aggregates and self.db.has_distinct_on:
726 # SELECT DISTINCT ON is sufficient and supported by this database.
727 builder.distinct = unique_keys
728 else:
729 # GROUP BY is the only option.
730 if derived_fields:
731 if self.db.has_any_aggregate:
732 for logical_table, field in derived_fields:
733 if field == "timespan":
734 builder.joiner.timespans[logical_table] = builder.joiner.timespans[
735 logical_table
736 ].apply_any_aggregate(self.db.apply_any_aggregate)
737 else:
738 builder.joiner.fields[logical_table][field] = self.db.apply_any_aggregate(
739 builder.joiner.fields[logical_table][field]
740 )
741 else:
742 _LOG.warning(
743 "Adding %d fields to GROUP BY because this database backend does not support the "
744 "ANY_VALUE aggregate function (or equivalent). This may result in a poor query "
745 "plan. Materializing the query first sometimes avoids this problem.",
746 len(derived_fields),
747 )
748 for logical_table, field in derived_fields:
749 if field == "timespan":
750 unique_keys.extend(builder.joiner.timespans[logical_table].flatten())
751 else:
752 unique_keys.append(builder.joiner.fields[logical_table][field])
753 builder.group_by = unique_keys
755 def apply_query_find_first(self, plan: QueryFindFirstPlan | None, builder: QueryBuilder) -> QueryBuilder:
756 """Modify an under-construction SQL query to return only one row for
757 each data ID, searching collections in order.
759 Parameters
760 ----------
761 plan : `QueryFindFirstPlan` or `None`
762 Component of a `QueryPlan` relevant for the "find first" stage.
763 builder : `QueryBuilder`
764 Builder object as produced by `apply_query_projection`. This
765 object should be considered to be consumed by this method - the
766 same instance may or may not be returned, and if it is not
767 returned, its state is not defined.
769 Returns
770 -------
771 builder : `QueryBuilder`
772 Modified query builder that includes the find-first resolution, if
773 one was needed.
774 """
775 if not plan:
776 return builder
777 # The query we're building looks like this:
778 #
779 # WITH {dst}_base AS (
780 # {target}
781 # ...
782 # )
783 # SELECT
784 # {dst}_window.*,
785 # FROM (
786 # SELECT
787 # {dst}_base.*,
788 # ROW_NUMBER() OVER (
789 # PARTITION BY {dst_base}.{dimensions}
790 # ORDER BY {rank}
791 # ) AS rownum
792 # ) {dst}_window
793 # WHERE
794 # {dst}_window.rownum = 1;
795 #
796 # The outermost SELECT will be represented by the QueryBuilder we
797 # return. The QueryBuilder we're given corresponds to the Common Table
798 # Expression (CTE) at the top.
799 #
800 # For SQLite only, we could use a much simpler GROUP BY instead,
801 # because it extends the standard to do exactly what we want when MIN
802 # or MAX appears once and a column does not have an aggregate function
803 # (https://www.sqlite.org/quirks.html). But since that doesn't work
804 # with PostgreSQL it doesn't help us.
805 #
806 builder = builder.nested(cte=True, force=True)
807 # We start by filling out the "window" SELECT statement...
808 partition_by = [builder.joiner.dimension_keys[d][0] for d in builder.columns.dimensions.required]
809 rank_sql_column = sqlalchemy.case(
810 {record.key: n for n, record in enumerate(plan.search.collection_records)},
811 value=builder.joiner.fields[plan.dataset_type]["collection_key"],
812 )
813 if partition_by:
814 builder.joiner.special["_ROWNUM"] = sqlalchemy.sql.func.row_number().over(
815 partition_by=partition_by, order_by=rank_sql_column
816 )
817 else:
818 builder.joiner.special["_ROWNUM"] = sqlalchemy.sql.func.row_number().over(
819 order_by=rank_sql_column
820 )
821 # ... and then turn that into a subquery with a constraint on rownum.
822 builder = builder.nested(force=True)
823 # We can now add the WHERE constraint on rownum into the outer query.
824 builder.joiner.where(builder.joiner.special["_ROWNUM"] == 1)
825 # Don't propagate _ROWNUM into downstream queries.
826 del builder.joiner.special["_ROWNUM"]
827 return builder
829 def _analyze_query_tree(self, tree: qt.QueryTree) -> tuple[QueryJoinsPlan, QueryBuilder]:
830 """Start constructing a plan for building a query from a
831 `.queries.tree.QueryTree`.
833 Parameters
834 ----------
835 tree : `.queries.tree.QueryTree`
836 Description of the joins and row filters in the query.
838 Returns
839 -------
840 plan : `QueryJoinsPlan`
841 Initial component of the plan relevant for the "joins" stage,
842 including all joins and columns needed by ``tree``. Additional
843 columns will be added to this plan later.
844 builder : `QueryBuilder`
845 Builder object initialized with overlap joins and constraints
846 potentially included, with the remainder still present in
 847 `QueryJoinsPlan.predicate`.
848 """
849 # Delegate to the dimensions manager to rewrite the predicate and start
850 # a QueryBuilder to cover any spatial overlap joins or constraints.
851 # We'll return that QueryBuilder at the end.
852 (
853 predicate,
854 builder,
855 ) = self.managers.dimensions.process_query_overlaps(
856 tree.dimensions,
857 tree.predicate,
858 tree.get_joined_dimension_groups(),
859 )
860 result = QueryJoinsPlan(predicate=predicate, columns=builder.columns)
861 # Add columns required by postprocessing.
862 builder.postprocessing.gather_columns_required(result.columns)
863 # We also check that the predicate doesn't reference any dimensions
864 # without constraining their governor dimensions, since that's a
865 # particularly easy mistake to make and it's almost never intentional.
 866 # We also allow the registry default data ID to provide governor values.
867 where_columns = qt.ColumnSet(self.universe.empty.as_group())
868 result.predicate.gather_required_columns(where_columns)
869 for governor in where_columns.dimensions.governors:
870 if governor not in result.constraint_data_id:
871 if governor in self._default_data_id.dimensions:
872 result.constraint_data_id[governor] = self._default_data_id[governor]
873 else:
874 raise InvalidQueryError(
875 f"Query 'where' expression references a dimension dependent on {governor} without "
876 "constraining it directly."
877 )
878 # Add materializations, which can also bring in more postprocessing.
879 for m_key, m_dimensions in tree.materializations.items():
880 m_state = self._materializations[m_key]
881 result.materializations[m_key] = m_dimensions
882 # When a query is materialized, the new tree has an empty
883 # (trivially true) predicate because the original was used to make
884 # the materialized rows. But the original postprocessing isn't
885 # executed when the materialization happens, so we have to include
886 # it here.
887 builder.postprocessing.spatial_join_filtering.extend(
888 m_state.postprocessing.spatial_join_filtering
889 )
890 builder.postprocessing.spatial_where_filtering.extend(
891 m_state.postprocessing.spatial_where_filtering
892 )
893 # Add data coordinate uploads.
894 result.data_coordinate_uploads.update(tree.data_coordinate_uploads)
895 # Add dataset_searches and filter out collections that don't have the
896 # right dataset type or governor dimensions.
897 for dataset_type_name, dataset_search in tree.datasets.items():
898 resolved_dataset_search = self._resolve_dataset_search(
899 dataset_type_name, dataset_search, result.constraint_data_id
900 )
901 result.datasets[dataset_type_name] = resolved_dataset_search
902 if not resolved_dataset_search.collection_records:
903 result.messages.append(f"Search for dataset type {dataset_type_name!r} is doomed to fail.")
904 result.messages.extend(resolved_dataset_search.messages)
905 return result, builder
907 def _resolve_dataset_search(
908 self,
909 dataset_type_name: str,
910 dataset_search: qt.DatasetSearch,
911 constraint_data_id: Mapping[str, DataIdValue],
912 ) -> ResolvedDatasetSearch:
913 """Resolve the collections that should actually be searched for
914 datasets of a particular type.
916 Parameters
917 ----------
918 dataset_type_name : `str`
919 Name of the dataset being searched for.
920 dataset_search : `.queries.tree.DatasetSearch`
921 Struct holding the dimensions and original collection search path.
922 constraint_data_id : `~collections.abc.Mapping`
923 Data ID mapping derived from the query predicate that may be used
924 to filter out some collections based on their governor dimensions.
926 Returns
927 -------
928 resolved : `ResolvedDatasetSearch`
 929 Struct that extends ``dataset_search`` with the dataset type name
930 and resolved collection records.
931 """
932 result = ResolvedDatasetSearch(dataset_type_name, dataset_search.dimensions)
933 for collection_record, collection_summary in self._resolve_collection_path(
934 dataset_search.collections
935 ):
936 rejected: bool = False
937 if result.name not in collection_summary.dataset_types.names:
938 result.messages.append(
939 f"No datasets of type {result.name!r} in collection {collection_record.name!r}."
940 )
941 rejected = True
942 for governor in constraint_data_id.keys() & collection_summary.governors.keys():
943 if constraint_data_id[governor] not in collection_summary.governors[governor]:
944 result.messages.append(
945 f"No datasets with {governor}={constraint_data_id[governor]!r} "
946 f"in collection {collection_record.name!r}."
947 )
948 rejected = True
949 if not rejected:
950 if collection_record.type is CollectionType.CALIBRATION:
951 result.is_calibration_search = True
952 result.collection_records.append(collection_record)
953 if result.dimensions != self.get_dataset_type(dataset_type_name).dimensions.as_group():
954 # This is really for server-side defensiveness; it's hard to
955 # imagine the query getting different dimensions for a dataset
956 # type in two calls to the same query driver.
957 raise InvalidQueryError(
958 f"Incorrect dimensions {result.dimensions} for dataset {dataset_type_name} "
959 f"in query (vs. {self.get_dataset_type(dataset_type_name).dimensions.as_group()})."
960 )
961 return result
963 def _resolve_collection_path(
964 self, collections: Iterable[str]
965 ) -> list[tuple[CollectionRecord, CollectionSummary]]:
966 """Expand an ordered iterable of collection names into a list of
967 collection records and summaries.
969 Parameters
970 ----------
971 collections : `~collections.abc.Iterable` [ `str` ]
972 Ordered iterable of collections.
974 Returns
975 -------
976 resolved : `list` [ `tuple` [ `.registry.interfaces.CollectionRecord`,\
977 `.registry.CollectionSummary` ] ]
978 Tuples of collection record and summary. `~CollectionType.CHAINED`
979 collections are flattened out and not included.
980 """
981 result: list[tuple[CollectionRecord, CollectionSummary]] = []
982 done: set[str] = set()
984 # Eventually we really want this recursive Python code to be replaced
985 # by a recursive SQL query, especially if we extend this method to
 986 # support collection glob patterns for public APIs we don't yet
987 # have in the new query system (but will need to add).
989 def recurse(collection_names: Iterable[str]) -> None:
990 for collection_name in collection_names:
991 if collection_name not in done:
992 done.add(collection_name)
993 record = self.managers.collections.find(collection_name)
995 if record.type is CollectionType.CHAINED:
996 recurse(cast(ChainedCollectionRecord, record).children)
997 else:
998 result.append((record, self.managers.datasets.getCollectionSummary(record)))
1000 recurse(collections)
1002 return result
1004 def _join_materialization(
1005 self,
1006 joiner: QueryJoiner,
1007 key: qt.MaterializationKey,
1008 dimensions: DimensionGroup,
1009 ) -> frozenset[str]:
1010 """Join a materialization into an under-construction query.
1012 Parameters
1013 ----------
1014 joiner : `QueryJoiner`
1015 Component of a `QueryBuilder` that holds the FROM and WHERE
1016 clauses. This will be modified in-place on return.
1017 key : `.queries.tree.MaterializationKey`
1018 Unique identifier created for this materialization when it was
1019 created.
1020 dimensions : `DimensionGroup`
1021 Dimensions of the materialization.
1023 Returns
1024 -------
1025 datasets : `frozenset` [ `str` ]
1026 Dataset types that were included as constraints when this
1027 materialization was created.
1028 """
1029 columns = qt.ColumnSet(dimensions)
1030 m_state = self._materializations[key]
1031 joiner.join(QueryJoiner(self.db, m_state.table).extract_columns(columns, m_state.postprocessing))
1032 return m_state.datasets
1034 def _join_dataset_search(
1035 self,
1036 joiner: QueryJoiner,
1037 resolved_search: ResolvedDatasetSearch,
1038 fields: Set[str],
1039 ) -> None:
1040 """Join a dataset search into an under-construction query.
1042 Parameters
1043 ----------
1044 joiner : `QueryJoiner`
1045 Component of a `QueryBuilder` that holds the FROM and WHERE
1046 clauses. This will be modified in-place on return.
1047 resolved_search : `ResolvedDatasetSearch`
1048 Struct that describes the dataset type and collections.
1049 fields : `~collections.abc.Set` [ `str` ]
1050 Dataset fields to include.
1051 """
1052 storage = self.managers.datasets[resolved_search.name]
1053 # The next two asserts will need to be dropped (and the implications
1054 # dealt with instead) if materializations start having dataset fields.
1055 assert (
1056 resolved_search.name not in joiner.fields
1057 ), "Dataset fields have unexpectedly already been joined in."
1058 assert (
1059 resolved_search.name not in joiner.timespans
1060 ), "Dataset timespan has unexpectedly already been joined in."
1061 joiner.join(storage.make_query_joiner(resolved_search.collection_records, fields))
1064@dataclasses.dataclass
1065class _MaterializationState:
1066 table: sqlalchemy.Table
1067 datasets: frozenset[str]
1068 postprocessing: Postprocessing
1071class _Cursor:
1072 """A helper class for managing paged query results and cursor lifetimes.
1074 This class holds a context manager for the SQLAlchemy cursor object but is
1075 not itself a context manager. It always cleans up (i.e. calls its `close`
1076 method) when it raises an exception or exhausts the cursor, but external
1077 code is responsible for calling `close` when the cursor is abandoned before
1078 it is exhausted, including when that happens due to an external exception.
1080 Parameters
1081 ----------
 1082 db : `.registry.interfaces.Database`
1083 Database to run the query against.
1084 sql : `sqlalchemy.Executable`
1085 SQL query to execute.
 1086 result_spec : `ResultSpec`
1087 Specification of the result type.
1088 name_shrinker : `NameShrinker` or `None`
1089 Object that was used to shrink dataset column names to fit within the
1090 database identifier limit.
1091 postprocessing : `Postprocessing`
1092 Post-query filtering and checks to perform.
1093 raw_page_size : `int`
1094 Maximum number of SQL result rows to return in each page, before
1095 postprocessing.
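
 Notes
 -----
 A sketch of the intended lifetime, assuming ``db``, ``sql_select``,
 ``spec``, and ``postprocessing`` are built as in
 `DirectQueryDriver.execute`::

     cursor = _Cursor(db, sql_select, result_spec=spec, name_shrinker=None,
                      postprocessing=postprocessing, raw_page_size=10000)
     page = cursor.next()
     while page.next_key is not None:
         page = cursor.next()  # cursor closes itself once exhausted
     # If iteration is abandoned early, close() must be called explicitly.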
1096 """
1098 def __init__(
1099 self,
1100 db: Database,
1101 sql: sqlalchemy.Executable,
1102 result_spec: ResultSpec,
1103 name_shrinker: NameShrinker | None,
1104 postprocessing: Postprocessing,
1105 raw_page_size: int,
1106 ):
1107 self._result_spec = result_spec
1108 self._name_shrinker = name_shrinker
1109 self._raw_page_size = raw_page_size
1110 self._postprocessing = postprocessing
1111 self._timespan_repr_cls = db.getTimespanRepresentation()
1112 self._context = db.query(sql, execution_options=dict(yield_per=raw_page_size))
1113 cursor = self._context.__enter__()
1114 try:
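 # partitions() yields row chunks whose size is bounded by the yield_per
 # execution option set above, i.e. one chunk per result page.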
1115 self._iterator = cursor.partitions()
1116 except: # noqa: E722
1117 self._context.__exit__(*sys.exc_info())
1118 raise
1120 def close(self, exc_type: Any = None, exc_value: Any = None, traceback: Any = None) -> None:
1121 """Close this cursor.
1123 Parameters
1124 ----------
1125 exc_type : `type`
1126 Exception type as obtained from `sys.exc_info`, or `None` if there
1127 was no error.
1128 exc_value : `BaseException` or `None`
1129 Exception instance as obtained from `sys.exc_info`, or `None` if
1130 there was no error.
1131 traceback : `object`
1132 Traceback as obtained from `sys.exc_info`, or `None` if there was
1133 no error.
1134 """
1135 self._context.__exit__(exc_type, exc_value, traceback)
1137 def next(self) -> ResultPage:
1138 """Return the next result page from this query.
 1140 When there are no more results after this result page, the `next_key`
1141 attribute of the returned object is `None` and the cursor will be
1142 closed. The cursor is also closed if this method raises an exception.
1143 """
1144 try:
1145 raw_page = next(self._iterator, tuple())
1146 if len(raw_page) == self._raw_page_size:
1147 # There's some chance we got unlucky and this page exactly
1148 # finishes off the query, and we won't know the next page does
1149 # not exist until we try to fetch it. But that's better than
1150 # always fetching the next page up front.
1151 next_key = uuid.uuid4()
1152 else:
1153 next_key = None
1154 self.close()
1156 postprocessed_rows = self._postprocessing.apply(raw_page)
1157 match self._result_spec:
1158 case DimensionRecordResultSpec():
1159 return self._convert_dimension_record_results(postprocessed_rows, next_key)
1160 case _:
1161 raise NotImplementedError("TODO")
1162 except: # noqa: E722
1163 self._context.__exit__(*sys.exc_info())
1164 raise
1166 def _convert_dimension_record_results(
1167 self,
1168 raw_rows: Iterable[sqlalchemy.Row],
1169 next_key: PageKey | None,
1170 ) -> DimensionRecordResultPage:
1171 """Convert a raw SQL result iterable into a page of `DimensionRecord`
1172 query results.
1174 Parameters
1175 ----------
1176 raw_rows : `~collections.abc.Iterable` [ `sqlalchemy.Row` ]
1177 Iterable of SQLAlchemy rows, with `Postprocessing` filters already
1178 applied.
1179 next_key : `PageKey` or `None`
1180 Key for the next page to add into the returned page object.
1182 Returns
1183 -------
1184 result_page : `DimensionRecordResultPage`
1185 Page object that holds a `DimensionRecord` container.
1186 """
1187 result_spec = cast(DimensionRecordResultSpec, self._result_spec)
1188 record_set = DimensionRecordSet(result_spec.element)
1189 record_cls = result_spec.element.RecordClass
1190 if isinstance(result_spec.element, SkyPixDimension):
1191 pixelization = result_spec.element.pixelization
1192 id_qualified_name = qt.ColumnSet.get_qualified_name(result_spec.element.name, None)
1193 for raw_row in raw_rows:
1194 pixel_id = raw_row._mapping[id_qualified_name]
1195 record_set.add(record_cls(id=pixel_id, region=pixelization.pixel(pixel_id)))
1196 else:
1197 # Mapping from DimensionRecord attribute name to qualified column
1198 # name, but as a list of tuples since we'd just iterate over items
1199 # anyway.
1200 column_map = list(
1201 zip(
1202 result_spec.element.schema.dimensions.names,
1203 result_spec.element.dimensions.names,
1204 )
1205 )
1206 for field in result_spec.element.schema.remainder.names:
1207 if field != "timespan":
1208 column_map.append(
1209 (field, qt.ColumnSet.get_qualified_name(result_spec.element.name, field))
1210 )
1211 if result_spec.element.temporal:
1212 timespan_qualified_name = qt.ColumnSet.get_qualified_name(
1213 result_spec.element.name, "timespan"
1214 )
1215 else:
1216 timespan_qualified_name = None
1217 for raw_row in raw_rows:
1218 m = raw_row._mapping
1219 d = {k: m[v] for k, v in column_map}
1220 if timespan_qualified_name is not None:
1221 d["timespan"] = self._timespan_repr_cls.extract(m, name=timespan_qualified_name)
1222 record_set.add(record_cls(**d))
1223 return DimensionRecordResultPage(spec=result_spec, next_key=next_key, rows=record_set)