# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("QueryContext",)

from abc import abstractmethod
from collections.abc import Iterable, Set
from contextlib import AbstractContextManager
from typing import Any

import lsst.sphgeom
from lsst.daf.relation import (
    ColumnExpression,
    ColumnTag,
    Engine,
    EngineError,
    Predicate,
    Processor,
    Relation,
    UnaryOperation,
    iteration,
)

from ...core import DataCoordinate, DimensionKeyColumnTag, SkyPixDimension, Timespan


class QueryContext(Processor, AbstractContextManager["QueryContext"]):
    """A context manager interface for query operations that require some
    connection-like state.

    Notes
    -----
    `QueryContext` implementations are usually paired with a `QueryBackend`
    implementation, with the division of responsibilities as follows:

    - `QueryContext` implements the `lsst.daf.relation.Processor` interface,
      and is hence responsible for executing multi-engine relation trees.

    - `QueryContext` manages all state whose lifetime is a single query or
      set of related queries (e.g. temporary tables) via its context manager
      interface. Methods that do not involve this state should not require
      the context manager to have been entered.

    - `QueryContext` objects should be easy to construct by registry helper
      code that doesn't have access to the full `Registry` data structure
      itself, while `QueryBackend` instances can generally only be
      constructed by code that does see essentially the full registry (for
      example, `SqlQueryBackend` holds a `RegistryManagerInstances` struct,
      while `SqlQueryContext` can be constructed with just a `Database` and
      `ColumnTypeInfo`).

    - `QueryBackend.context` is a factory for the associated `QueryContext`
      type.

    - `QueryBackend` methods that return relations accept the `QueryContext`
      returned by its `~QueryBackend.context` method in case those methods
      require state that should be cleaned up after the query is complete.
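
    Examples
    --------
    A minimal sketch of the intended pairing, assuming ``backend`` is a
    `QueryBackend` implementation and ``relation`` is a relation tree it
    produced (both hypothetical names):

    >>> with backend.context() as context:
    ...     rows = list(context.fetch_iterable(relation))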
83 """

    def __init__(self) -> None:
        self.iteration_engine = iteration.Engine()
        self.iteration_engine.functions[regions_overlap.__name__] = regions_overlap

    iteration_engine: iteration.Engine
    """The relation engine that all relations must ultimately be transferred
    to in order to be executed by this context.
    """

    @property
    def preferred_engine(self) -> Engine:
        """Return the relation engine that this context prefers to execute
        operations in (`lsst.daf.relation.Engine`).
        """
        return self.iteration_engine

    @property
    @abstractmethod
    def is_open(self) -> bool:
        """Whether the context manager has been entered (`bool`)."""
        raise NotImplementedError()

    def make_initial_relation(self, relation: Relation | None = None) -> Relation:
        """Construct an initial relation suitable for this context.

        Parameters
        ----------
        relation : `Relation`, optional
            A user-provided initial relation. Must be included by
            implementations when provided, but may be modified (e.g. by
            adding a transfer to a new engine) when needed to satisfy the
            context's invariants.

        Returns
        -------
        initial : `Relation`
            The given relation if provided, or a join-identity relation in
            the `preferred_engine` otherwise.
        """
        if relation is None:
            return self.preferred_engine.make_join_identity_relation()
        return relation

    def fetch_iterable(self, relation: Relation) -> iteration.RowIterable:
        """Execute the given relation and return its rows as an iterable of
        mappings.

        Parameters
        ----------
        relation : `Relation`
            Relation representing the query to execute.

        Returns
        -------
        rows : `~lsst.daf.relation.iteration.RowIterable`
            An iterable over rows, with each row a mapping from `ColumnTag`
            to column value.

        Notes
        -----
        A transfer to `iteration_engine` will be added to the root (end) of
        the relation tree if the root is not already in the iteration engine.

        Any transfers from other engines or persistent materializations will
        be handled by delegating to `process` before execution in the
        iteration engine.

        To ensure the result is a multi-pass Python collection in memory,
        ensure the given tree ends with a materialization operation in the
        iteration engine.
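
        Examples
        --------
        A minimal sketch, assuming ``context`` is a concrete `QueryContext`
        that has been entered and ``relation`` is a relation tree built for
        it (both hypothetical names):

        >>> for row in context.fetch_iterable(relation):
        ...     print(row)  # a mapping from ColumnTag to column value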
149 """
150 # This transfer does nothing if the relation is already in the
151 # iteration engine.
152 relation = relation.transferred_to(self.iteration_engine)
153 relation = self.process(relation)
154 return self.iteration_engine.execute(relation)

    @abstractmethod
    def count(self, relation: Relation, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows in the given relation.

        Parameters
        ----------
        relation : `Relation`
            Relation whose rows are to be counted.
        exact : `bool`, optional
            If `True` (default), return the exact number of rows. If
            `False`, returning an upper bound is permitted if it can be done
            much more efficiently, e.g. by running a SQL ``SELECT COUNT(*)``
            query but ignoring client-side filtering that would otherwise
            take place.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require
            running the full query and then throwing away the result rows
            after counting them. If `False`, this is an error, as the user
            would usually be better off executing the query first to fetch
            its rows into a new query (or passing ``exact=False``). Ignored
            if ``exact=False``.

        Returns
        -------
        n_rows : `int`
            Number of rows in the relation, or an upper bound. This includes
            duplicates, if there are any.

        Raises
        ------
        RuntimeError
            Raised if an exact count was requested and could not be obtained
            without fetching and discarding rows.
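
        Examples
        --------
        A sketch, assuming ``context`` and ``relation`` as above; passing
        ``exact=False`` permits a cheap upper bound:

        >>> n_rows = context.count(relation, exact=False)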
188 """
189 raise NotImplementedError()

    @abstractmethod
    def any(self, relation: Relation, *, execute: bool = True, exact: bool = True) -> bool:
        """Check whether this relation has any result rows at all.

        Parameters
        ----------
        relation : `Relation`
            Relation to be checked.
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no
            rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering
            if needed, until at least one result row is found. If `False`,
            the returned result does not account for post-query filtering,
            and hence may be `True` even when all result rows would be
            filtered out.

        Returns
        -------
        any_rows : `bool`
            Whether the relation has any rows, or whether it may have any
            rows when ``exact=False``.

        Raises
        ------
        RuntimeError
            Raised if an exact check was requested and could not be obtained
            without executing the query.
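
        Examples
        --------
        A sketch, assuming ``context`` and ``relation`` as above; an inexact
        check can avoid running the full query:

        >>> may_have_rows = context.any(relation, exact=False)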
220 """
221 raise NotImplementedError()
223 def transfer(self, source: Relation, destination: Engine, materialize_as: str | None) -> Any:
224 # Docstring inherited from lsst.daf.relation.Processor.
225 raise NotImplementedError("No transfers expected by base QueryContext implementation.")
227 def materialize(self, base: Relation, name: str) -> Any:
228 # Docstring inherited from lsst.daf.relation.Processor.
229 if base.engine == self.iteration_engine:
230 return self.iteration_engine.execute(base).materialized()
231 raise EngineError(f"Unexpected engine {base.engine} for base QueryContext implementation.")

    @abstractmethod
    def restore_columns(
        self,
        relation: Relation,
        columns_required: Set[ColumnTag],
    ) -> tuple[Relation, set[ColumnTag]]:
        """Return a modified relation tree that attempts to restore columns
        that were dropped by a projection operation.

        Parameters
        ----------
        relation : `Relation`
            Original relation tree.
        columns_required : `~collections.abc.Set` [ `ColumnTag` ]
            Columns to attempt to restore. May include columns already
            present in the relation.

        Returns
        -------
        modified : `Relation`
            Possibly-modified tree with any projections that had dropped
            requested columns replaced by projections that do not drop these
            columns. Care is taken to ensure that join common columns and
            deduplication behavior are preserved, even if that means some
            columns are not restored.
        columns_found : `set` [ `ColumnTag` ]
            Columns from those requested that are present in ``modified``.
        """
        raise NotImplementedError()

    @abstractmethod
    def strip_postprocessing(self, relation: Relation) -> tuple[Relation, list[UnaryOperation]]:
        """Return a modified relation tree without any iteration-engine
        operations and any transfer to the iteration engine at the end.

        Parameters
        ----------
        relation : `Relation`
            Original relation tree.

        Returns
        -------
        modified : `Relation`
            Stripped relation tree, with engine != `iteration_engine`.
        stripped : `list` [ `UnaryOperation` ]
            Operations that were stripped, in the same order they should be
            reapplied (with ``transfer=True,
            preferred_engine=iteration_engine``) to recover the original
            tree.
        """
        raise NotImplementedError()

    @abstractmethod
    def drop_invalidated_postprocessing(self, relation: Relation, new_columns: Set[ColumnTag]) -> Relation:
        """Return a modified relation tree without iteration-engine
        operations that require columns that are not in the given set.

        Parameters
        ----------
        relation : `Relation`
            Original relation tree.
        new_columns : `~collections.abc.Set` [ `ColumnTag` ]
            The only columns that postprocessing operations may require if
            they are to be retained in the returned relation tree.

        Returns
        -------
        modified : `Relation`
            Modified relation tree with postprocessing operations
            incompatible with ``new_columns`` removed.
        """
        raise NotImplementedError()

    def make_data_coordinate_predicate(
        self,
        data_coordinate: DataCoordinate,
        full: bool | None = None,
    ) -> Predicate:
        """Return a `~lsst.daf.relation.column_expressions.Predicate` that
        represents a data ID constraint.

        Parameters
        ----------
        data_coordinate : `DataCoordinate`
            Data ID whose keys and values should be transformed to predicate
            equality constraints.
        full : `bool`, optional
            Whether to include constraints on implied dimensions (default is
            to include implied dimensions if ``data_coordinate`` has them).

        Returns
        -------
        predicate : `lsst.daf.relation.column_expressions.Predicate`
            New predicate.
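
        Examples
        --------
        A sketch, assuming ``context`` is a concrete implementation and
        ``data_id`` is a `DataCoordinate` (hypothetical names); the result
        is a logical AND of one equality test per dimension:

        >>> predicate = context.make_data_coordinate_predicate(data_id)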
326 """
327 if full is None:
328 full = data_coordinate.hasFull()
329 dimensions = data_coordinate.graph.required if not full else data_coordinate.graph.dimensions
330 terms: list[Predicate] = []
331 for dimension in dimensions:
332 dtype = dimension.primaryKey.getPythonType()
333 terms.append(
334 ColumnExpression.reference(DimensionKeyColumnTag(dimension.name), dtype=dtype).eq(
335 ColumnExpression.literal(data_coordinate[dimension.name], dtype=dtype)
336 )
337 )
338 return Predicate.logical_and(*terms)

    def make_spatial_region_skypix_predicate(
        self,
        dimension: SkyPixDimension,
        region: lsst.sphgeom.Region,
    ) -> Predicate:
        """Return a `~lsst.daf.relation.column_expressions.Predicate` that
        tests whether a skypix dimension key column falls within the pixel
        envelope of a spatial region.

        The pixel envelope is a conservative superset of the region, so this
        predicate may accept pixels that do not truly overlap it; it is
        usually used to constrain a join on `SkyPixDimension` columns in
        SQL, with `make_spatial_region_overlap_predicate` used to refine the
        result afterwards.

        Parameters
        ----------
        dimension : `SkyPixDimension`
            Dimension whose key column is being constrained.
        region : `lsst.sphgeom.Region`
            Spatial region constraint to test against.

        Returns
        -------
        predicate : `lsst.daf.relation.column_expressions.Predicate`
            New predicate with the `DimensionKeyColumnTag` associated with
            ``dimension`` as its only required column.
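
        Examples
        --------
        A sketch, assuming ``htm7`` is a `SkyPixDimension` and ``region``
        an `lsst.sphgeom.Region` (both hypothetical names); the predicate
        is a logical OR of index-range tests from the pixelization envelope:

        >>> predicate = context.make_spatial_region_skypix_predicate(htm7, region)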
364 """
365 ref = ColumnExpression.reference(DimensionKeyColumnTag(dimension.name), dtype=int)
366 terms: list[Predicate] = []
367 for begin, end in dimension.pixelization.envelope(region):
368 if begin + 1 == end:
369 terms.append(ref.eq(ColumnExpression.literal(begin, dtype=int)))
370 else:
371 terms.append(
372 ref.ge(ColumnExpression.literal(begin, dtype=int)).logical_and(
373 ref.lt(ColumnExpression.literal(end, dtype=int))
374 )
375 )
376 return Predicate.logical_or(*terms)

    def make_spatial_region_overlap_predicate(
        self,
        lhs: ColumnExpression,
        rhs: ColumnExpression,
    ) -> Predicate:
        """Return a `~lsst.daf.relation.column_expressions.Predicate` that
        tests whether two regions overlap.

        This operation only works with
        `iteration engines <lsst.daf.relation.iteration.Engine>`; it is
        usually used to refine the result of a join or constraint on
        `SkyPixDimension` columns in SQL.

        Parameters
        ----------
        lhs : `lsst.daf.relation.column_expressions.ColumnExpression`
            Expression for one spatial region.
        rhs : `lsst.daf.relation.column_expressions.ColumnExpression`
            Expression for the other spatial region.

        Returns
        -------
        predicate : `lsst.daf.relation.column_expressions.Predicate`
            New predicate with ``lhs`` and ``rhs`` as its required columns.
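
        Examples
        --------
        A sketch, assuming ``lhs`` and ``rhs`` are `ColumnExpression`
        objects referencing two region columns (hypothetical names); the
        resulting predicate can only be evaluated after a transfer to the
        iteration engine:

        >>> predicate = context.make_spatial_region_overlap_predicate(lhs, rhs)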
402 """
403 return lhs.predicate_method(regions_overlap.__name__, rhs, supporting_engine_types={iteration.Engine})

    def make_timespan_overlap_predicate(
        self,
        tag: ColumnTag,
        timespan: Timespan,
    ) -> Predicate:
        """Return a `~lsst.daf.relation.column_expressions.Predicate` that
        tests whether a timespan column overlaps a timespan literal.

        Parameters
        ----------
        tag : `ColumnTag`
            Identifier for a timespan column.
        timespan : `Timespan`
            Timespan literal selected rows must overlap.

        Returns
        -------
        predicate : `lsst.daf.relation.column_expressions.Predicate`
            New predicate.
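
        Examples
        --------
        A sketch, assuming ``context`` and ``tag`` as above and a `Timespan`
        built from `astropy.time.Time` bounds (hypothetical values):

        >>> import astropy.time
        >>> timespan = Timespan(
        ...     astropy.time.Time("2020-01-01", scale="tai"),
        ...     astropy.time.Time("2020-01-02", scale="tai"),
        ... )
        >>> predicate = context.make_timespan_overlap_predicate(tag, timespan)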
424 """
425 return ColumnExpression.reference(tag, dtype=Timespan).predicate_method(
426 "overlaps", ColumnExpression.literal(timespan)
427 )

    def make_data_id_relation(
        self, data_ids: Set[DataCoordinate], dimension_names: Iterable[str]
    ) -> Relation:
        """Transform a set of data IDs into a relation.

        Parameters
        ----------
        data_ids : `~collections.abc.Set` [ `DataCoordinate` ]
            Data IDs to upload. All must have at least the dimensions given,
            but may have more.
        dimension_names : `~collections.abc.Iterable` [ `str` ]
            Names of dimensions that will be the columns of the relation.

        Returns
        -------
        relation : `Relation`
            Relation in the iteration engine.
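
        Examples
        --------
        A sketch, assuming ``context`` as above and ``data_ids`` is a set of
        `DataCoordinate` objects that all have at least the ``instrument``
        and ``visit`` dimensions (hypothetical values):

        >>> relation = context.make_data_id_relation(
        ...     data_ids, ["instrument", "visit"]
        ... )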
446 """
447 tags = DimensionKeyColumnTag.generate(dimension_names)
448 payload = iteration.RowSequence(
449 [{tag: data_id[tag.dimension] for tag in tags} for data_id in data_ids]
450 ).to_mapping(tags)
451 return self.iteration_engine.make_leaf(frozenset(tags), payload, name_prefix="upload")


def regions_overlap(a: lsst.sphgeom.Region, b: lsst.sphgeom.Region) -> bool:
    """Test whether a pair of regions overlap.

    Parameters
    ----------
    a : `lsst.sphgeom.Region`
        One region.
    b : `lsst.sphgeom.Region`
        The other region.

    Returns
    -------
    overlap : `bool`
        Whether the regions overlap.
    """
    return not (a.relate(b) & lsst.sphgeom.DISJOINT)