Coverage for python/lsst/daf/butler/registry/queries/_query_context.py: 55% of 74 statements
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("QueryContext",)

from abc import abstractmethod
from collections.abc import Iterable, Set
from contextlib import AbstractContextManager
from typing import Any

import lsst.sphgeom
from lsst.daf.relation import (
    ColumnExpression,
    ColumnTag,
    Engine,
    EngineError,
    Predicate,
    Processor,
    Relation,
    UnaryOperation,
    iteration,
)

from ..._column_tags import DimensionKeyColumnTag
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, SkyPixDimension


class QueryContext(Processor, AbstractContextManager["QueryContext"]):
    """A context manager interface for query operations that require some
    connection-like state.

    Notes
    -----
    `QueryContext` implementations are usually paired with a `QueryBackend`
    implementation, with the division of responsibilities as follows:

    - `QueryContext` implements the `lsst.daf.relation.Processor` interface,
      and is hence responsible for executing multi-engine relation trees.

    - `QueryContext` manages all state whose lifetime is a single query or set
      of related queries (e.g. temporary tables) via its context manager
      interface. Methods that do not involve this state should not require the
      context manager to have been entered.

    - `QueryContext` objects should be easy to construct by registry helper
      code that doesn't have access to the full `Registry` data structure
      itself, while `QueryBackend` instances can generally only be constructed
      by code that does see essentially the full registry (for example,
      `SqlQueryBackend` holds a `RegistryManagerInstances` struct, while
      `SqlQueryContext` can be constructed with just a `Database` and
      `ColumnTypeInfo`).

    - `QueryBackend.context` is a factory for the associated `QueryContext`
      type.

    - `QueryBackend` methods that return relations accept the `QueryContext`
      returned by its `~QueryBackend.context` method in case those methods
      require state that should be cleaned up after the query is complete.
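
    Examples
    --------
    A minimal usage sketch (hypothetical names: ``backend`` is assumed to be
    a `QueryBackend` and ``relation`` a relation tree built elsewhere)::

        with backend.context() as context:
            relation = context.make_initial_relation(relation)
            for row in context.fetch_iterable(relation):
                ...  # each row is a mapping from ColumnTag to value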
85 """
87 def __init__(self) -> None:
88 self.iteration_engine = iteration.Engine()
89 self.iteration_engine.functions[regions_overlap.__name__] = regions_overlap

    iteration_engine: iteration.Engine
    """The relation engine that all relations must ultimately be transferred
    to in order to be executed by this context.
    """

    @property
    def preferred_engine(self) -> Engine:
        """Return the relation engine that this context prefers to execute
        operations in (`lsst.daf.relation.Engine`).
        """
        return self.iteration_engine

    @property
    @abstractmethod
    def is_open(self) -> bool:
        """Whether the context manager has been entered (`bool`)."""
        raise NotImplementedError()

    def make_initial_relation(self, relation: Relation | None = None) -> Relation:
        """Construct an initial relation suitable for this context.

        Parameters
        ----------
        relation : `Relation`, optional
            A user-provided initial relation. Must be included by
            implementations when provided, but may be modified (e.g. by
            adding a transfer to a new engine) when needed to satisfy the
            context's invariants.

        Returns
        -------
        initial : `Relation`
            The given relation if provided, or a join-identity relation in
            the preferred engine otherwise.
        """
        if relation is None:
            return self.preferred_engine.make_join_identity_relation()
        return relation

    def fetch_iterable(self, relation: Relation) -> iteration.RowIterable:
        """Execute the given relation and return its rows as an iterable of
        mappings.

        Parameters
        ----------
        relation : `Relation`
            Relation representing the query to execute.

        Returns
        -------
        rows : `~lsst.daf.relation.iteration.RowIterable`
            An iterable over rows, with each row a mapping from `ColumnTag`
            to column value.

        Notes
        -----
        A transfer to `iteration_engine` will be added to the root (end) of
        the relation tree if the root is not already in the iteration engine.

        Any transfers from other engines or persistent materializations will
        be handled by delegating to `process` before execution in the
        iteration engine.

        To ensure the result is a multi-pass Python collection in memory,
        ensure the given tree ends with a materialization operation in the
        iteration engine.
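
        Examples
        --------
        A sketch of forcing a multi-pass, in-memory result (``relation`` is
        assumed to be a relation tree built elsewhere)::

            relation = relation.transferred_to(context.iteration_engine)
            rows = context.fetch_iterable(relation.materialized())
            first = list(rows)
            second = list(rows)  # safe: rows were materialized in memory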
151 """
152 # This transfer does nothing if the relation is already in the
153 # iteration engine.
154 relation = relation.transferred_to(self.iteration_engine)
155 relation = self.process(relation)
156 return self.iteration_engine.execute(relation)

    @abstractmethod
    def count(self, relation: Relation, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows in the given relation.

        Parameters
        ----------
        relation : `Relation`
            Relation whose rows are to be counted.
        exact : `bool`, optional
            If `True` (default), return the exact number of rows. If `False`,
            returning an upper bound is permitted if it can be done much more
            efficiently, e.g. by running a SQL ``SELECT COUNT(*)`` query but
            ignoring client-side filtering that would otherwise take place.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require
            running the full query and then throwing away the result rows
            after counting them. If `False`, this is an error, as the user
            would usually be better off executing the query first to fetch
            its rows into a new query (or passing ``exact=False``). Ignored
            if ``exact=False``.

        Returns
        -------
        n_rows : `int`
            Number of rows in the relation, or an upper bound. This includes
            duplicates, if there are any.

        Raises
        ------
        RuntimeError
            Raised if an exact count was requested and could not be obtained
            without fetching and discarding rows.
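
        Examples
        --------
        A sketch of the intended calling patterns (``relation`` is assumed to
        be a relation tree built elsewhere)::

            # Cheap upper bound, e.g. a SQL SELECT COUNT(*) that ignores
            # client-side filtering.
            upper_bound = context.count(relation, exact=False)
            # Exact count, even if rows must be fetched and discarded.
            n_rows = context.count(relation, exact=True, discard=True)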
190 """
191 raise NotImplementedError()

    @abstractmethod
    def any(self, relation: Relation, *, execute: bool = True, exact: bool = True) -> bool:
        """Check whether this relation has any result rows at all.

        Parameters
        ----------
        relation : `Relation`
            Relation to be checked.
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no
            rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found. If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any_rows : `bool`
            Whether the relation has any rows, or if it may have any rows if
            ``exact=False``.

        Raises
        ------
        RuntimeError
            Raised if an exact check was requested and could not be obtained
            without executing the query.
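
        Examples
        --------
        A sketch of using the cheap form to short-circuit (``relation`` is
        assumed to be a relation tree built elsewhere)::

            if not context.any(relation, execute=False, exact=False):
                # The tree is provably empty without running any query.
                return []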
222 """
223 raise NotImplementedError()

    def transfer(self, source: Relation, destination: Engine, materialize_as: str | None) -> Any:
        # Docstring inherited from lsst.daf.relation.Processor.
        raise NotImplementedError("No transfers expected by base QueryContext implementation.")

    def materialize(self, base: Relation, name: str) -> Any:
        # Docstring inherited from lsst.daf.relation.Processor.
        if base.engine == self.iteration_engine:
            return self.iteration_engine.execute(base).materialized()
        raise EngineError(f"Unexpected engine {base.engine} for base QueryContext implementation.")

    @abstractmethod
    def restore_columns(
        self,
        relation: Relation,
        columns_required: Set[ColumnTag],
    ) -> tuple[Relation, set[ColumnTag]]:
        """Return a modified relation tree that attempts to restore columns
        that were dropped by a projection operation.

        Parameters
        ----------
        relation : `Relation`
            Original relation tree.
        columns_required : `~collections.abc.Set` [ `ColumnTag` ]
            Columns to attempt to restore. May include columns already
            present in the relation.

        Returns
        -------
        modified : `Relation`
            Possibly-modified tree with any projections that had dropped
            requested columns replaced by projections that do not drop these
            columns. Care is taken to ensure that join common columns and
            deduplication behavior are preserved, even if that means some
            columns are not restored.
        columns_found : `set` [ `ColumnTag` ]
            Columns from those requested that are present in ``modified``.
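
        Examples
        --------
        A sketch of the intended calling pattern (names are illustrative)::

            modified, found = context.restore_columns(relation, columns_required)
            missing = set(columns_required) - found
            if missing:
                ...  # fall back to re-deriving the missing columns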
262 """
263 raise NotImplementedError()

    @abstractmethod
    def strip_postprocessing(self, relation: Relation) -> tuple[Relation, list[UnaryOperation]]:
        """Return a modified relation tree without any iteration-engine
        operations and any transfer to the iteration engine at the end.

        Parameters
        ----------
        relation : `Relation`
            Original relation tree.

        Returns
        -------
        modified : `Relation`
            Stripped relation tree, with engine != `iteration_engine`.
        stripped : `list` [ `UnaryOperation` ]
            Operations that were stripped, in the same order they should be
            reapplied (with ``transfer=True,
            preferred_engine=iteration_engine``) to recover the original
            tree.
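
        Examples
        --------
        A sketch of stripping, inspecting, and reapplying, assuming the
        ``transfer`` and ``preferred_engine`` keywords of
        `~lsst.daf.relation.UnaryOperation.apply` noted above (names are
        illustrative)::

            stripped_tree, operations = context.strip_postprocessing(relation)
            ...  # inspect or modify stripped_tree in its original engine
            for operation in operations:
                stripped_tree = operation.apply(
                    stripped_tree,
                    transfer=True,
                    preferred_engine=context.iteration_engine,
                )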
283 """
284 raise NotImplementedError()

    @abstractmethod
    def drop_invalidated_postprocessing(self, relation: Relation, new_columns: Set[ColumnTag]) -> Relation:
        """Return a modified relation tree without iteration-engine
        operations that require columns that are not in the given set.

        Parameters
        ----------
        relation : `Relation`
            Original relation tree.
        new_columns : `~collections.abc.Set` [ `ColumnTag` ]
            The only columns that postprocessing operations may require if
            they are to be retained in the returned relation tree.

        Returns
        -------
        modified : `Relation`
            Modified relation tree with postprocessing operations
            incompatible with ``new_columns`` removed.
        """
        raise NotImplementedError()

    def make_data_coordinate_predicate(
        self,
        data_coordinate: DataCoordinate,
        full: bool | None = None,
    ) -> Predicate:
        """Return a `~lsst.daf.relation.column_expressions.Predicate` that
        represents a data ID constraint.

        Parameters
        ----------
        data_coordinate : `DataCoordinate`
            Data ID whose keys and values should be transformed to predicate
            equality constraints.
        full : `bool`, optional
            Whether to include constraints on implied dimensions (default is
            to include implied dimensions if ``data_coordinate`` has them).

        Returns
        -------
        predicate : `lsst.daf.relation.column_expressions.Predicate`
            New predicate.
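
        Examples
        --------
        A sketch, assuming ``universe`` is a `DimensionUniverse` (the data ID
        values are illustrative)::

            data_id = DataCoordinate.standardize(
                {"instrument": "HSC", "visit": 42}, universe=universe
            )
            predicate = context.make_data_coordinate_predicate(data_id)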
328 """
329 if full is None:
330 full = data_coordinate.hasFull()
331 dimension_names = (
332 data_coordinate.required if not full else data_coordinate.dimensions.data_coordinate_keys
333 )
334 terms: list[Predicate] = []
335 for dimension_name in dimension_names:
336 dimension = data_coordinate.universe.dimensions[dimension_name]
337 dtype = dimension.primaryKey.getPythonType()
338 terms.append(
339 ColumnExpression.reference(DimensionKeyColumnTag(dimension_name), dtype=dtype).eq(
340 ColumnExpression.literal(data_coordinate[dimension_name], dtype=dtype)
341 )
342 )
343 return Predicate.logical_and(*terms)

    def make_spatial_region_skypix_predicate(
        self,
        dimension: SkyPixDimension,
        region: lsst.sphgeom.Region,
    ) -> Predicate:
        """Return a `~lsst.daf.relation.column_expressions.Predicate` that
        tests whether a skypix dimension key column lies within the
        pixelization envelope of a region.

        The returned predicate uses only integer comparisons, so it can be
        evaluated in any engine, including SQL. Because the envelope is a
        conservative superset of the pixels that actually overlap the region,
        its results are usually refined afterwards by a region-overlap test
        in the iteration engine (see
        `make_spatial_region_overlap_predicate`).

        Parameters
        ----------
        dimension : `SkyPixDimension`
            Dimension whose key column is being constrained.
        region : `lsst.sphgeom.Region`
            Spatial region constraint to test against.

        Returns
        -------
        predicate : `lsst.daf.relation.column_expressions.Predicate`
            New predicate with the `DimensionKeyColumnTag` associated with
            ``dimension`` as its only required column.
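
        Examples
        --------
        A sketch, assuming ``universe`` is a `DimensionUniverse` with an
        ``htm7`` skypix dimension::

            region = lsst.sphgeom.Circle(
                lsst.sphgeom.UnitVector3d(lsst.sphgeom.LonLat.fromDegrees(30.0, 10.0)),
                lsst.sphgeom.Angle.fromDegrees(0.5),
            )
            predicate = context.make_spatial_region_skypix_predicate(
                universe["htm7"], region
            )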
369 """
370 ref = ColumnExpression.reference(DimensionKeyColumnTag(dimension.name), dtype=int)
371 terms: list[Predicate] = []
372 for begin, end in dimension.pixelization.envelope(region):
373 if begin + 1 == end:
374 terms.append(ref.eq(ColumnExpression.literal(begin, dtype=int)))
375 else:
376 terms.append(
377 ref.ge(ColumnExpression.literal(begin, dtype=int)).logical_and(
378 ref.lt(ColumnExpression.literal(end, dtype=int))
379 )
380 )
381 return Predicate.logical_or(*terms)

    def make_spatial_region_overlap_predicate(
        self,
        lhs: ColumnExpression,
        rhs: ColumnExpression,
    ) -> Predicate:
        """Return a `~lsst.daf.relation.column_expressions.Predicate` that
        tests whether two regions overlap.

        This operation only works with
        `iteration engines <lsst.daf.relation.iteration.Engine>`; it is
        usually used to refine the result of a join or constraint on
        `SkyPixDimension` columns in SQL.

        Parameters
        ----------
        lhs : `lsst.daf.relation.column_expressions.ColumnExpression`
            Expression for one spatial region.
        rhs : `lsst.daf.relation.column_expressions.ColumnExpression`
            Expression for the other spatial region.

        Returns
        -------
        predicate : `lsst.daf.relation.column_expressions.Predicate`
            New predicate with ``lhs`` and ``rhs`` as its required columns.
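
        Examples
        --------
        A sketch with hypothetical region column tags ``a_tag`` and
        ``b_tag``, assuming their columns hold `lsst.sphgeom.Region`
        values::

            lhs = ColumnExpression.reference(a_tag, dtype=lsst.sphgeom.Region)
            rhs = ColumnExpression.reference(b_tag, dtype=lsst.sphgeom.Region)
            predicate = context.make_spatial_region_overlap_predicate(lhs, rhs)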
407 """
408 return lhs.predicate_method(regions_overlap.__name__, rhs, supporting_engine_types={iteration.Engine})

    def make_timespan_overlap_predicate(
        self,
        tag: ColumnTag,
        timespan: Timespan,
    ) -> Predicate:
        """Return a `~lsst.daf.relation.column_expressions.Predicate` that
        tests whether a timespan column overlaps a timespan literal.

        Parameters
        ----------
        tag : `ColumnTag`
            Identifier for a timespan column.
        timespan : `Timespan`
            Timespan literal selected rows must overlap.

        Returns
        -------
        predicate : `lsst.daf.relation.column_expressions.Predicate`
            New predicate.
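
        Examples
        --------
        A sketch, assuming ``tag`` identifies a timespan column::

            from astropy.time import Time

            timespan = Timespan(
                Time("2024-01-01T00:00:00", scale="tai"),
                Time("2024-01-02T00:00:00", scale="tai"),
            )
            predicate = context.make_timespan_overlap_predicate(tag, timespan)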
429 """
430 return ColumnExpression.reference(tag, dtype=Timespan).predicate_method(
431 "overlaps", ColumnExpression.literal(timespan)
432 )

    def make_data_id_relation(
        self, data_ids: Set[DataCoordinate], dimension_names: Iterable[str]
    ) -> Relation:
        """Transform a set of data IDs into a relation.

        Parameters
        ----------
        data_ids : `~collections.abc.Set` [ `DataCoordinate` ]
            Data IDs to upload. All must have at least the dimensions given,
            but may have more.
        dimension_names : `~collections.abc.Iterable` [ `str` ]
            Names of dimensions that will be the columns of the relation.

        Returns
        -------
        relation : `Relation`
            Relation in the iteration engine.
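
        Examples
        --------
        A sketch, assuming ``data_ids`` is a set of `DataCoordinate` objects
        that all have at least ``instrument`` and ``visit``::

            relation = context.make_data_id_relation(
                data_ids, ["instrument", "visit"]
            )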
451 """
452 tags = DimensionKeyColumnTag.generate(dimension_names)
453 payload = iteration.RowSequence(
454 [{tag: data_id[tag.dimension] for tag in tags} for data_id in data_ids]
455 ).to_mapping(tags)
456 return self.iteration_engine.make_leaf(frozenset(tags), payload, name_prefix="upload")


def regions_overlap(a: lsst.sphgeom.Region, b: lsst.sphgeom.Region) -> bool:
    """Test whether a pair of regions overlap.

    Parameters
    ----------
    a : `lsst.sphgeom.Region`
        One region.
    b : `lsst.sphgeom.Region`
        The other region.

    Returns
    -------
    overlap : `bool`
        Whether the regions overlap.
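
    Examples
    --------
    A sketch with two overlapping circles::

        a = lsst.sphgeom.Circle(
            lsst.sphgeom.UnitVector3d(lsst.sphgeom.LonLat.fromDegrees(0.0, 0.0)),
            lsst.sphgeom.Angle.fromDegrees(1.0),
        )
        b = lsst.sphgeom.Circle(
            lsst.sphgeom.UnitVector3d(lsst.sphgeom.LonLat.fromDegrees(0.5, 0.0)),
            lsst.sphgeom.Angle.fromDegrees(1.0),
        )
        assert regions_overlap(a, b)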
473 """
474 return not (a.relate(b) & lsst.sphgeom.DISJOINT)