Coverage for python/lsst/daf/butler/registry/queries/_query_context.py: 41%
78 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-02 14:18 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("QueryContext",)
25from abc import abstractmethod
26from collections.abc import Iterable, Set
27from contextlib import AbstractContextManager
28from typing import Any
30import lsst.sphgeom
31from lsst.daf.relation import (
32 ColumnExpression,
33 ColumnTag,
34 Engine,
35 EngineError,
36 Predicate,
37 Processor,
38 Relation,
39 UnaryOperation,
40 iteration,
41)
43from ...core import DataCoordinate, DimensionKeyColumnTag, SkyPixDimension, Timespan
class QueryContext(Processor, AbstractContextManager["QueryContext"]):
    """A context manager interface for query operations that require some
    connection-like state.

    Notes
    -----
    `QueryContext` implementations are usually paired with a `QueryBackend`
    implementation, with the division of responsibilities as follows:

    - `QueryContext` implements the `lsst.daf.relation.Processor` interface,
      and is hence responsible for executing multi-engine relation trees.

    - `QueryContext` manages all state whose lifetime is a single query or set
      of related queries (e.g. temporary tables) via its context manager
      interface.  Methods that do not involve this state should not require
      the context manager to have been entered.

    - `QueryContext` objects should be easy to construct by registry helper
      code that doesn't have access to the full `Registry` data structure
      itself, while `QueryBackend` instances can generally only be constructed
      by code that does see essentially the full registry (for example,
      `SqlQueryBackend` holds a `RegistryManagerInstances` struct, while
      `SqlQueryContext` can be constructed with just a `Database` and
      `ColumnTypeInfo`).

    - `QueryBackend.context` is a factory for the associated `QueryContext`
      type.

    - `QueryBackend` methods that return relations accept the `QueryContext`
      returned by its `~QueryBackend.context` method in case those methods
      require state that should be cleaned up after the query is complete.
    """

    def __init__(self) -> None:
        self.iteration_engine = iteration.Engine()
        # Register the Python-side region-overlap test so predicates created
        # by make_spatial_region_overlap_predicate can be evaluated here.
        self.iteration_engine.functions[regions_overlap.__name__] = regions_overlap

    iteration_engine: iteration.Engine
    """The relation engine that all relations must ultimately be transferred
    to in order to be executed by this context.
    """

    @property
    def preferred_engine(self) -> Engine:
        """Return the relation engine that this context prefers to execute
        operations in (`lsst.daf.relation.Engine`).
        """
        return self.iteration_engine

    @property
    @abstractmethod
    def is_open(self) -> bool:
        """Whether the context manager has been entered (`bool`)."""
        raise NotImplementedError()

    def make_initial_relation(self, relation: Relation | None = None) -> Relation:
        """Construct an initial relation suitable for this context.

        Parameters
        ----------
        relation : `Relation`, optional
            A user-provided initial relation.  Must be included by
            implementations when provided, but may be modified (e.g. by adding
            a transfer to a new engine) when need to satisfy the context's
            invariants.
        """
        if relation is None:
            return self.preferred_engine.make_join_identity_relation()
        return relation

    def fetch_iterable(self, relation: Relation) -> iteration.RowIterable:
        """Execute the given relation and return its rows as an iterable of
        mappings.

        Parameters
        ----------
        relation : `Relation`
            Relation representing the query to execute.

        Returns
        -------
        rows : `~lsst.daf.relation.iteration.RowIterable`
            An iterable over rows, with each row a mapping from `ColumnTag`
            to column value.

        Notes
        -----
        A transfer to `iteration_engine` will be added to the root (end) of
        the relation tree if the root is not already in the iteration engine.

        Any transfers from other engines or persistent materializations will
        be handled by delegating to `process` before execution in the
        iteration engine.

        To ensure the result is a multi-pass Python collection in memory,
        ensure the given tree ends with a materialization operation in the
        iteration engine.
        """
        # This transfer does nothing if the relation is already in the
        # iteration engine.
        relation = relation.transferred_to(self.iteration_engine)
        relation = self.process(relation)
        return self.iteration_engine.execute(relation)

    @abstractmethod
    def count(self, relation: Relation, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows in the given relation.

        Parameters
        ----------
        relation : `Relation`
            Relation whose rows are to be counted.
        exact : `bool`, optional
            If `True` (default), return the exact number of rows.  If `False`,
            returning an upper bound is permitted if it can be done much more
            efficiently, e.g. by running a SQL ``SELECT COUNT(*)`` query but
            ignoring client-side filtering that would otherwise take place.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require
            running the full query and then throwing away the result rows
            after counting them.  If `False`, this is an error, as the user
            would usually be better off executing the query first to fetch its
            rows into a new query (or passing ``exact=False``).  Ignored if
            ``exact=False``.

        Returns
        -------
        n_rows : `int`
            Number of rows in the relation, or an upper bound.  This includes
            duplicates, if there are any.

        Raises
        ------
        RuntimeError
            Raised if an exact count was requested and could not be obtained
            without fetching and discarding rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def any(self, relation: Relation, *, execute: bool = True, exact: bool = True) -> bool:
        """Check whether this relation has any result rows at all.

        Parameters
        ----------
        relation : `Relation`
            Relation to be checked.
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found.  If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any_rows : `bool`
            Whether the relation has any rows, or if it may have any rows if
            ``exact=False``.

        Raises
        ------
        RuntimeError
            Raised if an exact check was requested and could not be obtained
            without executing the query.
        """
        raise NotImplementedError()

    def transfer(self, source: Relation, destination: Engine, materialize_as: str | None) -> Any:
        # Docstring inherited from lsst.daf.relation.Processor.
        raise NotImplementedError("No transfers expected by base QueryContext implementation.")

    def materialize(self, base: Relation, name: str) -> Any:
        # Docstring inherited from lsst.daf.relation.Processor.
        if base.engine == self.iteration_engine:
            return self.iteration_engine.execute(base).materialized()
        raise EngineError(f"Unexpected engine {base.engine} for base QueryContext implementation.")

    @abstractmethod
    def restore_columns(
        self,
        relation: Relation,
        columns_required: Set[ColumnTag],
    ) -> tuple[Relation, set[ColumnTag]]:
        """Return a modified relation tree that attempts to restore columns
        that were dropped by a projection operation.

        Parameters
        ----------
        relation : `Relation`
            Original relation tree.
        columns_required : `~collections.abc.Set` [ `ColumnTag` ]
            Columns to attempt to restore.  May include columns already
            present in the relation.

        Returns
        -------
        modified : `Relation`
            Possibly-modified tree with any projections that had dropped
            requested columns replaced by projections that do not drop these
            columns.  Care is taken to ensure that join common columns and
            deduplication behavior is preserved, even if that means some
            columns are not restored.
        columns_found : `set` [ `ColumnTag` ]
            Columns from those requested that are present in ``modified``.
        """
        raise NotImplementedError()

    @abstractmethod
    def strip_postprocessing(self, relation: Relation) -> tuple[Relation, list[UnaryOperation]]:
        """Return a modified relation tree without any iteration-engine
        operations and any transfer to the iteration engine at the end.

        Parameters
        ----------
        relation : `Relation`
            Original relation tree.

        Returns
        -------
        modified : `Relation`
            Stripped relation tree, with engine != `iteration_engine`.
        stripped : `list` [ `UnaryOperation` ]
            Operations that were stripped, in the same order they should be
            reapplied (with ``transfer=True,
            preferred_engine=iteration_engine``) to recover the original tree.
        """
        raise NotImplementedError()

    @abstractmethod
    def drop_invalidated_postprocessing(self, relation: Relation, new_columns: Set[ColumnTag]) -> Relation:
        """Return a modified relation tree without iteration-engine operations
        that require columns that are not in the given set.

        Parameters
        ----------
        relation : `Relation`
            Original relation tree.
        new_columns : `~collections.abc.Set` [ `ColumnTag` ]
            The only columns that postprocessing operations may require if
            they are to be retained in the returned relation tree.

        Returns
        -------
        modified : `Relation`
            Modified relation tree with postprocessing operations incompatible
            with ``new_columns`` removed.
        """
        raise NotImplementedError()

    def make_data_coordinate_predicate(
        self,
        data_coordinate: DataCoordinate,
        full: bool | None = None,
    ) -> Predicate:
        """Return a `~lsst.daf.relation.column_expressions.Predicate` that
        represents a data ID constraint.

        Parameters
        ----------
        data_coordinate : `DataCoordinate`
            Data ID whose keys and values should be transformed to predicate
            equality constraints.
        full : `bool`, optional
            Whether to include constraints on implied dimensions (default is
            to include implied dimensions if ``data_coordinate`` has them).

        Returns
        -------
        predicate : `lsst.daf.relation.column_expressions.Predicate`
            New predicate
        """
        if full is None:
            full = data_coordinate.hasFull()
        dimensions = data_coordinate.graph.required if not full else data_coordinate.graph.dimensions
        terms: list[Predicate] = []
        for dimension in dimensions:
            dtype = dimension.primaryKey.getPythonType()
            terms.append(
                ColumnExpression.reference(DimensionKeyColumnTag(dimension.name), dtype=dtype).eq(
                    ColumnExpression.literal(data_coordinate[dimension.name], dtype=dtype)
                )
            )
        return Predicate.logical_and(*terms)

    def make_spatial_region_skypix_predicate(
        self,
        dimension: SkyPixDimension,
        region: lsst.sphgeom.Region,
    ) -> Predicate:
        """Return a `~lsst.daf.relation.column_expressions.Predicate` that
        tests whether a skypix key column's pixel index is within the
        pixelization envelope of the given region.

        The predicate is built entirely from equality and range comparisons
        on the skypix key column, one term per envelope range.  Because it
        uses the pixelization envelope, the constraint may be conservative
        (i.e. it may accept pixels whose regions do not actually overlap
        ``region``); a subsequent exact region-overlap test can refine it.

        Parameters
        ----------
        dimension : `SkyPixDimension`
            Dimension whose key column is being constrained.
        region : `lsst.sphgeom.Region`
            Spatial region constraint to test against.

        Returns
        -------
        predicate : `lsst.daf.relation.column_expressions.Predicate`
            New predicate with the `DimensionKeyColumn` associated with
            ``dimension`` as its only required column.
        """
        ref = ColumnExpression.reference(DimensionKeyColumnTag(dimension.name), dtype=int)
        terms: list[Predicate] = []
        for begin, end in dimension.pixelization.envelope(region):
            if begin + 1 == end:
                # Single-pixel range: use a simple equality test.
                terms.append(ref.eq(ColumnExpression.literal(begin, dtype=int)))
            else:
                # Half-open range [begin, end).
                terms.append(
                    ref.ge(ColumnExpression.literal(begin, dtype=int)).logical_and(
                        ref.lt(ColumnExpression.literal(end, dtype=int))
                    )
                )
        return Predicate.logical_or(*terms)

    def make_spatial_region_overlap_predicate(
        self,
        lhs: ColumnExpression,
        rhs: ColumnExpression,
    ) -> Predicate:
        """Return a `~lsst.daf.relation.column_expressions.Predicate` that
        tests whether two regions overlap

        This operation only works with
        `iteration engines <lsst.daf.relation.iteration.Engine>`; it is
        usually used to refine the result of a join or constraint on
        `SkyPixDimension` columns in SQL.

        Parameters
        ----------
        lhs : `lsst.daf.relation.column_expressions.ColumnExpression`
            Expression for one spatial region.
        rhs : `lsst.daf.relation.column_expressions.ColumnExpression`
            Expression for the other spatial region.

        Returns
        -------
        predicate : `lsst.daf.relation.column_expressions.Predicate`
            New predicate with ``lhs`` and ``rhs`` as its required columns.
        """
        return lhs.predicate_method(regions_overlap.__name__, rhs, supporting_engine_types={iteration.Engine})

    def make_timespan_overlap_predicate(
        self,
        tag: ColumnTag,
        timespan: Timespan,
    ) -> Predicate:
        """Return a `~lsst.daf.relation.column_expressions.Predicate` that
        tests whether a timespan column overlaps a timespan literal.

        Parameters
        ----------
        tag : `ColumnTag`
            Identifier for a timespan column.
        timespan : `Timespan`
            Timespan literal selected rows must overlap.

        Returns
        -------
        predicate : `lsst.daf.relation.column_expressions.Predicate`
            New predicate.
        """
        return ColumnExpression.reference(tag, dtype=Timespan).predicate_method(
            "overlaps", ColumnExpression.literal(timespan)
        )

    def make_data_id_relation(
        self, data_ids: Set[DataCoordinate], dimension_names: Iterable[str]
    ) -> Relation:
        """Transform a set of data IDs into a relation.

        Parameters
        ----------
        data_ids : `~collections.abc.Set` [ `DataCoordinate` ]
            Data IDs to upload.  All must have at least the dimensions given,
            but may have more.
        dimension_names : `Iterable` [ `str` ]
            Names of dimensions that will be the columns of the relation.

        Returns
        -------
        relation : `Relation`
            Relation in the iteration engine.
        """
        tags = DimensionKeyColumnTag.generate(dimension_names)
        payload = iteration.RowSequence(
            [{tag: data_id[tag.dimension] for tag in tags} for data_id in data_ids]
        ).to_mapping(tags)
        return self.iteration_engine.make_leaf(frozenset(tags), payload, name_prefix="upload")
def regions_overlap(a: lsst.sphgeom.Region, b: lsst.sphgeom.Region) -> bool:
    """Return `True` if two spherical regions overlap.

    Parameters
    ----------
    a : `lsst.sphgeom.Region`
        One region.
    b : `lsst.sphgeom.Region`
        The other region.

    Returns
    -------
    overlap : `bool`
        Whether the regions overlap.
    """
    # relate() returns a bitmask of spatial relationships; the regions
    # overlap exactly when the DISJOINT bit is not set.
    relationship = a.relate(b)
    return (relationship & lsst.sphgeom.DISJOINT) == 0