Coverage for python/lsst/daf/butler/registry/queries/butler_sql_engine.py: 24%
71 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-08 10:28 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("ButlerSqlEngine",)
25import dataclasses
26from collections.abc import Iterable, Set
27from typing import Any, cast
29import astropy.time
30import sqlalchemy
31from lsst.daf.relation import ColumnTag, Relation, Sort, UnaryOperation, UnaryOperationRelation, sql
33from ...core import (
34 ColumnTypeInfo,
35 LogicalColumn,
36 Timespan,
37 TimespanDatabaseRepresentation,
38 ddl,
39 is_timespan_column,
40)
41from .find_first_dataset import FindFirstDataset
@dataclasses.dataclass(repr=False, eq=False, kw_only=True)
class ButlerSqlEngine(sql.Engine[LogicalColumn]):
    """An extension of the `lsst.daf.relation.sql.Engine` class to add timespan
    and `FindFirstDataset` operation support.
    """

    column_types: ColumnTypeInfo
    """Struct containing information about column types that depend on registry
    configuration.
    """

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return f"ButlerSqlEngine({self.name!r})@{id(self):0x}"

    def _append_unary_to_select(self, operation: UnaryOperation, target: sql.Select) -> sql.Select:
        # Docstring inherited.
        # This override exists to add support for the custom FindFirstDataset
        # operation; everything else is delegated to the base class.
        if not isinstance(operation, FindFirstDataset):
            return super()._append_unary_to_select(operation, target)
        if target.has_sort and not target.has_slice:
            # The existing target is sorted but not sliced.  Hoist that sort
            # outside (i.e. after) the FindFirstDataset: left where it is, the
            # sort would end up inside the CTE built by FindFirstDataset, where
            # it would accomplish nothing.
            unsorted_target = target.reapply_skip(sort=Sort())
            return sql.Select.apply_skip(operation._finish_apply(unsorted_target), sort=target.sort)
        # Apply the FindFirstDataset directly to the existing target, which
        # is already known to start with a Select.  That Select becomes the
        # CTE that begins the FindFirstDataset implementation (see the
        # to_payload override).
        return sql.Select.apply_skip(operation._finish_apply(target))

    def extract_mapping(
        self, tags: Iterable[ColumnTag], sql_columns: sqlalchemy.sql.ColumnCollection
    ) -> dict[ColumnTag, LogicalColumn]:
        # Docstring inherited.
        # This override exists to add support for Timespan columns, which are
        # reassembled from their flattened SQL columns.
        return {
            tag: (
                self.column_types.timespan_cls.from_columns(sql_columns, name=tag.qualified_name)
                if is_timespan_column(tag)
                else sql_columns[tag.qualified_name]
            )
            for tag in tags
        }

    def select_items(
        self,
        items: Iterable[tuple[ColumnTag, LogicalColumn]],
        sql_from: sqlalchemy.sql.FromClause,
        *extra: sqlalchemy.sql.ColumnElement,
    ) -> sqlalchemy.sql.Select:
        # Docstring inherited.
        # This override exists to add support for Timespan columns, which
        # flatten into multiple SQL columns in the SELECT list.
        columns: list[sqlalchemy.sql.ColumnElement] = []
        for tag, logical_column in items:
            name = tag.qualified_name
            if is_timespan_column(tag):
                columns.extend(cast(TimespanDatabaseRepresentation, logical_column).flatten(name=name))
            else:
                columns.append(cast(sqlalchemy.sql.ColumnElement, logical_column).label(name))
        columns.extend(extra)
        self.handle_empty_columns(columns)
        return sqlalchemy.sql.select(*columns).select_from(sql_from)

    def make_zero_select(self, tags: Set[ColumnTag]) -> sqlalchemy.sql.Select:
        # Docstring inherited.
        # This override exists to add support for Timespan columns.  The
        # WHERE false clause guarantees the SELECT yields no rows; the NULL
        # literals exist only to give it the right column names.
        columns: list[sqlalchemy.sql.ColumnElement] = []
        for tag in tags:
            name = tag.qualified_name
            if is_timespan_column(tag):
                columns.extend(self.column_types.timespan_cls.fromLiteral(None).flatten(name=name))
            else:
                columns.append(sqlalchemy.sql.literal(None).label(name))
        self.handle_empty_columns(columns)
        return sqlalchemy.sql.select(*columns).where(sqlalchemy.sql.literal(False))

    def convert_column_literal(self, value: Any) -> LogicalColumn:
        # Docstring inherited.
        # This override exists to add support for Timespan and astropy-time
        # literals; everything else is delegated to the base class.
        if isinstance(value, Timespan):
            return self.column_types.timespan_cls.fromLiteral(value)
        if isinstance(value, astropy.time.Time):
            return sqlalchemy.sql.literal(value, type_=ddl.AstropyTimeNsecTai)
        return super().convert_column_literal(value)

    def to_payload(self, relation: Relation) -> sql.Payload[LogicalColumn]:
        # Docstring inherited.
        # This override exists to add support for the custom FindFirstDataset
        # operation; all other relations are delegated to the base class.
        if not (
            isinstance(relation, UnaryOperationRelation)
            and isinstance(relation.operation, FindFirstDataset)
        ):
            return super().to_payload(relation)
        operation = relation.operation
        target = relation.target
        # We build a subquery of the form below to search the collections
        # in order.
        #
        # WITH {dst}_search AS (
        #     {target}
        #     ...
        # )
        # SELECT
        #     {dst}_window.*,
        # FROM (
        #     SELECT
        #         {dst}_search.*,
        #         ROW_NUMBER() OVER (
        #             PARTITION BY {dst_search}.{operation.dimensions}
        #             ORDER BY {operation.rank}
        #         ) AS rownum
        #     ) {dst}_window
        # WHERE
        #     {dst}_window.rownum = 1;
        #
        # Start with the Common Table Expression (CTE) at the top, obtained
        # mostly from the target relation.
        search = self.to_executable(target).cte(f"{operation.rank.dataset_type}_search")
        # Fill out the SELECT from the CTE and the subquery it contains at
        # the same time, since they share the same columns aside from the
        # special 'rownum' window-function column.
        search_columns = self.extract_mapping(target.columns, search.columns)
        rownum = sqlalchemy.sql.func.row_number()
        partition_by = [search_columns[tag] for tag in operation.dimensions]
        order_by = search_columns[operation.rank]
        if partition_by:
            rownum = rownum.over(partition_by=partition_by, order_by=order_by)
        else:
            # No dimensions to partition on: a single global ranking.
            rownum = rownum.over(order_by=order_by)
        window = self.select_items(search_columns.items(), search, rownum.label("rownum")).subquery(
            f"{operation.rank.dataset_type}_window"
        )
        return sql.Payload(
            from_clause=window,
            columns_available=self.extract_mapping(target.columns, window.columns),
            where=[window.columns["rownum"] == 1],
        )