Coverage for python/lsst/daf/butler/registry/queries/butler_sql_engine.py: 24%

71 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-07 02:05 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("ButlerSqlEngine",) 

24 

25import dataclasses 

26from collections.abc import Iterable, Set 

27from typing import Any, cast 

28 

29import astropy.time 

30import sqlalchemy 

31from lsst.daf.relation import ColumnTag, Relation, Sort, UnaryOperation, UnaryOperationRelation, sql 

32 

33from ...core import ( 

34 ColumnTypeInfo, 

35 LogicalColumn, 

36 Timespan, 

37 TimespanDatabaseRepresentation, 

38 ddl, 

39 is_timespan_column, 

40) 

41from .find_first_dataset import FindFirstDataset 

42 

43 

@dataclasses.dataclass(repr=False, eq=False, kw_only=True)
class ButlerSqlEngine(sql.Engine[LogicalColumn]):
    """An extension of the `lsst.daf.relation.sql.Engine` class to add timespan
    and `FindFirstDataset` operation support.
    """

    column_types: ColumnTypeInfo
    """Struct containing information about column types that depend on registry
    configuration.
    """

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        # Include the object id so distinct engine instances with the same
        # name can be told apart in debug output.
        return f"ButlerSqlEngine({self.name!r})@{id(self):0x}"

    def _append_unary_to_select(self, operation: UnaryOperation, target: sql.Select) -> sql.Select:
        # Docstring inherited.
        # This override exists to add support for the custom FindFirstDataset
        # operation.
        match operation:
            case FindFirstDataset():
                if target.has_sort and not target.has_slice:
                    # Existing target is sorted, but not sliced.  We want to
                    # move that sort outside (i.e. after) the FindFirstDataset,
                    # since otherwise the FindFirstDataset would put the Sort
                    # into a CTE where it will do nothing.
                    inner = target.reapply_skip(sort=Sort())
                    return sql.Select.apply_skip(operation._finish_apply(inner), sort=target.sort)
                else:
                    # Apply the FindFirstDataset directly to the existing
                    # target, which we've already asserted starts with a
                    # Select.  That existing Select will be used for the CTE
                    # that starts the FindFirstDataset implementation (see
                    # to_payload override).
                    return sql.Select.apply_skip(operation._finish_apply(target))
            case _:
                # All other unary operations are handled by the base class.
                return super()._append_unary_to_select(operation, target)

    def extract_mapping(
        self, tags: Iterable[ColumnTag], sql_columns: sqlalchemy.sql.ColumnCollection
    ) -> dict[ColumnTag, LogicalColumn]:
        # Docstring inherited.
        # This override exists to add support for Timespan columns, which
        # (unlike regular columns) may be backed by more than one SQL column
        # and hence need to be reassembled via the configured timespan class.
        result: dict[ColumnTag, LogicalColumn] = {}
        for tag in tags:
            if is_timespan_column(tag):
                result[tag] = self.column_types.timespan_cls.from_columns(
                    sql_columns, name=tag.qualified_name
                )
            else:
                result[tag] = sql_columns[tag.qualified_name]
        return result

    def select_items(
        self,
        items: Iterable[tuple[ColumnTag, LogicalColumn]],
        sql_from: sqlalchemy.sql.FromClause,
        *extra: sqlalchemy.sql.ColumnElement,
    ) -> sqlalchemy.sql.Select:
        # Docstring inherited.
        # This override exists to add support for Timespan columns: a single
        # logical timespan column is flattened into however many real SQL
        # columns its database representation uses.
        select_columns: list[sqlalchemy.sql.ColumnElement] = []
        for tag, logical_column in items:
            if is_timespan_column(tag):
                select_columns.extend(
                    cast(TimespanDatabaseRepresentation, logical_column).flatten(name=tag.qualified_name)
                )
            else:
                select_columns.append(
                    cast(sqlalchemy.sql.ColumnElement, logical_column).label(tag.qualified_name)
                )
        select_columns.extend(extra)
        # Guarantee the SELECT list is never empty (base-class helper).
        self.handle_empty_columns(select_columns)
        return sqlalchemy.sql.select(*select_columns).select_from(sql_from)

    def make_zero_select(self, tags: Set[ColumnTag]) -> sqlalchemy.sql.Select:
        # Docstring inherited.
        # This override exists to add support for Timespan columns.  The
        # result is a SELECT with the right columns (all NULL literals) and a
        # WHERE FALSE clause, i.e. a relation with no rows.
        select_columns: list[sqlalchemy.sql.ColumnElement] = []
        for tag in tags:
            if is_timespan_column(tag):
                # A NULL timespan still needs the full set of SQL columns its
                # representation flattens into.
                select_columns.extend(
                    self.column_types.timespan_cls.fromLiteral(None).flatten(name=tag.qualified_name)
                )
            else:
                select_columns.append(sqlalchemy.sql.literal(None).label(tag.qualified_name))
        self.handle_empty_columns(select_columns)
        return sqlalchemy.sql.select(*select_columns).where(sqlalchemy.sql.literal(False))

    def convert_column_literal(self, value: Any) -> LogicalColumn:
        # Docstring inherited.
        # This override exists to add support for Timespan columns and
        # astropy times (stored via the AstropyTimeNsecTai column type).
        if isinstance(value, Timespan):
            return self.column_types.timespan_cls.fromLiteral(value)
        elif isinstance(value, astropy.time.Time):
            return sqlalchemy.sql.literal(value, type_=ddl.AstropyTimeNsecTai)
        else:
            return super().convert_column_literal(value)

    def to_payload(self, relation: Relation) -> sql.Payload[LogicalColumn]:
        # Docstring inherited.
        # This override exists to add support for the custom FindFirstDataset
        # operation.
        match relation:
            case UnaryOperationRelation(operation=FindFirstDataset() as operation, target=target):
                # We build a subquery of the form below to search the
                # collections in order.
                #
                # WITH {dst}_search AS (
                #     {target}
                #     ...
                # )
                # SELECT
                #     {dst}_window.*
                # FROM (
                #     SELECT
                #         {dst}_search.*,
                #         ROW_NUMBER() OVER (
                #             PARTITION BY {dst_search}.{operation.dimensions}
                #             ORDER BY {operation.rank}
                #         ) AS rownum
                #     FROM {dst}_search
                # ) {dst}_window
                # WHERE
                #     {dst}_window.rownum = 1;
                #
                # We'll start with the Common Table Expression (CTE) at the
                # top, which we mostly get from the target relation.
                search = self.to_executable(target).cte(f"{operation.rank.dataset_type}_search")
                # Now we fill out the SELECT from the CTE, and the subquery it
                # contains (at the same time, since they have the same columns,
                # aside from the special 'rownum' window-function column).
                search_columns = self.extract_mapping(target.columns, search.columns)
                # Each partition is one group of rows sharing the same values
                # for the operation's dimensions; within a partition, rownum 1
                # is the row with the best (lowest) rank.
                partition_by = [search_columns[tag] for tag in operation.dimensions]
                rownum_column = sqlalchemy.sql.func.row_number()
                if partition_by:
                    rownum_column = rownum_column.over(
                        partition_by=partition_by, order_by=search_columns[operation.rank]
                    )
                else:
                    # No dimensions to partition on: a single global window.
                    rownum_column = rownum_column.over(order_by=search_columns[operation.rank])
                window = self.select_items(
                    search_columns.items(), search, rownum_column.label("rownum")
                ).subquery(f"{operation.rank.dataset_type}_window")
                # Keep only the first-ranked row of each partition.
                return sql.Payload(
                    from_clause=window,
                    columns_available=self.extract_mapping(target.columns, window.columns),
                    where=[window.columns["rownum"] == 1],
                )
            case _:
                return super().to_payload(relation)