Coverage for python/lsst/daf/butler/queries/_base.py: 83%

36 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-30 02:51 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("QueryBase", "QueryResultsBase") 

31 

32from abc import ABC, abstractmethod 

33from collections.abc import Iterable, Mapping, Set 

34from typing import Any, Self 

35 

36from ..dimensions import DataId, DimensionGroup 

37from .convert_args import convert_order_by_args, convert_where_args 

38from .driver import QueryDriver 

39from .expression_factory import ExpressionProxy 

40from .tree import OrderExpression, Predicate, QueryTree 

41 

42 

class QueryBase(ABC):
    """Shared foundation for `Query` and every `QueryResult` type.

    User code should seldom need to name this class directly; it is exported
    only because the public methods it defines are inherited by its
    subclasses.

    Parameters
    ----------
    driver : `QueryDriver`
        Backend object that knows how to actually run queries.
    tree : `QueryTree`
        Tree of joins and column expressions that describes the query.
    """

    def __init__(self, driver: QueryDriver, tree: QueryTree):
        self._driver, self._tree = driver, tree

    @property
    def dimensions(self) -> DimensionGroup:
        """Every dimension that appears in the query's columns."""
        tree = self._tree
        return tree.dimensions

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Check whether the query would produce at least one row.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, run at least a ``LIMIT 1`` query when it cannot be
            established before execution that the query yields no rows.
        exact : `bool`, optional
            If `True`, run the full query and apply post-query filtering as
            needed until at least one result row is found.  If `False`,
            post-query filtering is not taken into account, so the result
            may be `True` even though every row would be filtered out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or, depending on the arguments,
            might) yield result rows; `False` if it definitely would not.
        """
        driver, tree = self._driver, self._tree
        return driver.any(tree, execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable hints about why the query yields no rows.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default), execute simplified versions (e.g.
            ``LIMIT 1``) of aspects of the tree to determine more precisely
            where rows were filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            Messages describing reasons the query might not yield any
            results.
        """
        driver, tree = self._driver, self._tree
        return driver.explain_no_results(tree, execute=execute)

    @abstractmethod
    def where(
        self,
        *args: str | Predicate | DataId,
        bind: Mapping[str, Any] | None = None,
        **kwargs: int | str,
    ) -> Self:
        """Return a query whose rows are filtered by a boolean expression.

        Parameters
        ----------
        *args
            Constraints to apply, combined with logical AND.  Each may be a
            `str` expression to parse, a `Predicate` object (typically
            built via `expression_factory`), or a data ID.
        bind : `~collections.abc.Mapping`
            Mapping from an identifier that appears in a string expression
            to the literal value to substitute for it.  This is recommended
            over embedding literals directly in the expression, especially
            for strings, timespans, or other types for which quoting or
            formatting is nontrivial.
        **kwargs
            Data ID key-value pairs that extend and override any given in
            ``*args``.

        Returns
        -------
        query : `QueryBase`
            A new query object carrying the given row filters in addition
            to any already present in ``self``.  All row filters are
            combined with logical AND.

        Notes
        -----
        An expression that references a dimension or dimension element not
        yet present in the query causes it to be joined in, but dataset
        searches must already be joined into the query before their fields
        can be referenced in expressions.

        Data ID values are not checked for consistency; they are extracted
        from ``args`` and then ``kwargs`` and combined, with later values
        overriding earlier ones.
        """
        raise NotImplementedError()

152 

153 

class QueryResultsBase(QueryBase):
    """Shared foundation for query result objects whose rows can be counted."""

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Return the number of rows this query would produce.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and apply post-query filtering as
            needed so that the count accounts for it.  If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even when that requires
            running the full query and throwing the result rows away after
            counting them.  If `False`, that situation is an error, since
            the caller would usually be better off executing the query
            first to fetch its rows into a new query (or passing
            ``exact=False``).  Ignored if ``exact=False``.

        Returns
        -------
        count : `int`
            Number of rows the query would return, or an upper bound if
            ``exact=False``.
        """
        raise NotImplementedError()

    def order_by(self, *args: str | OrderExpression | ExpressionProxy) -> Self:
        """Return a new query whose results are sorted.

        Parameters
        ----------
        *args : `str`, `OrderExpression`, or `ExpressionProxy`
            Names of the columns/dimensions to sort by.  A column name may
            be prefixed with minus (``-``) to request descending order.

        Returns
        -------
        result : `QueryResultsBase`
            An ordered version of this query results object.

        Notes
        -----
        Calling this method more than once replaces the earlier sort terms
        with the new ones.
        """
        sort_terms = convert_order_by_args(self.dimensions, self._get_datasets(), *args)
        return self._copy(self._tree, order_by=sort_terms)

    def limit(self, limit: int | None = None) -> Self:
        """Return a new query that slices its result rows positionally.

        Parameters
        ----------
        limit : `int` or `None`, optional
            Upper bound on the number of records returned.  `None`
            (default) means no limit.

        Returns
        -------
        result : `QueryResultsBase`
            A sliced version of this query results object.

        Notes
        -----
        Calling this method more than once replaces the earlier slice
        parameters with the new ones.  Slicing always happens after
        sorting, even if `limit` is called before `order_by`.
        """
        current_tree = self._tree
        return self._copy(current_tree, limit=limit)

    def where(
        self,
        *args: str | Predicate | DataId,
        bind: Mapping[str, Any] | None = None,
        **kwargs: int | str,
    ) -> Self:
        # Docstring inherited.
        # Convert the user-facing arguments first, then attach the result to
        # the tree and hand the new tree (plus the current driver) to _copy.
        converted = convert_where_args(
            self.dimensions,
            self._get_datasets(),
            *args,
            bind=bind,
            **kwargs,
        )
        filtered_tree = self._tree.where(converted)
        return self._copy(tree=filtered_tree, driver=self._driver)

    @abstractmethod
    def _get_datasets(self) -> Set[str]:
        """Return every dataset type included in the query's result rows."""
        raise NotImplementedError()

    @abstractmethod
    def _copy(self, tree: QueryTree, **kwargs: Any) -> Self:
        """Return a modified copy of ``self``.

        Parameters
        ----------
        tree : `QueryTree`
            Query tree for the copy.
        **kwargs
            Additional modifications to apply.  Within this class the
            callers pass ``order_by``, ``limit``, and ``driver``.

        Returns
        -------
        copy : `QueryResultsBase`
            The modified copy.

        Notes
        -----
        Implementations should validate modifications rather than assume
        they are correct.
        """
        raise NotImplementedError()