Coverage for python / lsst / daf / butler / queries / _base.py: 81%

36 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-01 08:18 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("QueryBase", "QueryResultsBase") 

31 

32from abc import ABC, abstractmethod 

33from collections.abc import Iterable, Mapping, Set 

34from typing import Any, Self 

35 

36from ..dimensions import DataId, DimensionGroup 

37from .convert_args import convert_order_by_args, convert_where_args 

38from .driver import QueryDriver 

39from .expression_factory import ExpressionProxy 

40from .tree import OrderExpression, Predicate, QueryTree 

41 

42 

43class QueryBase(ABC): 

44 """Common base class for `~lsst.daf.butler.queries.Query` and all 

45 ``QueryResult`` objects. 

46 

47 This class should rarely be referenced directly; it is public only because 

48 it provides public methods to its subclasses. 

49 

50 Parameters 

51 ---------- 

52 driver : `~lsst.daf.butler.queries.driver.QueryDriver` 

53 Implementation object that knows how to actually execute queries. 

54 tree : `~lsst.daf.butler.queries.tree.QueryTree` 

55 Description of the query as a tree of joins and column expressions. 

56 """ 

57 

58 def __init__(self, driver: QueryDriver, tree: QueryTree): 

59 self._driver = driver 

60 self._tree = tree 

61 

62 def any(self, *, execute: bool = True, exact: bool = True) -> bool: 

63 """Test whether the query would return any rows. 

64 

65 Parameters 

66 ---------- 

67 execute : `bool`, optional 

68 If `True`, execute at least a ``LIMIT 1`` query if it cannot be 

69 determined prior to execution that the query would return no rows. 

70 exact : `bool`, optional 

71 If `True`, run the full query and perform post-query filtering if 

72 needed, until at least one result row is found. If `False`, the 

73 returned result does not account for post-query filtering, and 

74 hence may be `True` even when all result rows would be filtered 

75 out. 

76 

77 Returns 

78 ------- 

79 any : `bool` 

80 `True` if the query would (or might, depending on arguments) yield 

81 result rows. `False` if it definitely would not. 

82 """ 

83 return self._driver.any(self._tree, execute=execute, exact=exact) 

84 

85 def explain_no_results(self, execute: bool = True) -> Iterable[str]: 

86 """Return human-readable messages that may help explain why the query 

87 yields no results. 

88 

89 Parameters 

90 ---------- 

91 execute : `bool`, optional 

92 If `True` (default) execute simplified versions (e.g. ``LIMIT 1``) 

93 of aspects of the tree to more precisely determine where rows were 

94 filtered out. 

95 

96 Returns 

97 ------- 

98 messages : `~collections.abc.Iterable` [ `str` ] 

99 String messages that describe reasons the query might not yield any 

100 results. 

101 """ 

102 return self._driver.explain_no_results(self._tree, execute=execute) 

103 

104 @abstractmethod 

105 def where( 

106 self, 

107 *args: str | Predicate | DataId, 

108 bind: Mapping[str, Any] | None = None, 

109 **kwargs: int | str, 

110 ) -> Self: 

111 """Return a query with a boolean-expression filter on its rows. 

112 

113 Parameters 

114 ---------- 

115 *args 

116 Constraints to apply, combined with logical AND. Arguments may be 

117 `str` expressions to parse, 

118 `~lsst.daf.butler.queries.tree.Predicate` objects (these are 

119 typically constructed via 

120 `Query.expression_factory <lsst.daf.butler.queries.Query.expression_factory>`) 

121 or data IDs. 

122 bind : `~collections.abc.Mapping` 

123 Mapping from string identifier appearing in a string expression to 

124 a literal value that should be substituted for it. This is 

125 recommended instead of embedding literals directly into the 

126 expression, especially for strings, timespans, or other types where 

127 quoting or formatting is nontrivial. 

128 **kwargs 

129 Data ID key value pairs that extend and override any present in 

130 ``*args``. 

131 

132 Returns 

133 ------- 

134 query : `QueryBase` 

135 A new query object with the given row filters (as well as any 

136 already present in ``self``). All row filters are combined with 

137 logical AND. 

138 

139 Notes 

140 ----- 

141 Expressions referring to dimensions or dimension elements are resolved 

142 automatically. References to dataset fields (see `expression_factory` 

143 for the distinction) may or may not be resolvable, depending on the 

144 implementation class. 

145 

146 Data ID values are not checked for consistency; they are extracted from 

147 ``args`` and then ``kwargs`` and combined, with later values overriding 

148 earlier ones. 

149 """ # noqa: W505, long docstrings 

150 raise NotImplementedError() 

151 

152 

class QueryResultsBase(QueryBase):
    """Shared foundation for query result objects with countable rows."""

    @property
    @abstractmethod
    def dimensions(self) -> DimensionGroup:
        """Every dimension that appears in the query's columns."""
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and apply post-query filtering as
            needed so that the count accounts for it.  If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even when that requires
            running the full query and throwing the result rows away after
            counting them.  If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its
            rows into a new query (or passing ``exact=False``).  Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.
        """
        raise NotImplementedError()

    def order_by(self, *args: str | OrderExpression | ExpressionProxy) -> Self:
        """Return a new query whose results are sorted.

        Parameters
        ----------
        *args : `str`, `OrderExpression`, or `ExpressionProxy`
            Terms to sort by.  Names of columns/dimensions given as strings
            can be prefixed with minus (``-``) to use descending ordering.

        Returns
        -------
        result : `QueryResultsBase`
            An ordered version of this query results object.

        Notes
        -----
        If this method is called multiple times, the new sort terms replace
        the old ones.
        """
        # Normalize the caller's arguments before handing them to `_copy`.
        sort_terms = convert_order_by_args(self.dimensions, self._get_datasets(), *args)
        return self._copy(self._tree, order_by=sort_terms)

    def limit(self, limit: int | None = None) -> Self:
        """Return a new query that slices its result rows positionally.

        Parameters
        ----------
        limit : `int` or `None`, optional
            Upper bound on the number of returned records.  `None`
            (default) means no limit.

        Returns
        -------
        result : `QueryResultsBase`
            A sliced version of this query results object.

        Notes
        -----
        If this method is called multiple times, the new slice parameters
        replace the old ones.  Slicing always occurs after sorting, even if
        `limit` is called before `order_by`.
        """
        unchanged_tree = self._tree
        return self._copy(unchanged_tree, limit=limit)

    def where(
        self,
        *args: str | Predicate | DataId,
        bind: Mapping[str, Any] | None = None,
        **kwargs: int | str,
    ) -> Self:
        # Docstring inherited.
        predicate = convert_where_args(
            self.dimensions, self._get_datasets(), *args, bind=bind, **kwargs
        )
        filtered_tree = self._tree.where(predicate)
        # NOTE(review): unlike `order_by` and `limit`, this forwards
        # ``driver`` to `_copy` as a keyword modification; confirm that the
        # `_copy` implementations expect it.
        return self._copy(tree=filtered_tree, driver=self._driver)

    @abstractmethod
    def _get_datasets(self) -> Set[str]:
        """Return all dataset types included in the query's result rows."""
        raise NotImplementedError()

    @abstractmethod
    def _copy(self, tree: QueryTree, **kwargs: Any) -> Self:
        """Return a modified copy of ``self``.

        Implementations should validate modifications, not assume they are
        correct.

        Parameters
        ----------
        tree : `~lsst.daf.butler.queries.tree.QueryTree`
            Query tree for the copy.
        **kwargs
            Modifications to apply to the copy.  The methods of this class
            pass ``order_by``, ``limit``, and ``driver``.

        Returns
        -------
        copy : `QueryResultsBase`
            The modified copy.
        """
        raise NotImplementedError()