Coverage for python/lsst/daf/butler/queries/_base.py: 86%

42 statements  

« prev     ^ index     » next       coverage.py v7.4.3, created at 2024-03-05 11:36 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("QueryBase", "HomogeneousQueryBase", "CountableQueryBase", "QueryResultsBase") 

31 

32from abc import ABC, abstractmethod 

33from collections.abc import Iterable, Mapping, Set 

34from typing import Any, Self 

35 

36from ..dimensions import DataId, DimensionGroup 

37from .convert_args import convert_order_by_args, convert_where_args 

38from .driver import QueryDriver 

39from .expression_factory import ExpressionProxy 

40from .tree import OrderExpression, Predicate, QueryTree 

41 

42 

class QueryBase(ABC):
    """Common base class for `Query` and all `QueryResult` objects.

    This class should rarely be referenced directly; it is public only because
    it provides public methods to its subclasses.
    """

    @abstractmethod
    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        """Test whether the query would return any rows.

        Parameters
        ----------
        execute : `bool`, optional
            If `True`, execute at least a ``LIMIT 1`` query if it cannot be
            determined prior to execution that the query would return no rows.
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed, until at least one result row is found.  If `False`, the
            returned result does not account for post-query filtering, and
            hence may be `True` even when all result rows would be filtered
            out.

        Returns
        -------
        any : `bool`
            `True` if the query would (or might, depending on arguments) yield
            result rows.  `False` if it definitely would not.
        """
        raise NotImplementedError()

    @abstractmethod
    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        """Return human-readable messages that may help explain why the query
        yields no results.

        Parameters
        ----------
        execute : `bool`, optional
            If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
            of aspects of the tree to more precisely determine where rows were
            filtered out.

        Returns
        -------
        messages : `~collections.abc.Iterable` [ `str` ]
            String messages that describe reasons the query might not yield any
            results.
        """
        raise NotImplementedError()

    @abstractmethod
    def where(
        self,
        *args: str | Predicate | DataId,
        bind: Mapping[str, Any] | None = None,
        **kwargs: Any,
    ) -> Self:
        """Return a query with a boolean-expression filter on its rows.

        Parameters
        ----------
        *args
            Constraints to apply, combined with logical AND.  Arguments may be
            `str` expressions to parse, `Predicate` objects (these are
            typically constructed via `expression_factory`) or data IDs.
        bind : `~collections.abc.Mapping`, optional
            Mapping from string identifier appearing in a string expression to
            a literal value that should be substituted for it.  This is
            recommended instead of embedding literals directly into the
            expression, especially for strings, timespans, or other types where
            quoting or formatting is nontrivial.
        **kwargs
            Data ID key value pairs that extend and override any present in
            ``*args``.

        Returns
        -------
        query : `QueryBase`
            A new query object with the given row filters (as well as any
            already present in ``self``).  All row filters are combined with
            logical AND.

        Notes
        -----
        If an expression references a dimension or dimension element that is
        not already present in the query, it will be joined in, but dataset
        searches must already be joined into a query in order to reference
        their fields in expressions.

        Data ID values are not checked for consistency; they are extracted from
        ``args`` and then ``kwargs`` and combined, with later values overriding
        earlier ones.
        """
        raise NotImplementedError()

138 

139 

class HomogeneousQueryBase(QueryBase):
    """Common base class for `Query` and query result classes that are
    iterables with consistent dimensions throughout.

    This class should rarely be referenced directly; it is public only because
    it provides public methods to its subclasses.

    Parameters
    ----------
    driver : `QueryDriver`
        Implementation object that knows how to actually execute queries.
    tree : `QueryTree`
        Description of the query as a tree of joins and column expressions.
    """

    def __init__(self, driver: QueryDriver, tree: QueryTree):
        self._driver = driver
        self._tree = tree

    @property
    def dimensions(self) -> DimensionGroup:
        """All dimensions included in the query's columns."""
        return self._tree.dimensions

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.
        # The driver does the work; this object only supplies its tree.
        return self._driver.any(self._tree, execute=execute, exact=exact)

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.
        # Delegated to the driver in the same way as `any`.
        return self._driver.explain_no_results(self._tree, execute=execute)

171 

172 

class CountableQueryBase(QueryBase):
    """Common base class for query result objects for which the number of
    result rows is a well-defined concept.

    This class should rarely be referenced directly; it is public only because
    it provides public methods to its subclasses.
    """

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Count the number of rows this query would return.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and perform post-query filtering if
            needed to account for that filtering in the count.  If `False`, the
            result may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even if it would require running
            the full query and then throwing away the result rows after
            counting them.  If `False`, this is an error, as the user would
            usually be better off executing the query first to fetch its rows
            into a new query (or passing ``exact=False``).  Ignored if
            ``exact=False``.

        Returns
        -------
        count : `int`
            The number of rows the query would return, or an upper bound if
            ``exact=False``.
        """
        raise NotImplementedError()

206 

207 

class QueryResultsBase(HomogeneousQueryBase, CountableQueryBase):
    """Common base class for query result objects with homogeneous dimensions
    and countable rows.
    """

    def order_by(self, *args: str | OrderExpression | ExpressionProxy) -> Self:
        """Return a new query that yields ordered results.

        Parameters
        ----------
        *args : `str`, `OrderExpression`, or `ExpressionProxy`
            Names of the columns/dimensions to use for ordering.  Column name
            can be prefixed with minus (``-``) to use descending ordering.
            Expression objects are accepted as well, per the signature.

        Returns
        -------
        result : `QueryResultsBase`
            An ordered version of this query results object.

        Notes
        -----
        If this method is called multiple times, the new sort terms replace
        the old ones.
        """
        return self._copy(
            self._tree, order_by=convert_order_by_args(self.dimensions, self._get_datasets(), *args)
        )

    def limit(self, limit: int | None = None, offset: int = 0) -> Self:
        """Return a new query that slices its result rows positionally.

        Parameters
        ----------
        limit : `int` or `None`, optional
            Upper limit on the number of returned records.  `None` (default)
            means no limit.
        offset : `int`, optional
            The number of records to skip before returning at most ``limit``
            records.

        Returns
        -------
        result : `QueryResultsBase`
            A sliced version of this query results object.

        Notes
        -----
        If this method is called multiple times, the new slice parameters
        replace the old ones.  Slicing always occurs after sorting, even if
        `limit` is called before `order_by`.
        """
        return self._copy(self._tree, limit=limit, offset=offset)

    def where(
        self,
        *args: str | Predicate | DataId,
        bind: Mapping[str, Any] | None = None,
        **kwargs: Any,
    ) -> Self:
        # Docstring inherited.
        # NOTE(review): unlike `order_by` and `limit`, this call forwards the
        # driver explicitly to `_copy`; confirm that subclasses expect it.
        return self._copy(
            tree=self._tree.where(
                convert_where_args(self.dimensions, self._get_datasets(), *args, bind=bind, **kwargs)
            ),
            driver=self._driver,
        )

    @abstractmethod
    def _get_datasets(self) -> Set[str]:
        """Return all dataset types included in the query's result rows.

        Returns
        -------
        datasets : `~collections.abc.Set` [ `str` ]
            Dataset types included in the result rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def _copy(self, tree: QueryTree, **kwargs: Any) -> Self:
        """Return a modified copy of ``self``.

        Implementations should validate modifications, not assume they are
        correct.

        Parameters
        ----------
        tree : `QueryTree`
            Query tree for the copy.
        **kwargs
            Further modifications to apply.  Callers in this class pass
            ``order_by``, ``limit`` together with ``offset``, and ``driver``.

        Returns
        -------
        copy : `QueryResultsBase`
            The modified copy.
        """
        raise NotImplementedError()