Coverage for python/lsst/daf/butler/queries/_base.py: 86%

37 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-05-02 03:16 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("QueryBase", "QueryResultsBase", "ValidityRangeMatchError") 

31 

32from abc import ABC, abstractmethod 

33from collections.abc import Iterable, Mapping, Set 

34from typing import Any, Self 

35 

36from ..dimensions import DataId, DimensionGroup 

37from .convert_args import convert_order_by_args, convert_where_args 

38from .driver import QueryDriver 

39from .expression_factory import ExpressionProxy 

40from .tree import OrderExpression, Predicate, QueryTree 

41 

42 

class ValidityRangeMatchError(RuntimeError):
    """Raised when a find-first calibration dataset query leaves validity
    ranges ambiguous.

    A find-first query on a calibration dataset only works when every result
    row corresponds to exactly one calibration dataset.  That requires either
    a temporal dimension among the query's result rows or a temporal
    constraint on the query.  This exception can be raised when those
    dimensions or that constraint are missing, or when the timespan of a
    temporal dimension overlaps more than one validity range (e.g. the
    recommended bias changes in the middle of an exposure).
    """

55 

56 

57class QueryBase(ABC): 

58 """Common base class for `Query` and all `QueryResult` objects. 

59 

60 This class should rarely be referenced directly; it is public only because 

61 it provides public methods to its subclasses. 

62 

63 Parameters 

64 ---------- 

65 driver : `QueryDriver` 

66 Implementation object that knows how to actually execute queries. 

67 tree : `QueryTree` 

68 Description of the query as a tree of joins and column expressions. 

69 """ 

70 

71 def __init__(self, driver: QueryDriver, tree: QueryTree): 

72 self._driver = driver 

73 self._tree = tree 

74 

75 def any(self, *, execute: bool = True, exact: bool = True) -> bool: 

76 """Test whether the query would return any rows. 

77 

78 Parameters 

79 ---------- 

80 execute : `bool`, optional 

81 If `True`, execute at least a ``LIMIT 1`` query if it cannot be 

82 determined prior to execution that the query would return no rows. 

83 exact : `bool`, optional 

84 If `True`, run the full query and perform post-query filtering if 

85 needed, until at least one result row is found. If `False`, the 

86 returned result does not account for post-query filtering, and 

87 hence may be `True` even when all result rows would be filtered 

88 out. 

89 

90 Returns 

91 ------- 

92 any : `bool` 

93 `True` if the query would (or might, depending on arguments) yield 

94 result rows. `False` if it definitely would not. 

95 """ 

96 return self._driver.any(self._tree, execute=execute, exact=exact) 

97 

98 def explain_no_results(self, execute: bool = True) -> Iterable[str]: 

99 """Return human-readable messages that may help explain why the query 

100 yields no results. 

101 

102 Parameters 

103 ---------- 

104 execute : `bool`, optional 

105 If `True` (default) execute simplified versions (e.g. ``LIMIT 1``) 

106 of aspects of the tree to more precisely determine where rows were 

107 filtered out. 

108 

109 Returns 

110 ------- 

111 messages : `~collections.abc.Iterable` [ `str` ] 

112 String messages that describe reasons the query might not yield any 

113 results. 

114 """ 

115 return self._driver.explain_no_results(self._tree, execute=execute) 

116 

117 @abstractmethod 

118 def where( 

119 self, 

120 *args: str | Predicate | DataId, 

121 bind: Mapping[str, Any] | None = None, 

122 **kwargs: int | str, 

123 ) -> Self: 

124 """Return a query with a boolean-expression filter on its rows. 

125 

126 Parameters 

127 ---------- 

128 *args 

129 Constraints to apply, combined with logical AND. Arguments may be 

130 `str` expressions to parse, `Predicate` objects (these are 

131 typically constructed via `expression_factory`) or data IDs. 

132 bind : `~collections.abc.Mapping` 

133 Mapping from string identifier appearing in a string expression to 

134 a literal value that should be substituted for it. This is 

135 recommended instead of embedding literals directly into the 

136 expression, especially for strings, timespans, or other types where 

137 quoting or formatting is nontrivial. 

138 **kwargs 

139 Data ID key value pairs that extend and override any present in 

140 ``*args``. 

141 

142 Returns 

143 ------- 

144 query : `QueryBase` 

145 A new query object with the given row filters (as well as any 

146 already present in ``self``). All row filters are combined with 

147 logical AND. 

148 

149 Notes 

150 ----- 

151 If an expression references a dimension or dimension element that is 

152 not already present in the query, it will be joined in, but dataset 

153 searches must already be joined into a query in order to reference 

154 their fields in expressions. 

155 

156 Data ID values are not checked for consistency; they are extracted from 

157 ``args`` and then ``kwargs`` and combined, with later values overriding 

158 earlier ones. 

159 """ 

160 raise NotImplementedError() 

161 

162 

class QueryResultsBase(QueryBase):
    """Shared base for query result objects whose rows can be counted."""

    @property
    @abstractmethod
    def dimensions(self) -> DimensionGroup:
        """The full set of dimensions present in the query's columns."""
        raise NotImplementedError()

    @abstractmethod
    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        """Return how many rows this query would produce.

        Parameters
        ----------
        exact : `bool`, optional
            If `True`, run the full query and apply post-query filtering as
            needed so that the count reflects it.  If `False`, the result
            may be an upper bound.
        discard : `bool`, optional
            If `True`, compute the exact count even when that means running
            the full query and throwing the result rows away after counting
            them.  If `False`, that situation is an error, because the user
            would usually be better off executing the query first to fetch
            its rows into a new query (or passing ``exact=False``).  Ignored
            if ``exact=False``.

        Returns
        -------
        count : `int`
            Number of rows the query would return, or an upper bound if
            ``exact=False``.
        """
        raise NotImplementedError()

    def order_by(self, *args: str | OrderExpression | ExpressionProxy) -> Self:
        """Return a new query whose results are sorted.

        Parameters
        ----------
        *args : `str`, `OrderExpression`, or `ExpressionProxy`
            Terms to sort by.  Column/dimension names given as strings may
            be prefixed with a minus (``-``) to request descending order.

        Returns
        -------
        result : `QueryResultsBase`
            An ordered version of this query results object.

        Notes
        -----
        Calling this method more than once replaces the earlier sort terms
        with the new ones.
        """
        sort_terms = convert_order_by_args(self.dimensions, self._get_datasets(), *args)
        return self._copy(self._tree, order_by=sort_terms)

    def limit(self, limit: int | None = None) -> Self:
        """Return a new query whose result rows are sliced positionally.

        Parameters
        ----------
        limit : `int` or `None`, optional
            Maximum number of records to return.  `None` (default) means no
            limit.

        Returns
        -------
        result : `QueryResultsBase`
            A sliced version of this query results object.

        Notes
        -----
        Calling this method more than once replaces the earlier slice
        parameters with the new ones.  Slicing always happens after
        sorting, even if `limit` is called before `order_by`.
        """
        unchanged_tree = self._tree
        return self._copy(unchanged_tree, limit=limit)

    def where(
        self,
        *args: str | Predicate | DataId,
        bind: Mapping[str, Any] | None = None,
        **kwargs: int | str,
    ) -> Self:
        # Docstring inherited.
        predicate = convert_where_args(
            self.dimensions, self._get_datasets(), *args, bind=bind, **kwargs
        )
        filtered_tree = self._tree.where(predicate)
        # NOTE(review): unlike `order_by` and `limit`, this forwards
        # ``driver`` through ``_copy``'s keyword arguments -- confirm that
        # implementations of ``_copy`` expect it.
        return self._copy(tree=filtered_tree, driver=self._driver)

    @abstractmethod
    def _get_datasets(self) -> Set[str]:
        """Return the names of all dataset types in the query's result
        rows.
        """
        raise NotImplementedError()

    @abstractmethod
    def _copy(self, tree: QueryTree, **kwargs: Any) -> Self:
        """Return a modified copy of ``self``.

        Parameters
        ----------
        tree : `QueryTree`
            Query tree for the copy.
        **kwargs
            Further modifications to apply.  Callers in this class pass
            ``order_by``, ``limit``, and ``driver``.

        Returns
        -------
        copy : `QueryResultsBase`
            The modified copy.

        Notes
        -----
        Implementations should validate modifications, not assume they are
        correct.
        """
        raise NotImplementedError()