Coverage for python/lsst/daf/butler/_query.py: 100%

12 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-25 10:48 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("Query",) 

31 

32from abc import ABC, abstractmethod 

33from collections.abc import Iterable, Mapping 

34from typing import TYPE_CHECKING, Any 

35 

36if TYPE_CHECKING: 

37 from ._query_results import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults 

38 from .dimensions import DataId, DimensionGroup 

39 from .registry._registry import CollectionArgType 

40 

41 

class Query(ABC):
    """Interface for construction and execution of complex queries.

    This is an abstract base class: `data_ids`, `datasets` and
    `dimension_records` are all abstract and must be overridden by a
    concrete implementation before the class can be instantiated.
    """

    @abstractmethod
    def data_ids(
        self,
        dimensions: DimensionGroup | Iterable[str] | str,
        *,
        data_id: DataId | None = None,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        **kwargs: Any,
    ) -> DataCoordinateQueryResults:
        """Query for data IDs matching user-provided criteria.

        Parameters
        ----------
        dimensions : `DimensionGroup`, `str`, or \
                `~collections.abc.Iterable` [`str`]
            The dimensions of the data IDs to yield, as either `DimensionGroup`
            instances or `str`.  Will be automatically expanded to a complete
            `DimensionGroup`.
        data_id : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause.  May involve
            any column of a dimension table or (as a shortcut for the primary
            key column of a dimension table) dimension name.  See
            :ref:`daf_butler_dimension_expressions` for more information.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
            Values of collection type can be expanded in some cases; see
            :ref:`daf_butler_dimension_expressions_identifiers` for more
            information.
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``data_id``
            argument (and may be used to provide a constraining data ID even
            when the ``data_id`` argument is `None`).

        Returns
        -------
        data_ids : `DataCoordinateQueryResults`
            Data IDs matching the given query parameters.  These are guaranteed
            to identify all dimensions (`DataCoordinate.hasFull` returns
            `True`), but will not contain `DimensionRecord` objects
            (`DataCoordinate.hasRecords` returns `False`).  Call
            `~DataCoordinateQueryResults.expanded` on the
            returned object to fetch those (and consider using
            `~DataCoordinateQueryResults.materialize` on the
            returned object first if the expected number of rows is very
            large).  See documentation for those methods for additional
            information.

        Raises
        ------
        lsst.daf.butler.registry.DataIdError
            Raised when ``data_id`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        lsst.daf.butler.registry.UserExpressionError
            Raised when ``where`` expression is invalid.
        """
        raise NotImplementedError()

    @abstractmethod
    def datasets(
        self,
        dataset_type: Any,
        collections: CollectionArgType | None = None,
        *,
        find_first: bool = True,
        data_id: DataId | None = None,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        **kwargs: Any,
    ) -> DatasetQueryResults:
        """Query for and iterate over dataset references matching user-provided
        criteria.

        Parameters
        ----------
        dataset_type : dataset type expression
            An expression that fully or partially identifies the dataset types
            to be queried.  Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof.  The special value ``...`` can
            be used to query all dataset types.  See
            :ref:`daf_butler_dataset_type_expressions` for more information.
        collections : collection expression, optional
            An expression that identifies the collections to search, such as a
            `str` (for full matches or partial matches via globs), `re.Pattern`
            (for partial matches), or iterable thereof.  ``...`` can be used to
            search all collections (actually just all `~CollectionType.RUN`
            collections, because this will still find all datasets).
            If not provided, the default collections are used.  See
            :ref:`daf_butler_collection_expressions` for more information.
        find_first : `bool`, optional
            If `True` (default), for each result data ID, only yield one
            `DatasetRef` of each `DatasetType`, from the first collection in
            which a dataset of that dataset type appears (according to the
            order of ``collections`` passed in).  When ``find_first`` is
            `True`, ``collections`` must not contain regular expressions and
            may not be ``...``.
        data_id : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause.  May involve
            any column of a dimension table or (as a shortcut for the primary
            key column of a dimension table) dimension name.  See
            :ref:`daf_butler_dimension_expressions` for more information.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
            Values of collection type can be expanded in some cases; see
            :ref:`daf_butler_dimension_expressions_identifiers` for more
            information.
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``data_id``
            argument (and may be used to provide a constraining data ID even
            when the ``data_id`` argument is `None`).

        Returns
        -------
        refs : `DatasetQueryResults`
            Dataset references matching the given query criteria.  Nested data
            IDs are guaranteed to include values for all implied dimensions
            (i.e. `DataCoordinate.hasFull` will return `True`), but will not
            include dimension records (`DataCoordinate.hasRecords` will be
            `False`) unless `~DatasetQueryResults.expanded` is
            called on the result object (which returns a new one).

        Raises
        ------
        lsst.daf.butler.registry.DatasetTypeExpressionError
            Raised when ``dataset_type`` expression is invalid.
        TypeError
            Raised when the arguments are incompatible, such as when a
            collection wildcard is passed when ``find_first`` is `True`, or
            when ``collections`` is `None` and default butler collections are
            not defined.
        lsst.daf.butler.registry.DataIdError
            Raised when ``data_id`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        lsst.daf.butler.registry.UserExpressionError
            Raised when ``where`` expression is invalid.

        Notes
        -----
        When multiple dataset types are queried in a single call, the
        results of this operation are equivalent to querying for each dataset
        type separately in turn, and no information about the relationships
        between datasets of different types is included.
        """
        raise NotImplementedError()

    @abstractmethod
    def dimension_records(
        self,
        element: str,
        *,
        data_id: DataId | None = None,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        **kwargs: Any,
    ) -> DimensionRecordQueryResults:
        """Query for dimension information matching user-provided criteria.

        Parameters
        ----------
        element : `str`
            The name of a dimension element to obtain records for.
        data_id : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause.  See
            `data_ids` and :ref:`daf_butler_dimension_expressions` for more
            information.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
            Values of collection type can be expanded in some cases; see
            :ref:`daf_butler_dimension_expressions_identifiers` for more
            information.
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``data_id``
            argument (and may be used to provide a constraining data ID even
            when the ``data_id`` argument is `None`).

        Returns
        -------
        records : `DimensionRecordQueryResults`
            Dimension records matching the given query parameters.

        Raises
        ------
        lsst.daf.butler.registry.DataIdError
            Raised when ``data_id`` or keyword arguments specify unknown
            dimensions or values, or when they contain inconsistent values.
        lsst.daf.butler.registry.UserExpressionError
            Raised when ``where`` expression is invalid.
        """
        raise NotImplementedError()