Coverage for python/lsst/daf/butler/queries/_dataset_query_results.py: 60%

79 statements  

« prev     ^ index     » next       coverage.py v7.4.3, created at 2024-03-05 11:36 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

# Public names exported by this module (consumed by ``from ... import *``
# and by the package-level re-export machinery).
__all__ = (
    "DatasetQueryResults",
    "ChainedDatasetQueryResults",
    "SingleTypeDatasetQueryResults",
)

35 

36import itertools 

37from abc import abstractmethod 

38from collections.abc import Iterable, Iterator, Mapping 

39from typing import TYPE_CHECKING, Any, final 

40 

41from .._dataset_ref import DatasetRef 

42from .._dataset_type import DatasetType 

43from ..dimensions import DataId 

44from ._base import CountableQueryBase, QueryResultsBase 

45from .driver import QueryDriver 

46from .result_specs import DataCoordinateResultSpec, DatasetRefResultSpec 

47from .tree import Predicate, QueryTree 

48 

49if TYPE_CHECKING: 

50 from ._data_coordinate_query_results import DataCoordinateQueryResults 

51 

52 

class DatasetQueryResults(CountableQueryBase, Iterable[DatasetRef]):
    """A query for `DatasetRef` results."""

    @property
    @abstractmethod
    def has_dimension_records(self) -> bool:
        """Whether all data IDs in this iterable contain dimension records."""
        raise NotImplementedError()

    @abstractmethod
    def by_dataset_type(self) -> Iterator[SingleTypeDatasetQueryResults]:
        """Group results by dataset type.

        Returns
        -------
        iter : `~collections.abc.Iterator` [ `SingleTypeDatasetQueryResults` ]
            An iterator over `DatasetQueryResults` instances, each of which
            holds the results for exactly one dataset type.
        """
        raise NotImplementedError()

    @abstractmethod
    def with_dimension_records(self) -> DatasetQueryResults:
        """Return an equivalent results object whose `has_dimension_records`
        property is `True`.
        """
        raise NotImplementedError()

80 

81 

@final
class SingleTypeDatasetQueryResults(DatasetQueryResults, QueryResultsBase):
    """A query for `DatasetRef` results with a single dataset type.

    Parameters
    ----------
    driver : `QueryDriver`
        Implementation object that knows how to actually execute queries.
    tree : `QueryTree`
        Description of the query as a tree of joins and column expressions. The
        instance returned directly by the `Butler._query` entry point should be
        constructed via `make_unit_query_tree`.
    spec : `DatasetRefResultSpec`
        Specification of the query result rows, including output columns,
        ordering, and slicing.

    Notes
    -----
    This class should never be constructed directly by users; use
    `Query.datasets` instead.
    """

    def __init__(self, driver: QueryDriver, tree: QueryTree, spec: DatasetRefResultSpec):
        # Validate before handing the tree to the base class so a bad
        # spec/tree combination never produces a half-initialized object.
        spec.validate_tree(tree)
        super().__init__(driver, tree)
        self._spec = spec

    def __iter__(self) -> Iterator[DatasetRef]:
        # Results are paginated by the driver; keep fetching pages until the
        # driver reports there is no continuation key.
        page = self._driver.execute(self._spec, self._tree)
        while True:
            yield from page.rows
            if page.next_key is None:
                return
            page = self._driver.fetch_next_page(self._spec, page.next_key)

    @property
    def dataset_type(self) -> DatasetType:
        # Docstring inherited.
        spec = self._spec
        return DatasetType(spec.dataset_type_name, spec.dimensions, spec.storage_class_name)

    @property
    def data_ids(self) -> DataCoordinateQueryResults:
        # Docstring inherited.
        # Imported locally to avoid a circular import at module load time.
        from ._data_coordinate_query_results import DataCoordinateQueryResults

        data_id_spec = DataCoordinateResultSpec.model_construct(
            dimensions=self.dataset_type.dimensions.as_group(),
            include_dimension_records=self._spec.include_dimension_records,
        )
        return DataCoordinateQueryResults(self._driver, tree=self._tree, spec=data_id_spec)

    @property
    def has_dimension_records(self) -> bool:
        # Docstring inherited.
        return self._spec.include_dimension_records

    def with_dimension_records(self) -> SingleTypeDatasetQueryResults:
        # Docstring inherited.
        if self.has_dimension_records:
            return self
        return self._copy(tree=self._tree, include_dimension_records=True)

    def by_dataset_type(self) -> Iterator[SingleTypeDatasetQueryResults]:
        # Docstring inherited.  A single-type result is its own (only) group.
        return iter([self])

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return self._driver.count(self._tree, self._spec, exact=exact, discard=discard)

    def _copy(self, tree: QueryTree, **kwargs: Any) -> SingleTypeDatasetQueryResults:
        # Docstring inherited.
        updated_spec = self._spec.model_copy(update=kwargs)
        return SingleTypeDatasetQueryResults(self._driver, tree, updated_spec)

    def _get_datasets(self) -> frozenset[str]:
        # Docstring inherited.
        return frozenset((self.dataset_type.name,))

161 

162 

@final
class ChainedDatasetQueryResults(DatasetQueryResults):
    """A query for `DatasetRef` results with multiple dataset types.

    Parameters
    ----------
    by_dataset_type : `tuple` [ `SingleTypeDatasetQueryResults` ]
        Tuple of single-dataset-type query result objects to combine.

    Notes
    -----
    This class should never be constructed directly by users; use
    `Query.datasets` instead.
    """

    def __init__(self, by_dataset_type: tuple[SingleTypeDatasetQueryResults, ...]):
        self._by_dataset_type = by_dataset_type

    def __iter__(self) -> Iterator[DatasetRef]:
        # Iterate each single-type result in order, lazily.
        return itertools.chain.from_iterable(self._by_dataset_type)

    def by_dataset_type(self) -> Iterator[SingleTypeDatasetQueryResults]:
        # Docstring inherited.
        return iter(self._by_dataset_type)

    @property
    def has_dimension_records(self) -> bool:
        # Docstring inherited.  True only if every constituent has records.
        return all(single_type_results.has_dimension_records for single_type_results in self._by_dataset_type)

    def with_dimension_records(self) -> ChainedDatasetQueryResults:
        # Docstring inherited.
        # Pass the generator straight to tuple() instead of materializing an
        # intermediate list first (tuple([...]) builds the list only to
        # discard it).
        return ChainedDatasetQueryResults(
            tuple(
                single_type_results.with_dimension_records()
                for single_type_results in self._by_dataset_type
            )
        )

    def any(self, *, execute: bool = True, exact: bool = True) -> bool:
        # Docstring inherited.  Short-circuits on the first non-empty result.
        return any(
            single_type_results.any(execute=execute, exact=exact)
            for single_type_results in self._by_dataset_type
        )

    def explain_no_results(self, execute: bool = True) -> Iterable[str]:
        # Docstring inherited.  Concatenate explanations from each
        # constituent query, preserving their order.
        messages: list[str] = []
        for single_type_results in self._by_dataset_type:
            messages.extend(single_type_results.explain_no_results(execute=execute))
        return messages

    def count(self, *, exact: bool = True, discard: bool = False) -> int:
        # Docstring inherited.
        return sum(
            single_type_results.count(exact=exact, discard=discard)
            for single_type_results in self._by_dataset_type
        )

    def where(
        self, *args: DataId | str | Predicate, bind: Mapping[str, Any] | None = None, **kwargs: Any
    ) -> ChainedDatasetQueryResults:
        # Docstring inherited.
        # Apply the same constraint to every constituent query; as above,
        # feed the generator to tuple() directly.
        return ChainedDatasetQueryResults(
            tuple(
                single_type_results.where(*args, bind=bind, **kwargs)
                for single_type_results in self._by_dataset_type
            )
        )