Coverage for python/lsst/daf/butler/registry/_collection_summary.py: 28% of 58 statements (coverage.py v7.4.3, created at 2024-03-07 11:04 +0000)


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("CollectionSummary",)

import dataclasses
from collections.abc import Generator, Iterable, Mapping, Set
from typing import cast

from .._dataset_ref import DatasetRef
from .._dataset_type import DatasetType
from .._named import NamedValueSet
from ..dimensions import DataCoordinate


@dataclasses.dataclass
class CollectionSummary:
    """A summary of the datasets that can be found in a collection."""

    def copy(self) -> CollectionSummary:
        """Return a deep copy of this object.

        Returns
        -------
        copy : `CollectionSummary`
            A copy of ``self`` that can be modified without modifying ``self``
            at all.
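
        Examples
        --------
        A minimal doctest-style sketch showing that the copy is independent
        of the original; the governor name and value are arbitrary strings
        used only for illustration:

        >>> a = CollectionSummary()
        >>> b = a.copy()
        >>> b.governors["instrument"] = {"HSC"}
        >>> a.governors
        {}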

53 """ 

54 return CollectionSummary( 

55 dataset_types=self.dataset_types.copy(), 

56 governors={k: v.copy() for k, v in self.governors.items()}, 

57 ) 

58 

59 def add_datasets_generator(self, refs: Iterable[DatasetRef]) -> Generator[DatasetRef, None, None]: 

60 """Include the given datasets in the summary, yielding them back as a 

61 generator. 

62 

63 Parameters 

64 ---------- 

65 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

66 Datasets to include. 

67 

68 Yields 

69 ------ 

70 ref : `DatasetRef` 

71 The same dataset references originally passed in. 

72 

73 Notes 

74 ----- 

75 As a generator, this method does nothing if its return iterator is not 

76 used. Call `add_datasets` instead to avoid this; this method is 

77 intended for the case where the given iterable may be single-pass and a 

78 copy is not desired, but other processing needs to be done on its 

79 elements. 
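
        Examples
        --------
        A sketch of the intended single-pass pattern; ``refs`` stands for an
        iterable of `DatasetRef` obtained elsewhere, and ``process(ref)`` is
        a hypothetical placeholder for the other work to be done on each
        element::

            summary = CollectionSummary()
            for ref in summary.add_datasets_generator(refs):
                process(ref)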

80 """ 

81 for ref in refs: 

82 self.dataset_types.add(ref.datasetType) 

83 for gov in ref.dataId.dimensions.governors: 

84 self.governors.setdefault(gov, set()).add(cast(str, ref.dataId[gov])) 

85 yield ref 

86 

87 def add_datasets(self, refs: Iterable[DatasetRef]) -> None: 

88 """Include the given datasets in the summary. 

89 

90 Parameters 

91 ---------- 

92 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

93 Datasets to include. 

94 """ 

95 for _ in self.add_datasets_generator(refs): 

96 pass 

97 

98 def add_data_ids_generator( 

99 self, dataset_type: DatasetType, data_ids: Iterable[DataCoordinate] 

100 ) -> Generator[DataCoordinate, None, None]: 

101 """Include the given dataset type and data IDs in the summary, yielding 

102 them back as a generator. 

103 

104 Parameters 

105 ---------- 

106 dataset_type : `DatasetType` 

107 Dataset type to include. 

108 data_ids : `~collections.abc.Iterable` [ `DataCoordinate` ] 

109 Data IDs to include. 

110 

111 Yields 

112 ------ 

113 data_id : `DataCoordinate` 

114 The same data IDs originally passed in. 

115 

116 Notes 

117 ----- 

118 As a generator, this method does nothing if its return iterator is not 

119 used. Call `add_data_ids` instead to avoid this; this method is 

120 intended for the case where the given iterable may be single-pass and a 

121 copy is not desired, but other processing needs to be done on its 

122 elements. 
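
        Examples
        --------
        A sketch of the intended single-pass pattern; ``data_ids`` stands for
        an iterable of `DataCoordinate` obtained elsewhere, and
        ``process(data_id)`` is a hypothetical placeholder for the other work
        to be done on each element::

            summary = CollectionSummary()
            for data_id in summary.add_data_ids_generator(dataset_type, data_ids):
                process(data_id)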

123 """ 

124 self.dataset_types.add(dataset_type) 

125 for data_id in data_ids: 

126 for gov in data_id.dimensions.governors: 

127 self.governors.setdefault(gov, set()).add(cast(str, data_id[gov])) 

128 yield data_id 

129 

130 def add_data_ids(self, dataset_type: DatasetType, data_ids: Iterable[DataCoordinate]) -> None: 

131 """Include the given dataset type and data IDs in the summary. 

132 

133 Parameters 

134 ---------- 

135 dataset_type : `DatasetType` 

136 Dataset type to include. 

137 data_ids : `~collections.abc.Iterable` [ `DataCoordinate` ] 

138 Data IDs to include. 

139 """ 

140 for _ in self.add_data_ids_generator(dataset_type, data_ids): 

141 pass 

142 

143 def update(self, *args: CollectionSummary) -> None: 

144 """Update this summary with dataset types and governor dimension values 

145 from other summaries. 

146 

147 Parameters 

148 ---------- 

149 *args : `CollectionSummary` 

150 Summaries to include in ``self``. 
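
        Examples
        --------
        A minimal doctest-style sketch showing the in-place merge; the
        governor names and values are arbitrary strings used only for
        illustration:

        >>> a = CollectionSummary()
        >>> a.governors["instrument"] = {"HSC"}
        >>> b = CollectionSummary()
        >>> b.governors["skymap"] = {"hsc_rings_v1"}
        >>> a.update(b)
        >>> sorted(a.governors)
        ['instrument', 'skymap']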

151 """ 

152 for arg in args: 

153 self.dataset_types.update(arg.dataset_types) 

154 for gov, values in arg.governors.items(): 

155 self.governors.setdefault(gov, set()).update(values) 

156 

157 def union(*args: CollectionSummary) -> CollectionSummary: 

158 """Construct a summary that contains all dataset types and governor 

159 dimension values in any of the inputs. 

160 

161 Parameters 

162 ---------- 

163 *args : `CollectionSummary` 

164 Summaries to combine. 

165 

166 Returns 

167 ------- 

168 unioned : `CollectionSummary` 

169 New summary object that represents the union of the given ones. 
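
        Examples
        --------
        A minimal doctest-style sketch; because this method takes only
        ``*args`` (there is no explicit ``self``), it can be called on the
        class with any number of summaries. The governor values are
        arbitrary strings used only for illustration:

        >>> a = CollectionSummary()
        >>> a.governors["instrument"] = {"HSC"}
        >>> b = CollectionSummary()
        >>> b.governors["instrument"] = {"LSSTCam"}
        >>> u = CollectionSummary.union(a, b)
        >>> sorted(u.governors["instrument"])
        ['HSC', 'LSSTCam']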

170 """ 

171 result = CollectionSummary() 

172 result.update(*args) 

173 return result 

174 

175 def is_compatible_with( 

176 self, 

177 dataset_type: DatasetType, 

178 dimensions: Mapping[str, Set[str]], 

179 rejections: list[str] | None = None, 

180 name: str | None = None, 

181 ) -> bool: 

182 """Test whether the collection summarized by this object should be 

183 queried for a given dataset type and governor dimension values. 

184 

185 Parameters 

186 ---------- 

187 dataset_type : `DatasetType` 

188 Dataset type being queried. If this collection has no instances of 

189 this dataset type (or its parent dataset type, if it is a 

190 component), `False` will always be returned. 

191 dimensions : `~collections.abc.Mapping` 

192 Bounds on the values governor dimensions can take in the query, 

193 usually from a WHERE expression, as a mapping from dimension name 

194 to a set of `str` governor dimension values. 

195 rejections : `list` [ `str` ], optional 

196 If provided, a list that will be populated with a log- or 

197 exception-friendly message explaining why this dataset is 

198 incompatible with this collection when `False` is returned. 

199 name : `str`, optional 

200 Name of the collection this object summarizes, for use in messages 

201 appended to ``rejections``. Ignored if ``rejections`` is `None`. 

202 

203 Returns 

204 ------- 

205 compatible : `bool` 

206 `True` if the dataset query described by this summary and the given 

207 arguments might yield non-empty results; `False` if the result from 

208 such a query is definitely empty. 
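
        Examples
        --------
        A sketch of a typical pre-query check; ``summary`` and
        ``dataset_type`` stand for objects obtained elsewhere, and the
        collection name and governor values are placeholders::

            messages: list[str] = []
            if not summary.is_compatible_with(
                dataset_type,
                {"instrument": {"HSC"}},
                rejections=messages,
                name="HSC/runs/example",
            ):
                ...  # skip querying this collection; ``messages`` says why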

209 """ 

210 parent = dataset_type if not dataset_type.isComponent() else dataset_type.makeCompositeDatasetType() 

211 if parent.name not in self.dataset_types.names: 

212 if rejections is not None: 

213 rejections.append(f"No datasets of type {parent.name} in collection {name!r}.") 

214 return False 

215 for gov_name in self.governors.keys() & dataset_type.dimensions.names & dimensions.keys(): 

216 values_in_collection = self.governors[gov_name] 

217 values_given = dimensions[gov_name] 

218 if values_in_collection.isdisjoint(values_given): 

219 if rejections is not None: 

220 rejections.append( 

221 f"No datasets with {gov_name} in {values_given} in collection {name!r}." 

222 ) 

223 return False 

224 return True 

225 

226 dataset_types: NamedValueSet[DatasetType] = dataclasses.field(default_factory=NamedValueSet) 

227 """Dataset types that may be present in the collection 

228 (`NamedValueSet` [ `DatasetType` ]). 

229 

230 A dataset type not in this set is definitely not in the collection, but 

231 the converse is not necessarily true. 

232 """ 

233 

234 governors: dict[str, set[str]] = dataclasses.field(default_factory=dict) 

235 """Governor data ID values that are present in the collection's dataset 

236 data IDs (`dict` [ `str`, `set` [ `str` ] ]). 

237 

238 A data ID value not in this restriction is not necessarily inconsistent 

239 with a query in the collection; such a search may only involve dataset 

240 types that do not include one or more governor dimensions in their data 

241 IDs, and hence the values of those data IDs are unconstrained by this 

242 collection in the query. 

243 """