# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("CollectionSummary",)

import dataclasses
from collections.abc import Generator, Iterable, Mapping, Set
from typing import cast

from ..core import DataCoordinate, DatasetRef, DatasetType
from ..core.named import NamedValueSet


@dataclasses.dataclass
class CollectionSummary:
    """A summary of the datasets that can be found in a collection."""

    def copy(self) -> CollectionSummary:
        """Return a deep copy of this object.

        Returns
        -------
        copy : `CollectionSummary`
            A copy of ``self`` that can be modified without modifying ``self``
            at all.
        """
        return CollectionSummary(
            dataset_types=self.dataset_types.copy(),
            governors={k: v.copy() for k, v in self.governors.items()},
        )

    def add_datasets_generator(self, refs: Iterable[DatasetRef]) -> Generator[DatasetRef, None, None]:
        """Include the given datasets in the summary, yielding them back as a
        generator.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            Datasets to include.

        Yields
        ------
        ref : `DatasetRef`
            The same dataset references originally passed in.

        Notes
        -----
        As a generator, this method does nothing if its returned iterator is
        not used. Call `add_datasets` instead to avoid this; this method is
        intended for the case where the given iterable may be single-pass and
        a copy is not desired, but other processing needs to be done on its
        elements.
        """
        for ref in refs:
            self.dataset_types.add(ref.datasetType)
            for gov in ref.dataId.graph.governors.names:
                self.governors.setdefault(gov, set()).add(cast(str, ref.dataId[gov]))
            yield ref

    def add_datasets(self, refs: Iterable[DatasetRef]) -> None:
        """Include the given datasets in the summary.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            Datasets to include.
        """
        for _ in self.add_datasets_generator(refs):
            pass
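    # Illustrative usage sketch (not part of the original module; ``refs``,
    # ``single_pass_refs``, and ``insert_rows`` are assumed names).  The
    # generator form lets a single-pass iterable be summarized and processed
    # in one traversal, while ``add_datasets`` consumes it eagerly:
    #
    #     summary = CollectionSummary()
    #     summary.add_datasets(refs)  # eager; fine for re-iterable inputs
    #
    #     summary = CollectionSummary()
    #     for ref in summary.add_datasets_generator(single_pass_refs):
    #         insert_rows(ref)  # hypothetical per-dataset processing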

    def add_data_ids_generator(
        self, dataset_type: DatasetType, data_ids: Iterable[DataCoordinate]
    ) -> Generator[DataCoordinate, None, None]:
        """Include the given dataset type and data IDs in the summary,
        yielding them back as a generator.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type to include.
        data_ids : `~collections.abc.Iterable` [ `DataCoordinate` ]
            Data IDs to include.

        Yields
        ------
        data_id : `DataCoordinate`
            The same data IDs originally passed in.

        Notes
        -----
        As a generator, this method does nothing if its returned iterator is
        not used. Call `add_data_ids` instead to avoid this; this method is
        intended for the case where the given iterable may be single-pass and
        a copy is not desired, but other processing needs to be done on its
        elements.
        """
        self.dataset_types.add(dataset_type)
        for data_id in data_ids:
            for gov in data_id.graph.governors.names:
                self.governors.setdefault(gov, set()).add(cast(str, data_id[gov]))
            yield data_id

    def add_data_ids(self, dataset_type: DatasetType, data_ids: Iterable[DataCoordinate]) -> None:
        """Include the given dataset type and data IDs in the summary.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type to include.
        data_ids : `~collections.abc.Iterable` [ `DataCoordinate` ]
            Data IDs to include.
        """
        for _ in self.add_data_ids_generator(dataset_type, data_ids):
            pass
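    # Illustrative sketch (hypothetical names): summarizing data IDs directly
    # when the dataset type is already known, e.g. from existing refs:
    #
    #     summary.add_data_ids(flat_type, [ref.dataId for ref in flat_refs])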

    def update(self, *args: CollectionSummary) -> None:
        """Update this summary with dataset types and governor dimension
        values from other summaries.

        Parameters
        ----------
        *args : `CollectionSummary`
            Summaries to include in ``self``.
        """
        for arg in args:
            self.dataset_types.update(arg.dataset_types)
            for gov, values in arg.governors.items():
                self.governors.setdefault(gov, set()).update(values)

    def union(*args: CollectionSummary) -> CollectionSummary:
        """Construct a summary that contains all dataset types and governor
        dimension values in any of the inputs.

        Parameters
        ----------
        *args : `CollectionSummary`
            Summaries to combine.

        Returns
        -------
        unioned : `CollectionSummary`
            New summary object that represents the union of the given ones.
        """
        result = CollectionSummary()
        result.update(*args)
        return result
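    # Illustrative sketch (assumed summary names, not original code): because
    # ``union`` takes only ``*args`` with no explicit ``self``, it can be
    # called either on the class or on an instance, with any number of
    # summaries:
    #
    #     combined = CollectionSummary.union(summary_a, summary_b, summary_c)
    #     combined = summary_a.union(summary_b)  # equivalent two-way union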

    def is_compatible_with(
        self,
        dataset_type: DatasetType,
        dimensions: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
        name: str | None = None,
    ) -> bool:
        """Test whether the collection summarized by this object should be
        queried for a given dataset type and governor dimension values.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type being queried. If this collection has no instances
            of this dataset type (or its parent dataset type, if it is a
            component), `False` will always be returned.
        dimensions : `~collections.abc.Mapping`
            Bounds on the values governor dimensions can take in the query,
            usually from a WHERE expression, as a mapping from dimension name
            to a set of `str` governor dimension values.
        rejections : `list` [ `str` ], optional
            If provided, a list that will be populated with a log- or
            exception-friendly message explaining why this dataset is
            incompatible with this collection when `False` is returned.
        name : `str`, optional
            Name of the collection this object summarizes, for use in
            messages appended to ``rejections``. Ignored if ``rejections``
            is `None`.

        Returns
        -------
        compatible : `bool`
            `True` if the dataset query described by this summary and the
            given arguments might yield non-empty results; `False` if the
            result from such a query is definitely empty.
        """
        parent = dataset_type if not dataset_type.isComponent() else dataset_type.makeCompositeDatasetType()
        if parent.name not in self.dataset_types.names:
            if rejections is not None:
                rejections.append(f"No datasets of type {parent.name} in collection {name!r}.")
            return False
        for gov_name in self.governors.keys() & dataset_type.dimensions.names & dimensions.keys():
            values_in_collection = self.governors[gov_name]
            values_given = dimensions[gov_name]
            if values_in_collection.isdisjoint(values_given):
                if rejections is not None:
                    rejections.append(
                        f"No datasets with {gov_name} in {values_given} in collection {name!r}."
                    )
                return False
        return True
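    # Illustrative sketch (the collection name and governor values are
    # assumptions): pruning a per-collection search before running the full
    # query, collecting human-readable reasons on rejection:
    #
    #     rejections: list[str] = []
    #     if summary.is_compatible_with(
    #         dataset_type,
    #         {"instrument": {"HSC"}},  # governor bounds from a WHERE clause
    #         rejections=rejections,
    #         name="HSC/runs/RC2",
    #     ):
    #         ...  # query this collection
    #     else:
    #         print("\n".join(rejections))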

    dataset_types: NamedValueSet[DatasetType] = dataclasses.field(default_factory=NamedValueSet)
    """Dataset types that may be present in the collection
    (`NamedValueSet` [ `DatasetType` ]).

    A dataset type not in this set is definitely not in the collection, but
    the converse is not necessarily true.
    """

    governors: dict[str, set[str]] = dataclasses.field(default_factory=dict)
    """Governor data ID values that are present in the collection's dataset
    data IDs (`dict` [ `str`, `set` [ `str` ] ]).

    A data ID value missing from this mapping does not necessarily rule out a
    query against the collection: the query may involve only dataset types
    whose data IDs do not include one or more governor dimensions, in which
    case those values are unconstrained by this collection.
    """