Coverage for python/lsst/daf/butler/registry/_collection_summary.py: 26%

55 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-23 03:00 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

# Public API of this module: only the summary dataclass is exported.
__all__ = ("CollectionSummary",)

24 

25import dataclasses 

26from typing import AbstractSet, Generator, Iterable, List, Mapping, Optional, cast 

27 

28from ..core import DataCoordinate, DatasetRef, DatasetType 

29from ..core.named import NamedValueSet 

30 

31 

@dataclasses.dataclass
class CollectionSummary:
    """A summary of the datasets that can be found in a collection."""

    dataset_types: NamedValueSet[DatasetType] = dataclasses.field(default_factory=NamedValueSet)
    """Dataset types that may be present in the collection
    (`NamedValueSet` [ `DatasetType` ]).

    Absence from this set guarantees absence from the collection, but
    presence here does not guarantee the collection actually contains any
    datasets of that type.
    """

    governors: dict[str, set[str]] = dataclasses.field(default_factory=dict)
    """Governor data ID values present in the collection's dataset data IDs
    (`dict` [ `str`, `set` [ `str` ] ]).

    A data ID value absent from this mapping is not necessarily inconsistent
    with a query in the collection; such a query may involve only dataset
    types whose data IDs do not include one or more governor dimensions, and
    the values of those dimensions are then unconstrained by this collection.
    """

    def copy(self) -> CollectionSummary:
        """Return a deep copy of this object.

        Returns
        -------
        copy : `CollectionSummary`
            A copy of ``self`` that can be modified without modifying
            ``self`` at all.
        """
        # Copy each governor value set as well, so mutating the copy's sets
        # cannot leak back into the original.
        return CollectionSummary(
            dataset_types=self.dataset_types.copy(),
            governors={dimension: set(values) for dimension, values in self.governors.items()},
        )

    def add_datasets_generator(self, refs: Iterable[DatasetRef]) -> Generator[DatasetRef, None, None]:
        """Include the given datasets in the summary, yielding them back as a
        generator.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            Datasets to include.

        Yields
        ------
        ref : `DatasetRef`
            The same dataset references originally passed in.

        Notes
        -----
        As a generator, this method does nothing unless its returned iterator
        is consumed.  Call `add_datasets` instead to avoid that pitfall; this
        method exists for the case where the given iterable may be
        single-pass, a copy is not desired, and other processing needs to be
        done on its elements as they stream through.
        """
        for dataset_ref in refs:
            self.dataset_types.add(dataset_ref.datasetType)
            data_id = dataset_ref.dataId
            for dimension in data_id.graph.governors.names:
                self.governors.setdefault(dimension, set()).add(cast(str, data_id[dimension]))
            yield dataset_ref

    def add_datasets(self, refs: Iterable[DatasetRef]) -> None:
        """Include the given datasets in the summary.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            Datasets to include.
        """
        # Exhaust the generator; the summary updates happen as side effects.
        for _dataset_ref in self.add_datasets_generator(refs):
            pass

    def add_data_ids_generator(
        self, dataset_type: DatasetType, data_ids: Iterable[DataCoordinate]
    ) -> Generator[DataCoordinate, None, None]:
        """Include the given dataset type and data IDs in the summary,
        yielding the data IDs back as a generator.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type to include.
        data_ids : `Iterable` [ `DataCoordinate` ]
            Data IDs to include.

        Yields
        ------
        data_id : `DataCoordinate`
            The same data IDs originally passed in.

        Notes
        -----
        As a generator, this method does nothing unless its returned iterator
        is consumed.  Call `add_data_ids` instead to avoid that pitfall; this
        method exists for the case where the given iterable may be
        single-pass, a copy is not desired, and other processing needs to be
        done on its elements as they stream through.
        """
        # The dataset type is recorded unconditionally, even if data_ids
        # turns out to be empty.
        self.dataset_types.add(dataset_type)
        for data_id in data_ids:
            for dimension in data_id.graph.governors.names:
                self.governors.setdefault(dimension, set()).add(cast(str, data_id[dimension]))
            yield data_id

    def add_data_ids(self, dataset_type: DatasetType, data_ids: Iterable[DataCoordinate]) -> None:
        """Include the given dataset type and data IDs in the summary.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type to include.
        data_ids : `Iterable` [ `DataCoordinate` ]
            Data IDs to include.
        """
        # Exhaust the generator; the summary updates happen as side effects.
        for _data_id in self.add_data_ids_generator(dataset_type, data_ids):
            pass

    def update(self, *args: CollectionSummary) -> None:
        """Update this summary with dataset types and governor dimension
        values from other summaries.

        Parameters
        ----------
        *args : `CollectionSummary`
            Summaries to include in ``self``.
        """
        for other in args:
            self.dataset_types.update(other.dataset_types)
            for dimension, values in other.governors.items():
                if dimension in self.governors:
                    self.governors[dimension].update(values)
                else:
                    # Copy into a fresh set so we never alias the other
                    # summary's value sets.
                    self.governors[dimension] = set(values)

    def union(*args: CollectionSummary) -> CollectionSummary:
        """Construct a summary that contains all dataset types and governor
        dimension values in any of the inputs.

        Parameters
        ----------
        *args : `CollectionSummary`
            Summaries to combine.

        Returns
        -------
        unioned : `CollectionSummary`
            New summary object that represents the union of the given ones.
        """
        # NOTE: deliberately no explicit ``self`` parameter, so this can be
        # invoked either on an instance (``a.union(b)``) or on the class
        # (``CollectionSummary.union(a, b, c)``) with any number of inputs.
        result = CollectionSummary()
        result.update(*args)
        return result

    def is_compatible_with(
        self,
        dataset_type: DatasetType,
        dimensions: Mapping[str, AbstractSet[str]],
        rejections: Optional[List[str]] = None,
        name: Optional[str] = None,
    ) -> bool:
        """Test whether the collection summarized by this object should be
        queried for a given dataset type and governor dimension values.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type being queried.  If this collection has no instances
            of this dataset type (or its parent dataset type, if it is a
            component), `False` will always be returned.
        dimensions : `Mapping`
            Bounds on the values governor dimensions can take in the query,
            usually from a WHERE expression, as a mapping from dimension name
            to a set of `str` governor dimension values.
        rejections : `list` [ `str` ], optional
            If provided, a list that will be populated with a log- or
            exception-friendly message explaining why this dataset is
            incompatible with this collection when `False` is returned.
        name : `str`, optional
            Name of the collection this object summarizes, for use in
            messages appended to ``rejections``.  Ignored if ``rejections``
            is `None`.

        Returns
        -------
        compatible : `bool`
            `True` if the dataset query described by this summary and the
            given arguments might yield non-empty results; `False` if the
            result from such a query is definitely empty.
        """
        # Component dataset types are summarized under their composite
        # (parent) dataset type, so look that one up instead.
        if dataset_type.isComponent():
            parent = dataset_type.makeCompositeDatasetType()
        else:
            parent = dataset_type
        if parent.name not in self.dataset_types.names:
            if rejections is not None:
                rejections.append(f"No datasets of type {parent.name} in collection {name!r}.")
            return False
        # Only governors that are summarized here, part of the dataset
        # type's dimensions, AND constrained by the query can rule the
        # collection out.
        relevant = self.governors.keys() & dataset_type.dimensions.names & dimensions.keys()
        for gov_name in relevant:
            summarized_values = self.governors[gov_name]
            values_given = dimensions[gov_name]
            if summarized_values.isdisjoint(values_given):
                if rejections is not None:
                    rejections.append(
                        f"No datasets with {gov_name} in {values_given} in collection {name!r}."
                    )
                return False
        return True