Coverage for python/lsst/verify/extract_metricvalues.py: 8%

99 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-09-16 01:31 -0700

1# This file is part of verify. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Tools for loading metric values from a butler and printing them, or from 

23two butlers and differencing them. 

24""" 

25__all__ = ["load_value", "load_timing", "load_memory", 

26 "print_metrics", "print_diff_metrics", "load_from_butler"] 

27 

28import astropy.units as u 

29 

30 

31def print_metrics(butler, kind, *, data_id_keys=None, 

32 data_id_restriction=None, verbose=False): 

33 """Print all metrics with measured values in the given repo. 

34 

35 Parameters 

36 ---------- 

37 butler : `lsst.daf.butler.Butler` 

38 Butler to load values from. 

39 kind : `str` 

40 Kind of metrics to load. 

41 data_id_keys : `collection` [`str`], optional 

42 List of Butler dataId keys to restrict the printed output to; 

43 for example: ``("detector", "visit")``. 

44 data_id_restriction : `dict`, optional 

45 Only include values whose dataId matches these key:value pairs; 

46 for example: ``{"detector": 50}``. 

47 verbose : `bool`, optional 

48 Print extra information when loading values. 

49 

50 Returns 

51 ------- 

52 output : `str` 

53 A formatted string with all the requested metric values. 

54 """ 

55 def value_formatter_default(value): 

56 return f"{value}" 

57 

58 def value_formatter_timing(value): 

59 return f"{value.datum.label}: {value.quantity:.4}" 

60 

61 def value_formatter_memory(value): 

62 return f"{value.datum.label}: {value.quantity.to(u.Mibyte):.5}" 

63 

64 match kind: 

65 case "value": 

66 result = load_value(butler, verbose=verbose) 

67 value_formatter = value_formatter_default 

68 case "timing": 

69 result = load_timing(butler, verbose=verbose) 

70 value_formatter = value_formatter_timing 

71 case "memory": 

72 result = load_memory(butler, verbose=verbose) 

73 value_formatter = value_formatter_memory 

74 case _: 

75 raise RuntimeError(f"Cannot handle kind={kind}") 

76 

77 old_data_id = None 

78 for (data_id, metric), value in sorted(result.items()): 

79 if not _match_data_id(data_id, data_id_restriction): 

80 continue 

81 if old_data_id != data_id: 

82 print(f"\n{_data_id_label(data_id, data_id_keys)}") 

83 old_data_id = data_id 

84 

85 print(value_formatter(value)) 

86 

87 

def print_diff_metrics(butler1, butler2, data_id_keys=None, verbose=False):
    """Load metric values from two repos and print their differences.

    This only supports differencing metrics that aren't time or memory-related.

    For every metric loaded from ``butler1`` the difference
    ``butler2 - butler1`` is printed if it is non-zero (or always, in
    verbose mode), followed by a summary of how many metrics were identical
    and, if any metric was missing from ``butler2``, a short diagnostic.

    Parameters
    ----------
    butler1, butler2 : `lsst.daf.butler.Butler`
        Butlers to load values to difference from.
    data_id_keys : `collection` [`str`], optional
        List of Butler dataId keys to restrict the printed output to;
        for example: ``("detector", "visit")``.
    verbose : `bool`, optional
        Print extra information when loading values, and about failures.
    """
    # Forward ``verbose`` so the loaders report what they found, as documented.
    result1 = load_value(butler1, verbose=verbose)
    result2 = load_value(butler2, verbose=verbose)

    same = 0
    failed = 0
    old_data_id = None
    for key in sorted(result1):
        data_id, metric = key
        if old_data_id != data_id:
            print(f"\n{_data_id_label(data_id, data_id_keys)}")
            old_data_id = data_id

        value1 = result1[key]
        # Only the lookup in the second result can legitimately fail.
        try:
            value2 = result2[key]
        except KeyError:
            print(f"Result 2 does not contain metric '{metric}'")
            failed += 1
            continue

        delta = value2.quantity - value1.quantity
        if delta != 0 or verbose:
            print(f"{value1.datum.label}: {delta} / {value1.quantity}")
        if delta == 0:
            same += 1

    print(f"Number of metrics that are the same in both runs: {same} / {len(result2)}")

    if failed != 0:
        print()
        print(f"butler1 metrics found: {len(result1)}")
        print(f"butler2 metrics found: {len(result2)}")
        print(f"metrics in butler1 that were not found in butler2: {failed}")
        print("Check that the butler registry schemas are comparable, if most metrics are not being found.")
        print("Run with verbose mode (-v) for more info.")
        if verbose:
            print("Full DataCoordinates for the first key of each result, to compare schemas:")
            for result in (result1, result2):
                # The second result may be completely empty (nothing was
                # found at all), so guard against indexing into no keys.
                if result:
                    first_data_id, _ = min(result)
                    print(first_data_id.full)
                else:
                    print("(no metrics found)")

144 

145 

146def _match_data_id(data_id, data_id_restriction): 

147 """Return True if ``data_id`` matches a non-None ``data_id_restriction``. 

148 """ 

149 if data_id_restriction is None: 

150 return True 

151 for key, value in data_id_restriction.items(): 

152 if not (data_id[key] == value): 

153 return False 

154 return True 

155 

156 

157def _data_id_label(data_id, keys): 

158 """Return a string label for this data_id, optionally restricting the 

159 output to only certain key:value pairs. 

160 """ 

161 if keys is not None: 

162 return ', '.join(f"{key}: {data_id[key]}" for key in keys) 

163 else: 

164 return data_id 

165 

166 

def load_value(butler, verbose=False):
    """Load every measured metric that is neither a timing nor a memory
    metric from the given butler repo.

    Dataset types matching ``metricvalue*`` are loaded, except those whose
    name ends in ``Time`` or ``Memory``.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler to load values from.
    verbose : `bool`, optional
        Print extra information when loading values.

    Returns
    -------
    result : `dict` [`tuple`, `MetricValue`]
        The loaded metric values, keyed on data_id
        (`~lsst.daf.butler.DataCoordinate`) and metric name (`str`).
    """
    excluded_suffixes = ("Time", "Memory")
    return load_from_butler(butler, "metricvalue*", reject_suffix=excluded_suffixes, verbose=verbose)

184 

185 

def load_timing(butler, verbose=False):
    """Load every measured timing metric from the given butler repo.

    Timing metrics are the dataset types matching ``metricvalue*Time``.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler to load values from.
    verbose : `bool`, optional
        Print extra information when loading values.

    Returns
    -------
    result : `dict` [`tuple`, `MetricValue`]
        The loaded metric values, keyed on data_id
        (`~lsst.daf.butler.DataCoordinate`) and metric name (`str`).
    """
    timing_query = "metricvalue*Time"
    return load_from_butler(butler, timing_query, verbose=verbose)

203 

204 

def load_memory(butler, verbose=False):
    """Load every measured memory usage metric from the given butler repo.

    Memory metrics are the dataset types matching ``metricvalue*Memory``.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler to load values from.
    verbose : `bool`, optional
        Print extra information when loading values.

    Returns
    -------
    result : `dict` [`tuple`, `MetricValue`]
        The loaded metric values, keyed on data_id
        (`~lsst.daf.butler.DataCoordinate`) and metric name (`str`).
    """
    memory_query = "metricvalue*Memory"
    return load_from_butler(butler, memory_query, verbose=verbose)

222 

223 

def load_from_butler(butler, query, reject_suffix=None, verbose=False):
    """Load all metric values whose dataset type matches a query.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler created with the appropriate collections, etc.
    query : `str`
        Butler dataset query to get the metric names to load.
    reject_suffix : `str` or `iterable` [`str`], optional
        String or iterable of strings to not load if they appear at the end
        of the metric name.
    verbose : `bool`, optional
        Print extra information when loading.

    Returns
    -------
    result : `dict` [`tuple`, `MetricValue`]
        The loaded metric values, keyed on data_id
        (`~lsst.daf.butler.DataCoordinate`) and metric name (`str`).
    """
    # all possible metrics that have been registered
    metrics = list(butler.registry.queryDatasetTypes(query))
    if reject_suffix is not None:
        # ``str.endswith`` only accepts a str or a tuple of str, so convert
        # any other iterable (list, set, generator, ...) to a tuple first.
        if not isinstance(reject_suffix, str):
            reject_suffix = tuple(reject_suffix)
        metrics = [m for m in metrics if not m.name.endswith(reject_suffix)]

    result = {}
    data_ids = set()
    for metric in metrics:
        # We only want one of each, so we need findFirst.
        datasets = set(butler.registry.queryDatasets(metric, findFirst=True))
        for dataset in datasets:
            value = butler.getDirect(dataset)
            # Tracked only so the verbose summary can count distinct dataIds.
            data_ids.add(dataset.dataId)
            result[(dataset.dataId, metric.name)] = value

    if verbose:
        print(f"Loaded {len(result)} values for {len(data_ids)} dataIds and {len(metrics)} metrics.")
    return result