Coverage for python/lsst/verify/extract_metricvalues.py: 7%

101 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-14 02:16 -0700

1# This file is part of verify. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Tools for loading metric values from a butler and printing them, or from 

23two butlers and differencing them. 

24 

25These functions are used by the 

26:doc:`print_metricvalues <scripts/print_metricvalues>` script. 

27""" 

28__all__ = ["load_value", "load_timing", "load_memory", 

29 "print_metrics", "print_diff_metrics", "load_from_butler"] 

30 

31import astropy.units as u 

32 

33 

def print_metrics(butler, kind, *, data_id_keys=None,
                  data_id_restriction=None, verbose=False):
    """Load one kind of metric value from a repo and print it to stdout.

    The loaded values are visited in sorted ``(data_id, metric name)`` order.
    Each time the dataId changes, a blank line and a dataId label are printed,
    followed by one line per metric value.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler to load values from.
    kind : `str`
        Kind of metrics to load; one of ``"value"``, ``"timing"`` or
        ``"memory"``.
    data_id_keys : `collection` [`str`], optional
        Butler dataId keys to restrict the printed dataId label to;
        for example: ``("detector", "visit")``.
    data_id_restriction : `dict`, optional
        Only print values whose dataId matches these key:value pairs;
        for example: ``{"detector": 50}``. Values whose dataId lacks one of
        the keys are skipped.
    verbose : `bool`, optional
        Print extra information when loading values.

    Raises
    ------
    RuntimeError
        Raised if ``kind`` is not one of the supported kinds.

    Notes
    -----
    All output is written with `print`; the function itself returns `None`.
    """
    def format_plain(measurement):
        # Rely on the value's own string conversion.
        return f"{measurement}"

    def format_timing(measurement):
        return f"{measurement.datum.label}: {measurement.quantity:.4}"

    def format_memory(measurement):
        # Memory quantities are converted to mebibytes before formatting.
        return f"{measurement.datum.label}: {measurement.quantity.to(u.Mibyte):.5}"

    if kind == "value":
        render = format_plain
        loaded = load_value(butler, verbose=verbose)
    elif kind == "timing":
        render = format_timing
        loaded = load_timing(butler, verbose=verbose)
    elif kind == "memory":
        render = format_memory
        loaded = load_memory(butler, verbose=verbose)
    else:
        raise RuntimeError(f"Cannot handle kind={kind}")

    previous_data_id = None
    for (data_id, _metric), measurement in sorted(loaded.items()):
        if _match_data_id(data_id, data_id_restriction):
            # Emit the dataId header once per run of identical dataIds.
            if previous_data_id != data_id:
                print(f"\n{_data_id_label(data_id, data_id_keys)}")
                previous_data_id = data_id

            print(render(measurement))

90 

91 

def print_diff_metrics(butler1, butler2, data_id_keys=None, verbose=False):
    """Load metric values from two repos and print their differences.

    This only supports differencing metrics that aren't time or memory-related.

    For every ``(data_id, metric)`` found in ``butler1``, the difference
    ``value2 - value1`` is printed together with ``value1``. Metrics missing
    from ``butler2`` are reported and counted as failures. A summary of how
    many metrics are identical is printed at the end, followed by diagnostic
    information if any metric could not be found in ``butler2``.

    Parameters
    ----------
    butler1, butler2 : `lsst.daf.butler.Butler`
        Butlers to load values to difference from.
    data_id_keys : `collection` [`str`], optional
        List of Butler dataId keys to restrict the printed output to;
        for example: ``("detector", "visit")``. If a metric does not use all
        of these keys, it is printed with default formatting.
    verbose : `bool`, optional
        Print extra information when loading values, and about failures.
        Unchanged metrics (zero difference) are also printed when set.
    """
    # Forward ``verbose`` so the loaders report what they found, as documented.
    result1 = load_value(butler1, verbose=verbose)
    result2 = load_value(butler2, verbose=verbose)

    same = 0
    failed = 0
    old_data_id = None
    for key in sorted(result1):
        data_id, metric = key
        # Print the dataId header once per run of identical dataIds.
        if old_data_id != data_id:
            print(f"\n{_data_id_label(data_id, data_id_keys)}")
            old_data_id = data_id

        try:
            value2 = result2[key]
        except KeyError:
            print(f"Result 2 does not contain metric '{metric}'")
            failed += 1
            continue
        value1 = result1[key]

        delta = value2.quantity - value1.quantity
        if delta != 0 or verbose:
            print(f"{value1.datum.label}: {delta} / {value1.quantity}")
        if delta == 0:
            same += 1

    print(f"Number of metrics that are the same in both runs: {same} / {len(result2)}")

    if failed != 0:
        print()
        print(f"butler1 metrics found: {len(result1)}")
        print(f"butler2 metrics found: {len(result2)}")
        print(f"metrics in butler1 that were not found in butler2: {failed}")
        print("Check that the butler registry schemas are comparable, if most metrics are not being found.")
        print("Run with verbose mode (-v) for more info.")
        if verbose:
            print("Full DataCoordinates for the first key of each result, to compare schemas:")
            # ``failed != 0`` guarantees result1 is non-empty; result2 may
            # well be empty, which is exactly when this branch is most useful.
            print(min(result1)[0].full)
            if result2:
                print(min(result2)[0].full)
            else:
                print("butler2 contains no metric values.")

149 

150 

151def _match_data_id(data_id, data_id_restriction): 

152 """Return True if ``data_id`` matches a non-None ``data_id_restriction``. 

153 """ 

154 if data_id_restriction is None: 

155 return True 

156 for key, value in data_id_restriction.items(): 

157 if key not in data_id or (data_id[key] != value): 

158 return False 

159 return True 

160 

161 

162def _data_id_label(data_id, keys): 

163 """Return a string label for this data_id, optionally restricting the 

164 output to only certain key:value pairs. 

165 

166 If any of the specified keys are not in the data_id, this will return the 

167 default data_id formatting. 

168 """ 

169 if keys is None: 

170 return data_id 

171 

172 if not set(keys).issubset(set(data_id)): 

173 return data_id 

174 

175 return ', '.join(f"{key}: {data_id[key]}" for key in keys) 

176 

177 

def load_value(butler, verbose=False):
    """Load all measured metrics that are neither timing nor memory metrics.

    Queries the ``metricvalue*`` dataset types and drops those whose name
    ends in ``Time`` or ``Memory``.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler to load values from.
    verbose : `bool`, optional
        Print extra information when loading values.

    Returns
    -------
    result : `dict` [`tuple`, `MetricValue`]
        The loaded metric values, keyed on data_id
        (`~lsst.daf.butler.DataCoordinate`) and metric name (`str`).
    """
    excluded_suffixes = ("Time", "Memory")
    return load_from_butler(butler, "metricvalue*",
                            reject_suffix=excluded_suffixes,
                            verbose=verbose)

195 

196 

def load_timing(butler, verbose=False):
    """Load all measured timing metrics from the given butler repo.

    Timing metrics are the ``metricvalue*`` dataset types whose name ends in
    ``Time``.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler to load values from.
    verbose : `bool`, optional
        Print extra information when loading values.

    Returns
    -------
    result : `dict` [`tuple`, `MetricValue`]
        The loaded metric values, keyed on data_id
        (`~lsst.daf.butler.DataCoordinate`) and metric name (`str`).
    """
    timing_query = "metricvalue*Time"
    return load_from_butler(butler, timing_query, verbose=verbose)

214 

215 

def load_memory(butler, verbose=False):
    """Load all measured memory usage metrics from the given butler repo.

    Memory metrics are the ``metricvalue*`` dataset types whose name ends in
    ``Memory``.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler to load values from.
    verbose : `bool`, optional
        Print extra information when loading values.

    Returns
    -------
    result : `dict` [`tuple`, `MetricValue`]
        The loaded metric values, keyed on data_id
        (`~lsst.daf.butler.DataCoordinate`) and metric name (`str`).
    """
    memory_query = "metricvalue*Memory"
    return load_from_butler(butler, memory_query, verbose=verbose)

233 

234 

def load_from_butler(butler, query, reject_suffix=None, verbose=False):
    """Load every metric value whose dataset type matches a query.

    The dataset types matching ``query`` are looked up in the butler
    registry, optionally filtered by name suffix, and each dataset found for
    the remaining types is loaded with ``butler.get``.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler created with the appropriate collections, etc.
    query : `str`
        Butler dataset query to get the metric names to load.
    reject_suffix : `str` or iterable [`str`], optional
        String, or any iterable of strings, to not load if they appear at
        the end of the metric name.
    verbose : `bool`, optional
        Print extra information when loading.

    Returns
    -------
    result : `dict` [`tuple`, `MetricValue`]
        The loaded metric values, keyed on data_id
        (`~lsst.daf.butler.DataCoordinate`) and metric name (`str`).
    """
    # str.endswith() accepts only a str or a tuple of str, so normalise any
    # other iterable (list, set, generator, ...) to honour the documented API.
    if reject_suffix is not None and not isinstance(reject_suffix, str):
        reject_suffix = tuple(reject_suffix)

    # all possible metrics that have been registered
    metrics = list(butler.registry.queryDatasetTypes(query))
    if reject_suffix is not None:
        metrics = [m for m in metrics if not m.name.endswith(reject_suffix)]

    result = {}
    data_ids = set()
    for metric in metrics:
        # We only want one of each, so we need findFirst.
        datasets = set(butler.registry.queryDatasets(metric, findFirst=True))
        for dataset in datasets:
            data_ids.add(dataset.dataId)
            result[(dataset.dataId, metric.name)] = butler.get(dataset)

    if verbose:
        print(f"Loaded {len(result)} values for {len(data_ids)} dataIds and {len(metrics)} metrics.")
    return result