Coverage for python/lsst/verify/extract_metricvalues.py: 8%
99 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-20 01:56 -0700
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-20 01:56 -0700
1# This file is part of verify.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tools for loading metric values from a butler and printing them, or from
23two butlers and differencing them.
24"""
# Public API of this module: the loaders and the two printing entry points.
__all__ = [
    "load_value",
    "load_timing",
    "load_memory",
    "print_metrics",
    "print_diff_metrics",
    "load_from_butler",
]
28import astropy.units as u
def print_metrics(butler, kind, *, data_id_keys=None,
                  data_id_restriction=None, verbose=False):
    """Print all metrics with measured values in the given repo.

    The values are written to standard output, grouped by dataId: each new
    dataId gets a blank line and a label line, followed by one line per
    metric value. Nothing is returned.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler to load values from.
    kind : `str`
        Kind of metrics to load; one of ``"value"``, ``"timing"``, or
        ``"memory"``.
    data_id_keys : `collection` [`str`], optional
        List of Butler dataId keys to restrict the printed dataId label to;
        for example: ``("detector", "visit")``.
    data_id_restriction : `dict`, optional
        Only include values whose dataId matches these key:value pairs;
        for example: ``{"detector": 50}``.
    verbose : `bool`, optional
        Print extra information when loading values.

    Raises
    ------
    RuntimeError
        Raised if ``kind`` is not one of the supported kinds.
    """
    def value_formatter_default(value):
        return f"{value}"

    def value_formatter_timing(value):
        return f"{value.datum.label}: {value.quantity:.4}"

    def value_formatter_memory(value):
        return f"{value.datum.label}: {value.quantity.to(u.Mibyte):.5}"

    # Select the loader and the matching per-value formatter for this kind.
    if kind == "value":
        result = load_value(butler, verbose=verbose)
        value_formatter = value_formatter_default
    elif kind == "timing":
        result = load_timing(butler, verbose=verbose)
        value_formatter = value_formatter_timing
    elif kind == "memory":
        result = load_memory(butler, verbose=verbose)
        value_formatter = value_formatter_memory
    else:
        raise RuntimeError(f"Cannot handle kind={kind}")

    # Keys are (data_id, metric name) tuples, so sorting keeps all values for
    # one dataId adjacent and lets us print its label only once.
    old_data_id = None
    for (data_id, _metric), value in sorted(result.items()):
        if not _match_data_id(data_id, data_id_restriction):
            continue
        if old_data_id != data_id:
            print(f"\n{_data_id_label(data_id, data_id_keys)}")
            old_data_id = data_id

        print(value_formatter(value))
def print_diff_metrics(butler1, butler2, data_id_keys=None, verbose=False):
    """Load metric values from two repos and print their differences.

    This only supports differencing metrics that aren't time or memory-related.

    For every (dataId, metric) found in ``butler1`` the difference
    ``butler2 - butler1`` is printed if it is non-zero (or always, when
    ``verbose`` is set). Metrics missing from ``butler2`` are reported and
    counted as failures. Everything is written to standard output; nothing
    is returned.

    Parameters
    ----------
    butler1, butler2 : `lsst.daf.butler.Butler`
        Butlers to load values to difference from.
    data_id_keys : `collection` [`str`], optional
        List of Butler dataId keys to restrict the printed dataId label to;
        for example: ``("detector", "visit")``.
    verbose : `bool`, optional
        Print extra information when loading values, and about failures.
    """
    # Forward ``verbose`` so the loading summary is printed, as documented.
    result1 = load_value(butler1, verbose=verbose)
    result2 = load_value(butler2, verbose=verbose)

    same = 0
    failed = 0
    old_data_id = None
    for key in sorted(result1):
        data_id, metric = key
        if old_data_id != data_id:
            print(f"\n{_data_id_label(data_id, data_id_keys)}")
            old_data_id = data_id

        value1 = result1[key]
        # Only the lookup in the second result can legitimately be missing.
        try:
            value2 = result2[key]
        except KeyError:
            print(f"Result 2 does not contain metric '{metric}'")
            failed += 1
            continue

        delta = value2.quantity - value1.quantity
        if delta != 0 or verbose:
            print(f"{value1.datum.label}: {delta} / {value1.quantity}")
        if delta == 0:
            same += 1

    print(f"Number of metrics that are the same in both runs: {same} / {len(result2)}")

    if failed != 0:
        print()
        print(f"butler1 metrics found: {len(result1)}")
        print(f"butler2 metrics found: {len(result2)}")
        print(f"metrics in butler1 that were not found in butler2: {failed}")
        print("Check that the butler registry schemas are comparable, if most metrics are not being found.")
        print("Run with verbose mode (-v) for more info.")
        if verbose:
            print("Full DataCoordinates for the first key of each result, to compare schemas:")
            # ``failed != 0`` guarantees result1 is non-empty, but result2 may
            # hold nothing at all (e.g. every lookup failed), so guard it.
            print(min(result1)[0].full)
            if result2:
                print(min(result2)[0].full)
            else:
                print("(no metric values were loaded from butler2)")
146def _match_data_id(data_id, data_id_restriction):
147 """Return True if ``data_id`` matches a non-None ``data_id_restriction``.
148 """
149 if data_id_restriction is None:
150 return True
151 for key, value in data_id_restriction.items():
152 if not (data_id[key] == value):
153 return False
154 return True
157def _data_id_label(data_id, keys):
158 """Return a string label for this data_id, optionally restricting the
159 output to only certain key:value pairs.
160 """
161 if keys is not None:
162 return ', '.join(f"{key}: {data_id[key]}" for key in keys)
163 else:
164 return data_id
def load_value(butler, verbose=False):
    """Load every measured metric that is neither a timing nor a memory one.

    This queries the ``metricvalue*`` dataset types and skips those whose
    names end in ``Time`` or ``Memory``.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler to load values from.
    verbose : `bool`, optional
        Print extra information when loading values.

    Returns
    -------
    result : `dict` [`tuple`, `MetricValue`]
        The loaded metric values, keyed on data_id
        (`~lsst.daf.butler.DataCoordinate`) and metric name (`str`).
    """
    excluded_suffixes = ("Time", "Memory")
    return load_from_butler(butler, "metricvalue*",
                            reject_suffix=excluded_suffixes,
                            verbose=verbose)
def load_timing(butler, verbose=False):
    """Load every measured timing metric from the given butler repo.

    This queries the dataset types matching ``metricvalue*Time``.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler to load values from.
    verbose : `bool`, optional
        Print extra information when loading values.

    Returns
    -------
    result : `dict` [`tuple`, `MetricValue`]
        The loaded metric values, keyed on data_id
        (`~lsst.daf.butler.DataCoordinate`) and metric name (`str`).
    """
    timing_query = "metricvalue*Time"
    return load_from_butler(butler, timing_query, verbose=verbose)
def load_memory(butler, verbose=False):
    """Load every measured memory usage metric from the given butler repo.

    This queries the dataset types matching ``metricvalue*Memory``.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler to load values from.
    verbose : `bool`, optional
        Print extra information when loading values.

    Returns
    -------
    result : `dict` [`tuple`, `MetricValue`]
        The loaded metric values, keyed on data_id
        (`~lsst.daf.butler.DataCoordinate`) and metric name (`str`).
    """
    memory_query = "metricvalue*Memory"
    return load_from_butler(butler, memory_query, verbose=verbose)
def load_from_butler(butler, query, reject_suffix=None, verbose=False):
    """Load all metric values whose dataset type matches a query.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler created with the appropriate collections, etc.
    query : `str`
        Butler dataset query to get the metric names to load.
    reject_suffix : `str` or `iterable` [`str`], optional
        String or iterable of strings to not load if they appear at the end
        of the metric name.
    verbose : `bool`, optional
        Print extra information when loading.

    Returns
    -------
    result : `dict` [`tuple`, `MetricValue`]
        The loaded metric values, keyed on data_id
        (`~lsst.daf.butler.DataCoordinate`) and metric name (`str`).
    """
    # ``str.endswith`` only accepts a str or a tuple of str; normalise any
    # other iterable (list, set, ...) so the documented inputs all work.
    if reject_suffix is not None and not isinstance(reject_suffix, str):
        reject_suffix = tuple(reject_suffix)

    # all possible metrics that have been registered
    metrics = list(butler.registry.queryDatasetTypes(query))
    if reject_suffix is not None:
        metrics = [m for m in metrics if not m.name.endswith(reject_suffix)]

    result = {}
    data_ids = set()
    for metric in metrics:
        # We only want one of each, so we need findFirst.
        datasets = set(butler.registry.queryDatasets(metric, findFirst=True))
        for dataset in datasets:
            # NOTE(review): newer daf_butler releases appear to deprecate
            # ``getDirect`` in favour of ``get`` -- confirm against the
            # version this package is pinned to before changing it.
            value = butler.getDirect(dataset)
            data_ids.add(dataset.dataId)
            result[(dataset.dataId, metric.name)] = value

    if verbose:
        print(f"Loaded {len(result)} values for {len(data_ids)} dataIds and {len(metrics)} metrics.")
    return result