Coverage for python/lsst/verify/extract_metricvalues.py: 7%
101 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-01 09:54 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-01 09:54 +0000
1# This file is part of verify.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tools for loading metric values from a butler and printing them, or from
23two butlers and differencing them.
25These functions are used by the
26:doc:`print_metricvalues <scripts/print_metricvalues>` script.
27"""
28__all__ = ["load_value", "load_timing", "load_memory",
29 "print_metrics", "print_diff_metrics", "load_from_butler"]
31import astropy.units as u
def print_metrics(butler, kind, *, data_id_keys=None,
                  data_id_restriction=None, verbose=False):
    """Print the measured metric values of one kind found in a repo.

    The values are loaded with the loader matching ``kind``, iterated in
    sorted key order, and written to standard output. A header line labelling
    the dataId is printed each time the dataId changes, followed by one line
    per metric value. Nothing is returned.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler to load values from.
    kind : `str`
        Kind of metrics to load; one of ``"value"``, ``"timing"`` or
        ``"memory"``.
    data_id_keys : `collection` [`str`], optional
        Butler dataId keys to restrict the printed dataId label to;
        for example: ``("detector", "visit")``.
    data_id_restriction : `dict`, optional
        Only print values whose dataId matches these key:value pairs;
        for example: ``{"detector": 50}``. A value whose dataId lacks one of
        the keys is skipped.
    verbose : `bool`, optional
        Print extra information when loading values.

    Raises
    ------
    RuntimeError
        Raised if ``kind`` is not one of the supported kinds.
    """
    def format_plain(value):
        return f"{value}"

    def format_timing(value):
        return f"{value.datum.label}: {value.quantity:.4}"

    def format_memory(value):
        return f"{value.datum.label}: {value.quantity.to(u.Mibyte):.5}"

    # Choose the loader and the line formatter together, then load once.
    if kind == "value":
        loader, formatter = load_value, format_plain
    elif kind == "timing":
        loader, formatter = load_timing, format_timing
    elif kind == "memory":
        loader, formatter = load_memory, format_memory
    else:
        raise RuntimeError(f"Cannot handle kind={kind}")
    result = loader(butler, verbose=verbose)

    # Lazily drop the entries that do not satisfy the dataId restriction.
    selected = (
        (data_id, value)
        for (data_id, _), value in sorted(result.items())
        if _match_data_id(data_id, data_id_restriction)
    )

    current_data_id = None
    for data_id, value in selected:
        # Emit a header whenever we move on to a different dataId.
        if current_data_id != data_id:
            print(f"\n{_data_id_label(data_id, data_id_keys)}")
            current_data_id = data_id

        print(formatter(value))
def print_diff_metrics(butler1, butler2, data_id_keys=None, verbose=False):
    """Load metric values from two repos and print their differences.

    This only supports differencing metrics that aren't time or memory-related.

    For every metric found in ``butler1`` (in sorted key order) the
    difference ``value2 - value1`` is printed if it is non-zero, or always
    when ``verbose`` is set. Metrics missing from ``butler2`` are reported
    and counted, and a summary is printed at the end. Nothing is returned.

    Parameters
    ----------
    butler1, butler2 : `lsst.daf.butler.Butler`
        Butlers to load values to difference from.
    data_id_keys : `collection` [`str`], optional
        List of Butler dataId keys to restrict the printed output to;
        for example: ``("detector", "visit")``. If a metric does not use all
        of these keys, it is printed with default formatting.
    verbose : `bool`, optional
        Print extra information when loading values, and about failures.
    """
    # Forward ``verbose`` so the loaders report what they loaded, as the
    # parameter documentation promises.
    result1 = load_value(butler1, verbose=verbose)
    result2 = load_value(butler2, verbose=verbose)

    same = 0
    failed = 0
    old_data_id = None
    for key in sorted(result1):
        data_id, metric = key
        if old_data_id != data_id:
            print(f"\n{_data_id_label(data_id, data_id_keys)}")
            old_data_id = data_id

        value1 = result1[key]
        # Only the lookup in the second result can legitimately fail.
        try:
            value2 = result2[key]
        except KeyError:
            print(f"Result 2 does not contain metric '{metric}'")
            failed += 1
            continue

        delta = value2.quantity - value1.quantity
        if delta != 0 or verbose:
            print(f"{value1.datum.label}: {value2.quantity} - {value1.quantity} = {delta}")
        if delta == 0:
            same += 1

    print(f"Number of metrics that are the same in both runs: {same} / {len(result2)}")

    if failed != 0:
        print()
        print(f"butler1 metrics found: {len(result1)}")
        print(f"butler2 metrics found: {len(result2)}")
        print(f"metrics in butler1 that were not found in butler2: {failed}")
        print("Check that the butler registry schemas are comparable, if most metrics are not being found.")
        print("Run with verbose mode (-v) for more info.")
        if verbose:
            print("Full DataCoordinates for the first key of each result, to compare schemas:")
            # ``failed != 0`` implies result1 is non-empty; result2 may be
            # empty, in which case there is no key to show for it.
            print(min(result1)[0].full)
            if result2:
                print(min(result2)[0].full)
151def _match_data_id(data_id, data_id_restriction):
152 """Return True if ``data_id`` matches a non-None ``data_id_restriction``.
153 """
154 if data_id_restriction is None:
155 return True
156 for key, value in data_id_restriction.items():
157 if key not in data_id or (data_id[key] != value):
158 return False
159 return True
162def _data_id_label(data_id, keys):
163 """Return a string label for this data_id, optionally restricting the
164 output to only certain key:value pairs.
166 If any of the specified keys are not in the data_id, this will return the
167 default data_id formatting.
168 """
169 if keys is None:
170 return data_id
172 if not set(keys).issubset(set(data_id)):
173 return data_id
175 return ', '.join(f"{key}: {data_id[key]}" for key in keys)
def load_value(butler, verbose=False):
    """Load the measured metrics that are neither timing nor memory metrics.

    Delegates to `load_from_butler` with the ``"metricvalue*"`` query,
    rejecting names that end in ``"Time"`` or ``"Memory"``.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler to load values from.
    verbose : `bool`, optional
        Print extra information when loading values.

    Returns
    -------
    result : `dict` [`tuple`, `MetricValue`]
        The loaded metric values, keyed on data_id
        (`~lsst.daf.butler.DataCoordinate`) and metric name (`str`).
    """
    excluded_suffixes = ("Time", "Memory")
    return load_from_butler(butler, "metricvalue*",
                            reject_suffix=excluded_suffixes, verbose=verbose)
def load_timing(butler, verbose=False):
    """Load the measured timing metrics from the given butler repo.

    Delegates to `load_from_butler` with the ``"metricvalue*Time"`` query.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler to load values from.
    verbose : `bool`, optional
        Print extra information when loading values.

    Returns
    -------
    result : `dict` [`tuple`, `MetricValue`]
        The loaded metric values, keyed on data_id
        (`~lsst.daf.butler.DataCoordinate`) and metric name (`str`).
    """
    timing_query = "metricvalue*Time"
    return load_from_butler(butler, timing_query, verbose=verbose)
def load_memory(butler, verbose=False):
    """Load the measured memory usage metrics from the given butler repo.

    Delegates to `load_from_butler` with the ``"metricvalue*Memory"`` query.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler to load values from.
    verbose : `bool`, optional
        Print extra information when loading values.

    Returns
    -------
    result : `dict` [`tuple`, `MetricValue`]
        The loaded metric values, keyed on data_id
        (`~lsst.daf.butler.DataCoordinate`) and metric name (`str`).
    """
    memory_query = "metricvalue*Memory"
    return load_from_butler(butler, memory_query, verbose=verbose)
def load_from_butler(butler, query, reject_suffix=None, verbose=False):
    """Load every metric value whose dataset type matches a query.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler created with the appropriate collections, etc.
    query : `str`
        Butler dataset query to get the metric names to load.
    reject_suffix : `str` or `iterable` [`str`], optional
        String or iterable of strings to not load if they appear at the end
        of the metric name.
    verbose : `bool`, optional
        Print extra information when loading.

    Returns
    -------
    result : `dict` [`tuple`, `MetricValue`]
        The loaded metric values, keyed on data_id
        (`~lsst.daf.butler.DataCoordinate`) and metric name (`str`).
    """
    # all possible metrics that have been registered
    metrics = list(butler.registry.queryDatasetTypes(query))
    if reject_suffix is not None:
        # ``str.endswith`` only accepts a str or a tuple of str, so convert
        # any other iterable (list, set, generator) to a tuple first.
        if not isinstance(reject_suffix, str):
            reject_suffix = tuple(reject_suffix)
        metrics = [m for m in metrics if not m.name.endswith(reject_suffix)]

    result = {}
    data_ids = set()
    for metric in metrics:
        # We only want one of each, so we need findFirst.
        datasets = set(butler.registry.queryDatasets(metric, findFirst=True))
        for dataset in datasets:
            value = butler.get(dataset)
            data_ids.add(dataset.dataId)
            result[(dataset.dataId, metric.name)] = value

    if verbose:
        print(f"Loaded {len(result)} values for {len(data_ids)} dataIds and {len(metrics)} metrics.")
    return result