Coverage for python/lsst/ctrl/bps/bps_reports.py: 17%
146 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-03 10:06 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-03 10:06 +0000
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <https://www.gnu.org/licenses/>.
28"""Classes and functions used in reporting run status.
29"""
31__all__ = ["BaseRunReport", "DetailedRunReport", "SummaryRunReport", "ExitCodesReport"]
33import abc
34import logging
36from astropy.table import Table
38from .wms_service import WmsStates
40_LOG = logging.getLogger(__name__)
class BaseRunReport(abc.ABC):
    """The base class representing a run report.

    Parameters
    ----------
    fields : `list` [ `tuple` [ `str`, `str`]]
        The list of column specification, fields, to include in the report.
        Each field has a name and a type.
    """

    def __init__(self, fields):
        # The table holding the report entries; its columns are defined by
        # the supplied field specification.
        self._table = Table(dtype=fields)
        # Optional note set by methods which need to pass extra information
        # to their callers, see ``message``.
        self._msg = None

    # NOTE: defining ``__eq__`` without ``__hash__`` leaves the instances
    # unhashable (standard Python behavior).
    def __eq__(self, other):
        """Check if the other object is a report with an equal table.

        Parameters
        ----------
        other : `object`
            The object to compare the report with.

        Returns
        -------
        equal : `bool`
            `True` if ``other`` is a report and every element of the
            comparison between the two tables is true, `False` otherwise
            (including when ``other`` is not a report at all).
        """
        if isinstance(other, BaseRunReport):
            return all(self._table == other._table)
        return False

    def __len__(self):
        """Return the number of runs in the report."""
        return len(self._table)

    def __str__(self):
        """Return all lines of the formatted table joined by newlines."""
        lines = list(self._table.pformat_all())
        return "\n".join(lines)

    @property
    def message(self):
        """Extra information a method need to pass to its caller (`str`)."""
        return self._msg

    def clear(self):
        """Remove all entries from the report."""
        self._msg = None
        # The slice spans every row currently in the table.
        self._table.remove_rows(slice(len(self)))

    def sort(self, columns, ascending=True):
        """Sort the report entries according to one or more keys.

        Parameters
        ----------
        columns : `str` | `list` [ `str` ]
            The column(s) to order the report by.
        ascending : `bool`, optional
            Sort report entries in ascending order, default.

        Raises
        ------
        AttributeError
            Raised if supplied with non-existent column(s).
        """
        if isinstance(columns, str):
            columns = [columns]
        unknown_keys = set(columns) - set(self._table.colnames)
        if unknown_keys:
            # Sets have no defined iteration order; sort the names so the
            # error message is the same from one run to the next.
            raise AttributeError(
                f"cannot sort the report entries: column(s) {', '.join(sorted(unknown_keys))} not found"
            )
        self._table.sort(keys=columns, reverse=not ascending)

    @classmethod
    def from_table(cls, table):
        """Create a report from a table.

        Parameters
        ----------
        table : `astropy.table.Table`
            Information about a run in a tabular form.

        Returns
        -------
        inst : `lsst.ctrl.bps.bps_reports.BaseRunReport`
            A report created based on the information in the provided table.
        """
        inst = cls(table.dtype.descr)
        # The report keeps its own copy of the provided table.
        inst._table = table.copy()
        return inst

    @abc.abstractmethod
    def add(self, run_report, use_global_id=False):
        """Add a single run info to the report.

        Parameters
        ----------
        run_report : `lsst.ctrl.bps.WmsRunReport`
            Information for single run.
        use_global_id : `bool`, optional
            If set, use global run id. Defaults to False which means that
            the local id will be used instead.

            Only applicable in the context of a WMS using distributed job
            queues (e.g., HTCondor).
        """
class SummaryRunReport(BaseRunReport):
    """A run report in which each added run contributes a single row."""

    def add(self, run_report, use_global_id=False):
        # Docstring inherited from the base class.

        # Mark a running workflow which may require human attention.  The
        # states are checked in order and the first one having any jobs
        # determines the flag.
        flag = " "
        if run_report.state == WmsStates.RUNNING:
            attention_marks = (
                (WmsStates.FAILED, "F"),
                (WmsStates.DELETED, "D"),
                (WmsStates.HELD, "H"),
            )
            for state, mark in attention_marks:
                if run_report.job_state_counts.get(state, 0):
                    flag = mark
                    break

        _LOG.debug("total_number_jobs = %s", run_report.total_number_jobs)
        _LOG.debug("run_report.job_state_counts = %s", run_report.job_state_counts)

        # The success rate is the truncated percentage of succeeded jobs; it
        # stays unknown when the total number of jobs is missing or zero.
        if run_report.total_number_jobs:
            succeeded = run_report.job_state_counts.get(WmsStates.SUCCEEDED, 0)
            _LOG.debug("succeeded = %s", succeeded)
            success_rate = str(int(succeeded / run_report.total_number_jobs * 100))
        else:
            success_rate = "UNK"

        if use_global_id:
            run_id = run_report.global_wms_id
        else:
            run_id = run_report.wms_id

        self._table.add_row(
            (
                flag,
                run_report.state.name,
                success_rate,
                run_id,
                run_report.operator,
                run_report.project,
                run_report.campaign,
                run_report.payload,
                run_report.run,
            )
        )
class DetailedRunReport(BaseRunReport):
    """A detailed run report.

    Adding a run appends a "TOTAL" row with the job counts for each state
    followed, if a job summary is available, by one row per job label.
    """

    def add(self, run_report, use_global_id=False):
        # Docstring inherited from the base class.

        # If run summary exists, use it to get the reference job counts.
        by_label_expected = {}
        if run_report.run_summary:
            for part in run_report.run_summary.split(";"):
                label, count = part.split(":")
                by_label_expected[label] = int(count)

        # A state absent from the counts is reported as having no jobs, the
        # same way the summary report reads this mapping.
        state_counts = run_report.job_state_counts
        total = ["TOTAL"]
        total.extend(state_counts.get(state, 0) for state in WmsStates)
        total.append(sum(by_label_expected.values()) if by_label_expected else run_report.total_number_jobs)
        self._table.add_row(total)

        # Use the provided job summary. If it doesn't exist, compile it from
        # information about individual jobs.
        if run_report.job_summary:
            job_summary = run_report.job_summary
        elif run_report.jobs:
            job_summary = compile_job_summary(run_report.jobs)
        else:
            id_ = run_report.global_wms_id if use_global_id else run_report.wms_id
            self._msg = f"WARNING: Job summary for run '{id_}' not available, report maybe incomplete."
            return

        if by_label_expected:
            job_order = list(by_label_expected)
        else:
            job_order = sorted(job_summary)
            self._msg = "WARNING: Could not determine order of pipeline, instead sorted alphabetically."

        for label in job_order:
            try:
                summary_counts = job_summary[label]
            except KeyError:
                # Nothing is known about the jobs with this label; mark
                # every count as unavailable.
                counts = dict.fromkeys(WmsStates, -1)
            else:
                # Work on a copy so the adjustment below never alters
                # the summary owned by the caller.
                counts = dict(summary_counts)
                if label in by_label_expected:
                    # Jobs expected but not accounted for in any state are
                    # added to the unready ones.
                    already_counted = sum(counts.values())
                    if already_counted != by_label_expected[label]:
                        counts[WmsStates.UNREADY] = (
                            counts.get(WmsStates.UNREADY, 0) + by_label_expected[label] - already_counted
                        )

            run = [label]
            run.extend(counts.get(state, 0) for state in WmsStates)
            run.append(by_label_expected[label] if by_label_expected else -1)
            self._table.add_row(run)

    def __str__(self):
        """Return the formatted table with the first column left-aligned
        and all the remaining ones right-aligned.
        """
        alignments = ["<"] + [">"] * (len(self._table.colnames) - 1)
        lines = list(self._table.pformat_all(align=alignments))
        # Repeat the second line of the output (presumably the separator
        # under the column names) after the first data row, which is the
        # "TOTAL" row as long as the rows are in the insertion order.
        lines.insert(3, lines[1])
        return "\n".join(lines)
class ExitCodesReport(BaseRunReport):
    """A run report giving, for each job label, the number and the values
    of the exit codes reported by the wms service.
    """

    def add(self, run_report, use_global_id=False):
        # Docstring inherited from the base class.

        # Without the run summary there is no way to tell which labels to
        # report on.
        if not run_report.run_summary:
            id_ = run_report.global_wms_id if use_global_id else run_report.wms_id
            self._msg = f"WARNING: Job summary for run '{id_}' not available, report maybe incomplete."
            return

        # Take the label ordering from the run summary as it should reflect
        # the ordering of the pipetasks in the pipeline.
        entries = (part.split(":") for part in run_report.run_summary.split(";"))
        labels = [label for label, _ in entries]

        # Payload (e.g. pipetask) error codes:
        # * 1: general failure,
        # * 2: command line error (e.g. unknown command and/or option).
        # Any other code is counted as an infrastructure error.
        pyld_error_codes = {1, 2}

        def summarize(codes):
            # Return the number of codes and their distinct values as
            # a comma-separated string ("None" if there are no codes).
            if not codes:
                return 0, "None"
            distinct = sorted(str(code) for code in set(codes))
            return len(codes), ", ".join(distinct)

        exit_code_summary = run_report.exit_code_summary
        for label in labels:
            payload_codes = []
            infra_codes = []
            for code in exit_code_summary[label]:
                if code in pyld_error_codes:
                    payload_codes.append(code)
                else:
                    infra_codes.append(code)

            payload_count, payload_summary = summarize(payload_codes)
            infra_count, infra_summary = summarize(infra_codes)
            self._table.add_row([label, payload_count, payload_summary, infra_count, infra_summary])

    def __str__(self):
        """Return the formatted table with the first column left-aligned
        and all the remaining ones right-aligned.
        """
        alignments = [">"] * (len(self._table.colnames) - 1)
        alignments.insert(0, "<")
        return "\n".join(self._table.pformat_all(align=alignments))
def compile_job_summary(jobs):
    """Count the jobs in each state separately for every job label.

    Parameters
    ----------
    jobs : `list` [`lsst.ctrl.bps.WmsJobReport`]
        List of job reports.

    Returns
    -------
    job_summary : `dict` [`str`, `dict` [`lsst.ctrl.bps.WmsState`, `int`]]
        The summary of the execution statuses for each job label in the run.
        For each job label, execution statuses are mapped to number of jobs
        having a given status.
    """
    job_summary = {}
    for label, labeled_jobs in group_jobs_by_label(jobs).items():
        jobs_by_state = group_jobs_by_state(labeled_jobs)
        _LOG.debug("by_label_state = %s", jobs_by_state)

        # Replace each list of jobs with its length.
        state_counts = {}
        for state, members in jobs_by_state.items():
            state_counts[state] = len(members)
        job_summary[label] = state_counts
    return job_summary
def group_jobs_by_state(jobs):
    """Split the jobs into groups sharing the same state.

    Parameters
    ----------
    jobs : `list` [`lsst.ctrl.bps.WmsJobReport`]
        Jobs to divide into groups based on state.

    Returns
    -------
    by_state : `dict`
        Mapping of job state to a list of jobs. Every member of
        ``WmsStates`` is present as a key; states without any jobs are
        mapped to an empty list.
    """
    _LOG.debug("group_jobs_by_state: jobs=%s", jobs)

    # Start with an empty group for each state so the result covers them all.
    grouped = {}
    for state in WmsStates:
        grouped[state] = []

    for job in jobs:
        grouped[job.state].append(job)
    return grouped
def group_jobs_by_label(jobs):
    """Split the jobs into groups sharing the same label.

    Parameters
    ----------
    jobs : `list` [`lsst.ctrl.bps.WmsJobReport`]
        Jobs to divide into groups based on label.

    Returns
    -------
    by_label : `dict` [`str`, `list` [`lsst.ctrl.bps.WmsJobReport`]]
        Mapping of job label to a list of jobs. Labels appear in the order
        they were first encountered and the jobs within a group keep their
        input order.
    """
    grouped = {}
    for job in jobs:
        label = job.label
        if label not in grouped:
            grouped[label] = []
        grouped[label].append(job)
    return grouped