Coverage for python/lsst/ctrl/bps/report.py: 7%
105 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-31 10:45 +0000
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-31 10:45 +0000
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Supporting functions for reporting on runs submitted to a WMS.

Note: Expectations are that future reporting effort will revolve around
LSST-oriented database tables.
"""
28import logging
30from astropy.table import Table
31from lsst.utils import doImport
33from . import WmsStates
35_LOG = logging.getLogger(__name__)
def report(wms_service, run_id, user, hist_days, pass_thru, is_global=False):
    """Print out summary of jobs submitted for execution.

    Parameters
    ----------
    wms_service : `str`
        Name of the WMS service class.  It is imported with ``doImport``
        and instantiated with an empty configuration mapping.
    run_id : `str`
        A run id the report will be restricted to.  When set, a detailed
        summary is printed for every matching run; otherwise a single
        table with one row per run is printed.
    user : `str`
        A user name the report will be restricted to.
    hist_days : `int`
        Number of days of history to pass to the WMS service.  Raised to
        at least 2 when ``run_id`` is given.
    pass_thru : `str`
        A string to pass directly to the WMS service class.
    is_global : `bool`, optional
        If set, all available job queues will be queried for job
        information.  Defaults to False which means that only a local job
        queue will be queried for information.

        Only applicable in the context of a WMS using distributed job
        queues (e.g., HTCondor).
    """

    def _run_sort_key(entry):
        # Order the multi-run table by whichever id is being displayed.
        return entry.global_wms_id if is_global else entry.wms_id

    service = doImport(wms_service)({})

    # If reporting on single run, increase history until better mechanism
    # for handling completed jobs is available.
    if run_id:
        hist_days = max(hist_days, 2)

    runs, message = service.report(run_id, user, hist_days, pass_thru, is_global=is_global)

    if not run_id:
        # No specific run requested: one table row per run.
        table = init_summary()
        for entry in sorted(runs, key=_run_sort_key):
            table = add_single_run_summary(table, entry, is_global=is_global)
        for text in table.pformat_all():
            print(text)
    else:
        for entry in runs:
            print_single_run_summary(entry, is_global=is_global)
        # Only offer hints when the service returned neither runs nor a
        # message of its own.
        if not (runs or message):
            print(
                f"No records found for job id '{run_id}'. "
                f"Hints: Double check id, retry with a larger --hist value (currently: {hist_days}), "
                f"and/or use --global to search all job queues."
            )

    if message:
        print(message)
        print("\n\n")
def init_summary():
    """Create an empty table for the run summary report.

    Returns
    -------
    table : `astropy.table.Table`
        Table without rows whose columns (all declared with dtype "S")
        are, in order: X, STATE, %S, ID, OPERATOR, PROJECT, CAMPAIGN,
        PAYLOAD, and RUN.
    """
    column_names = (
        "X",
        "STATE",
        "%S",
        "ID",
        "OPERATOR",
        "PROJECT",
        "CAMPAIGN",
        "PAYLOAD",
        "RUN",
    )
    return Table(dtype=[(name, "S") for name in column_names])
def add_single_run_summary(summary, run_report, is_global=False):
    """Add a single run info to the summary.

    Parameters
    ----------
    summary : `astropy.table.Table`
        The table representing the run summary.  A row is appended to it
        in place.
    run_report : `lsst.ctrl.bps.WmsRunReport`
        Information for single run.
    is_global : `bool`, optional
        If set, the run's ``global_wms_id`` is shown in the ID column;
        otherwise its ``wms_id`` is shown.  Defaults to False.

    Returns
    -------
    summary : `astropy.table.Table`
        The same table object that was passed in, with the new row added.
    """
    # Flag any running workflow that might need human attention.  Only the
    # first matching condition is reported: failed, then deleted, then held.
    run_flag = " "
    if run_report.state == WmsStates.RUNNING:
        if run_report.job_state_counts.get(WmsStates.FAILED, 0):
            run_flag = "F"
        elif run_report.job_state_counts.get(WmsStates.DELETED, 0):
            run_flag = "D"
        elif run_report.job_state_counts.get(WmsStates.HELD, 0):
            run_flag = "H"

    # "UNK" is kept when the total number of jobs is zero or unset.
    percent_succeeded = "UNK"
    _LOG.debug("total_number_jobs = %s", run_report.total_number_jobs)
    _LOG.debug("run_report.job_state_counts = %s", run_report.job_state_counts)
    if run_report.total_number_jobs:
        succeeded = run_report.job_state_counts.get(WmsStates.SUCCEEDED, 0)
        _LOG.debug("succeeded = %s", succeeded)
        # Use integer arithmetic: truncating the floating-point product
        # under-reports some values (e.g. 29 of 100 would show as 28
        # because 0.29 * 100 == 28.999999999999996).
        percent_succeeded = f"{succeeded * 100 // run_report.total_number_jobs}"

    row = (
        run_flag,
        run_report.state.name,
        percent_succeeded,
        run_report.global_wms_id if is_global else run_report.wms_id,
        run_report.operator,
        run_report.project,
        run_report.campaign,
        run_report.payload,
        run_report.run,
    )
    summary.add_row(row)
    return summary
def group_jobs_by_state(jobs):
    """Partition jobs according to their WMS state.

    Parameters
    ----------
    jobs : `list` [`lsst.ctrl.bps.WmsJobReport`]
        Jobs to divide into groups based on state.

    Returns
    -------
    by_state : `dict`
        Mapping of job state to a list of jobs.  Every member of
        ``WmsStates`` is present as a key, with an empty list when no job
        is in that state.
    """
    _LOG.debug("group_jobs_by_state: jobs=%s", jobs)

    # Seed every state with its own list so that states without any jobs
    # still show up in the result.
    grouped = {}
    for state in WmsStates:
        grouped[state] = []

    for entry in jobs:
        grouped[entry.state].append(entry)
    return grouped
def group_jobs_by_label(jobs):
    """Partition jobs according to their label.

    Parameters
    ----------
    jobs : `list` [`lsst.ctrl.bps.WmsJobReport`]
        Jobs to divide into groups based on label.

    Returns
    -------
    by_label : `dict` [`str`, `list` [`lsst.ctrl.bps.WmsJobReport`]]
        Mapping of job label to the list of jobs carrying that label.
        Keys appear in the order the labels are first encountered and
        each list preserves the input order of its jobs.
    """
    grouped = {}
    for entry in jobs:
        key = entry.label
        if key not in grouped:
            grouped[key] = []
        grouped[key].append(entry)
    return grouped
def print_single_run_summary(run_report, is_global=False):
    """Print runtime info for single run including job summary per label.

    Three sections are written to standard output: the one-row run
    summary table, the run's path and global job id, and a table of job
    counts per WMS state with one TOTAL row followed by one row per job
    label.

    Parameters
    ----------
    run_report : `lsst.ctrl.bps.WmsRunReport`
        Summary runtime info for a run + runtime info for jobs.
    is_global : `bool`, optional
        Passed on to `add_single_run_summary`, where it selects whether
        the run's global or local WMS id is shown.  Defaults to False.

    Notes
    -----
    A count of -1 means the value is unknown: it is used for every state
    of a label that has no job records, and for the EXPECTED column of a
    label that is missing from ``run_report.run_summary``.
    """
    # Print normal run summary.
    summary = init_summary()
    summary = add_single_run_summary(summary, run_report, is_global=is_global)
    for line in summary.pformat_all():
        print(line)
    print("\n\n")

    # Print more run information.
    print(f"Path: {run_report.path}")
    print(f"Global job id: {run_report.global_wms_id}")
    print("\n\n")

    by_label = group_jobs_by_label(run_report.jobs)

    # Determine the label order and the expected number of jobs per label.
    # The run summary is parsed as ";"-separated "label:count" pairs.
    label_order = []
    by_label_expected = {}
    if run_report.run_summary:
        for part in run_report.run_summary.split(";"):
            label, count = part.split(":")
            label_order.append(label)
            by_label_expected[label] = int(count)
    else:
        print("Warning: Cannot determine order of pipeline. Instead printing alphabetical.")
        label_order = sorted(by_label.keys())

    # Initialize table for saving the detailed run info.
    columns = [(" ", "S")] + [(s.name, "i") for s in WmsStates] + [("EXPECTED", "i")]
    details = Table(dtype=columns)

    # States absent from the run-level counts are reported as 0, matching
    # how add_single_run_summary reads the same mapping.
    total = ["TOTAL"]
    total.extend(run_report.job_state_counts.get(state, 0) for state in WmsStates)
    total.append(sum(by_label_expected.values()))
    details.add_row(total)

    for label in label_order:
        if label in by_label:
            by_label_state = group_jobs_by_state(by_label[label])
            _LOG.debug("by_label_state = %s", by_label_state)
            counts = {state: len(jobs) for state, jobs in by_label_state.items()}
            if label in by_label_expected:
                # Attribute the difference between the expected number of
                # jobs and the number of job records to UNREADY.
                already_counted = sum(counts.values())
                if already_counted != by_label_expected[label]:
                    counts[WmsStates.UNREADY] += by_label_expected[label] - already_counted
        else:
            # No job records for this label.
            counts = dict.fromkeys(WmsStates, -1)

        row = [label]
        row.extend(counts[state] for state in WmsStates)
        # Append a scalar (the column is an integer column) and fall back
        # to -1 when the expected count is unknown, which is always the
        # case on the alphabetical-order path above.
        row.append(by_label_expected.get(label, -1))
        details.add_row(row)

    # Format the report summary and print it out.
    alignments = ["<"]
    alignments.extend(">" for _ in WmsStates)
    alignments.append(">")
    lines = details.pformat_all(align=alignments)
    # Repeat the second output line after the third so the TOTAL row is
    # set apart from the per-label rows (presumably lines[1] is the dashed
    # header separator and lines[2] the TOTAL row -- confirm against the
    # astropy output format).
    lines.insert(3, lines[1])
    for line in lines:
        print(line)