Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Supporting functions for reporting on runs submitted to a WMS. 

23 

24Note: Expectations are that future reporting effort will revolve around LSST 

25oriented database tables. 

26""" 

27 

28import logging 

29 

30from lsst.utils import doImport 

31 

32from . import WmsStates 

33 

34 

35_LOG = logging.getLogger(__name__) 

36 

37SUMMARY_FMT = "{:1} {:>10} {:>3} {:>9} {:10} {:10} {:20} {:20} {:<60}" 

38 

39 

def report(wms_service, run_id, user, hist_days, pass_thru):
    """Print out summary of jobs submitted for execution.

    Parameters
    ----------
    wms_service : `str`
        Name of the WMS service class.  It is imported with ``doImport``
        and instantiated with an empty configuration.
    run_id : `str`
        A run id the report will be restricted to.  When given, a detailed
        per-run summary is printed instead of the one-line-per-run table.
    user : `str`
        A user name the report will be restricted to.
    hist_days : `int`
        Number of days of history handed to the WMS service.  Raised to at
        least 2 when ``run_id`` is given.
    pass_thru : `str`
        A string to pass directly to the WMS service class.
    """
    wms_service_class = doImport(wms_service)
    # Keep the instance under its own name instead of rebinding the
    # ``wms_service`` parameter (which holds the class name).
    service = wms_service_class({})

    # If reporting on single run, increase history until better mechanism
    # for handling completed jobs is available.
    if run_id:
        hist_days = max(hist_days, 2)

    runs, message = service.report(run_id, user, hist_days, pass_thru)

    if run_id:
        if not runs:
            print(f"No information found for id='{run_id}'.")
            # Adjacent string literals are joined verbatim, so the separating
            # space has to be part of one of them.
            print("Double check id and retry with a larger --hist value "
                  f"(currently: {hist_days})")
        for run in runs:
            print_single_run_summary(run)
    else:
        print_headers()
        for run in sorted(runs, key=lambda j: j.wms_id):
            print_run(run)
    # Whatever message the WMS service returned is always printed last.
    print(message)

78 

79 

def print_headers():
    """Print the column titles of the run summary table.

    The titles are laid out with ``SUMMARY_FMT`` and followed by a
    horizontal rule made of dashes.
    """
    column_titles = ("X", "STATE", "%S", "ID", "OPERATOR", "PRJ", "CMPGN", "PAYLOAD", "RUN")
    header_line = SUMMARY_FMT.format(*column_titles)
    rule = "-" * 156
    print(header_line)
    print(rule)

85 

86 

def print_run(run_report):
    """Print the one-line summary for a single run.

    Parameters
    ----------
    run_report : `lsst.ctrl.bps.WmsRunReport`
        Information for single run.
    """
    # A running workflow gets a one-letter flag if it has jobs in a state
    # that might need human attention.  The first state (in the order
    # listed) with a non-zero count decides the flag.
    attention_flags = (
        (WmsStates.FAILED, "F"),
        (WmsStates.DELETED, "D"),
        (WmsStates.HELD, "H"),
    )
    flag = " "
    if run_report.state == WmsStates.RUNNING:
        for state, marker in attention_flags:
            if run_report.job_state_counts.get(state, 0):
                flag = marker
                break

    total = run_report.total_number_jobs
    _LOG.debug("total_number_jobs = %s", total)
    _LOG.debug("run_report.job_state_counts = %s", run_report.job_state_counts)

    # Without a (non-zero) total the percentage cannot be computed.
    if total:
        succeeded = run_report.job_state_counts.get(WmsStates.SUCCEEDED, 0)
        _LOG.debug("succeeded = %s", succeeded)
        pct_done = str(int(succeeded / total * 100))
    else:
        pct_done = "UNK"

    # Free-text columns are cut to the widths used in SUMMARY_FMT.
    columns = (
        flag,
        run_report.state.name,
        pct_done,
        run_report.wms_id,
        run_report.operator[:10],
        run_report.project[:10],
        run_report.campaign[:20],
        run_report.payload[:20],
        run_report.run[:60],
    )
    print(SUMMARY_FMT.format(*columns))

116 

117 

def group_jobs_by_state(jobs):
    """Split the given jobs into per-state lists.

    Parameters
    ----------
    jobs : `list` [`lsst.ctrl.bps.WmsJobReport`]
        Jobs to divide into groups based on state.

    Returns
    -------
    by_state : `dict`
        Mapping of job state to a list of jobs.  Every member of
        ``WmsStates`` is present as a key, with an empty list when no job
        is in that state.
    """
    _LOG.debug("group_jobs_by_state: jobs=%s", jobs)

    # The comprehension creates a separate list per state; nothing is shared.
    grouped = {state: [] for state in WmsStates}
    for job in jobs:
        grouped[job.state].append(job)
    return grouped

139 

140 

def group_jobs_by_label(jobs):
    """Split the given jobs into per-label lists.

    Parameters
    ----------
    jobs : `list` [`lsst.ctrl.bps.WmsJobReport`]
        Jobs to divide into groups based on label.

    Returns
    -------
    by_label : `dict` [`str`, `list` [`lsst.ctrl.bps.WmsJobReport`]]
        Mapping of job label to the list of jobs carrying that label.
        Labels appear in the order they are first seen and jobs keep their
        input order within each list.
    """
    grouped = {}
    for job in jobs:
        # setdefault creates the list the first time a label is seen.
        grouped.setdefault(job.label, []).append(job)
    return grouped

160 

161 

def print_single_run_summary(run_report):
    """Print runtime info for single run including job summary per task abbrev.

    The output consists of the normal one-line run summary, the run's path,
    a row of total job counts per WMS state and then one row of per-state
    job counts for each job label.

    Parameters
    ----------
    run_report : `lsst.ctrl.bps.WmsRunReport`
        Summary runtime info for a run + runtime info for jobs.
    """
    # Print normal run summary.
    print_headers()
    print_run(run_report)
    print("\n\n")

    # Print more run information.
    print(f"Path: {run_report.path}\n")

    state_titles = " | ".join([f"{s.name[:6]:6}" for s in WmsStates])
    print(f"{'':35} {state_titles}")
    # A state missing from job_state_counts is shown as 0 (the same
    # convention print_run uses) instead of raising KeyError.
    state_totals = " | ".join([f"{run_report.job_state_counts.get(s, 0):6}" for s in WmsStates])
    print(f"{'Total':35} {state_totals}")
    print("-" * (35 + 3 + (6 + 2) * (len(run_report.job_state_counts) + 1)))

    by_label = group_jobs_by_label(run_report.jobs)

    # Print job level info by print counts of jobs by label and WMS state.
    label_order = []
    by_label_totals = {}
    if run_report.run_summary:
        # Workaround until get pipetaskInit job into run_summary
        if not run_report.run_summary.startswith("pipetaskInit"):
            label_order.append("pipetaskInit")
            by_label_totals["pipetaskInit"] = 1
        # run_summary is parsed as ';'-separated "label:count" entries.
        for part in run_report.run_summary.split(";"):
            if not part.strip():
                # Tolerate empty entries, e.g. from a trailing ';'.
                continue
            label, count = part.split(":")
            label_order.append(label)
            by_label_totals[label] = int(count)
    else:
        print("Warning: Cannot determine order of pipeline.  Instead printing alphabetical.")
        label_order = sorted(by_label.keys())

    for label in label_order:
        if label in by_label:
            by_label_state = group_jobs_by_state(by_label[label])
            _LOG.debug("by_label_state = %s", by_label_state)
            counts = {state: len(by_label_state[state]) for state in WmsStates}
        elif label in by_label_totals:
            # No job reports for this label: show every expected job as
            # UNREADY.  (The counts start at zero here, so this equals the
            # original "expected minus already counted" arithmetic.)
            counts = dict.fromkeys(WmsStates, 0)
            counts[WmsStates.UNREADY] = by_label_totals[label]
        else:
            # Nothing known about this label at all.
            counts = dict.fromkeys(WmsStates, -1)
        label_counts = " | ".join([f"{counts[s]:6}" for s in WmsStates])
        print(f"{label[:35]:35} {label_counts}")
    print("\n")