Coverage for python/lsst/ctrl/bps/report.py: 7%

105 statements  

« prev     ^ index     » next       coverage.py v6.4, created at 2022-05-24 10:59 +0000

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Supporting functions for reporting on runs submitted to a WMS. 

23 

24Note: Expectations are that future reporting effort will revolve around LSST 

25oriented database tables. 

26""" 

27 

28import logging 

29 

30from astropy.table import Table 

31from lsst.utils import doImport 

32 

33from . import WmsStates 

34 

35_LOG = logging.getLogger(__name__) 

36 

37 

def report(wms_service, run_id, user, hist_days, pass_thru, is_global=False):
    """Print out summary of jobs submitted for execution.

    Parameters
    ----------
    wms_service : `str`
        Name of the WMS service class. The class is imported and
        instantiated with an empty dictionary.
    run_id : `str`
        A run id the report will be restricted to.
    user : `str`
        A user name the report will be restricted to.
    hist_days : `int`
        Number of days of history, passed on to the WMS service. Raised to
        at least 2 when ``run_id`` is given.
    pass_thru : `str`
        A string to pass directly to the WMS service class.
    is_global : `bool`, optional
        If set, all available job queues will be queried for job information.
        Defaults to False which means that only a local job queue will be
        queried for information.

        Only applicable in the context of a WMS using distributed job queues
        (e.g., HTCondor).
    """
    service = doImport(wms_service)({})

    # Until a better mechanism for handling completed jobs is available,
    # look back at least two days when reporting on a single run.
    if run_id and hist_days < 2:
        hist_days = 2

    runs, message = service.report(run_id, user, hist_days, pass_thru, is_global=is_global)

    if not run_id:
        # Multi-run report: one table with a row per run, ordered by the
        # id that is going to be displayed.
        id_attr = "global_wms_id" if is_global else "wms_id"
        summary = init_summary()
        for run in sorted(runs, key=lambda entry: getattr(entry, id_attr)):
            summary = add_single_run_summary(summary, run, is_global=is_global)
        for line in summary.pformat_all():
            print(line)
    else:
        # Single-run report: detailed output for every matching run.
        for run in runs:
            print_single_run_summary(run, is_global=is_global)
        # Only give generic hints when the WMS service did not supply a
        # message of its own.
        if not (runs or message):
            print(
                f"No records found for job id '{run_id}'. "
                f"Hints: Double check id, retry with a larger --hist value (currently: {hist_days}), "
                f"and/or use --global to search all job queues."
            )

    if message:
        print(message)
        print("\n\n")

89 

90 

def init_summary():
    """Initialize the summary report table.

    Returns
    -------
    table : `astropy.table.Table`
        Empty summary report table whose columns are all declared with
        the ``"S"`` (string) dtype.
    """
    # Column headers in display order.
    names = ("X", "STATE", "%S", "ID", "OPERATOR", "PROJECT", "CAMPAIGN", "PAYLOAD", "RUN")
    return Table(dtype=[(name, "S") for name in names])

111 

112 

def add_single_run_summary(summary, run_report, is_global=False):
    """Add a single run info to the summary.

    Parameters
    ----------
    summary : `astropy.table.Table`
        The table representing the run summary.
    run_report : `lsst.ctrl.bps.WmsRunReport`
        Information for single run.
    is_global : `bool`, optional
        If set, the run's global WMS id is put in the ID column. Defaults
        to False which means the local WMS id is used instead.

    Returns
    -------
    summary : `astropy.table.Table`
        The table passed in, with one row appended for ``run_report``.
    """
    state_counts = run_report.job_state_counts

    # Flag any running workflow that might need human attention
    run_flag = " "
    if run_report.state == WmsStates.RUNNING:
        if state_counts.get(WmsStates.FAILED, 0):
            run_flag = "F"
        elif state_counts.get(WmsStates.DELETED, 0):
            run_flag = "D"
        elif state_counts.get(WmsStates.HELD, 0):
            run_flag = "H"

    percent_succeeded = "UNK"
    _LOG.debug("total_number_jobs = %s", run_report.total_number_jobs)
    _LOG.debug("run_report.job_state_counts = %s", state_counts)
    if run_report.total_number_jobs:
        succeeded = state_counts.get(WmsStates.SUCCEEDED, 0)
        _LOG.debug("succeeded = %s", succeeded)
        # Multiply before dividing and use floor division: the previous
        # ``int(succeeded / total * 100)`` truncated after floating-point
        # rounding, so e.g. 29 out of 100 jobs was reported as 28.
        percent_succeeded = f"{int(succeeded * 100 // run_report.total_number_jobs)}"

    row = (
        run_flag,
        run_report.state.name,
        percent_succeeded,
        run_report.global_wms_id if is_global else run_report.wms_id,
        run_report.operator,
        run_report.project,
        run_report.campaign,
        run_report.payload,
        run_report.run,
    )
    summary.add_row(row)
    return summary

161 

162 

def group_jobs_by_state(jobs):
    """Divide given jobs into groups based on their state value.

    Parameters
    ----------
    jobs : `list` [`lsst.ctrl.bps.WmsJobReport`]
        Jobs to divide into groups based on state.

    Returns
    -------
    by_state : `dict`
        Mapping of job state to a list of jobs. Every member of
        `WmsStates` is present as a key, with an empty list when no job
        is in that state.
    """
    _LOG.debug("group_jobs_by_state: jobs=%s", jobs)

    # Start with an (independent) empty bucket for every known state.
    grouped = {}
    for state in WmsStates:
        grouped[state] = []

    for job in jobs:
        bucket = grouped[job.state]
        bucket.append(job)
    return grouped

181 

182 

def group_jobs_by_label(jobs):
    """Divide given jobs into groups based on their label value.

    Parameters
    ----------
    jobs : `list` [`lsst.ctrl.bps.WmsJobReport`]
        Jobs to divide into groups based on label.

    Returns
    -------
    by_label : `dict` [`str`, `list` [`lsst.ctrl.bps.WmsJobReport`]]
        Mapping of job label to a list of jobs having that label. Labels
        appear in order of first occurrence and jobs keep their input
        order within each group.
    """
    grouped = {}
    for job in jobs:
        label = job.label
        if label not in grouped:
            grouped[label] = []
        grouped[label].append(job)
    return grouped

201 

202 

def print_single_run_summary(run_report, is_global=False):
    """Print runtime info for single run including job summary per task abbrev.

    Parameters
    ----------
    run_report : `lsst.ctrl.bps.WmsRunReport`
        Summary runtime info for a run + runtime info for jobs.
    is_global : `bool`, optional
        Forwarded to `add_single_run_summary`; if set, the global WMS id
        is shown in the summary row instead of the local one. Defaults to
        False.

    Notes
    -----
    A value of -1 in the per-label table means the number is unknown:
    either no job with that label was reported, or the run summary did
    not provide an expected count for the label.
    """
    # Print normal run summary.
    summary = init_summary()
    summary = add_single_run_summary(summary, run_report, is_global=is_global)
    for line in summary.pformat_all():
        print(line)
    print("\n\n")

    # Print more run information.
    print(f"Path: {run_report.path}")
    print(f"Global job id: {run_report.global_wms_id}")
    print("\n\n")

    by_label = group_jobs_by_label(run_report.jobs)

    # Determine label order and expected job counts. The run summary is
    # parsed as ';'-separated 'label:count' pairs.
    label_order = []
    by_label_expected = {}
    if run_report.run_summary:
        for part in run_report.run_summary.split(";"):
            label, count = part.split(":")
            label_order.append(label)
            by_label_expected[label] = int(count)
    else:
        print("Warning: Cannot determine order of pipeline. Instead printing alphabetical.")
        label_order = sorted(by_label)

    # Initialize table for saving the detailed run info.
    states = list(WmsStates)
    columns = [(" ", "S")] + [(state.name, "i") for state in states] + [("EXPECTED", "i")]
    details = Table(dtype=columns)

    # Use .get() so a state missing from the counts is shown as 0 (same
    # lookup style as add_single_run_summary) instead of raising KeyError.
    total = ["TOTAL"]
    total.extend(run_report.job_state_counts.get(state, 0) for state in states)
    total.append(sum(by_label_expected.values()))
    details.add_row(total)

    for label in label_order:
        # None when the run summary gave no expected count for this label
        # (always the case on the alphabetical fallback path above).
        expected = by_label_expected.get(label)

        if label in by_label:
            by_label_state = group_jobs_by_state(by_label[label])
            _LOG.debug("by_label_state = %s", by_label_state)
            counts = {state: len(jobs) for state, jobs in by_label_state.items()}
            if expected is not None:
                # Jobs that are expected but not yet reported are counted
                # as UNREADY.
                already_counted = sum(counts.values())
                if already_counted != expected:
                    counts[WmsStates.UNREADY] += expected - already_counted
        else:
            counts = dict.fromkeys(states, -1)

        row = [label]
        row.extend(counts[state] for state in states)
        # Append a plain integer (previously a one-element list), using
        # the -1 "unknown" marker when no expected count is available
        # rather than failing with KeyError.
        row.append(-1 if expected is None else expected)
        details.add_row(row)

    # Format the report summary and print it out.
    alignments = ["<"]
    alignments.extend(">" for _ in states)
    alignments.append(">")
    lines = details.pformat_all(align=alignments)
    # Repeat the rule found under the header (lines[1]) right after the
    # TOTAL row (lines[2]) to set it apart from the per-label rows.
    lines.insert(3, lines[1])
    for line in lines:
        print(line)