Coverage for python/lsst/ctrl/bps/bps_reports.py: 17%

120 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-01 03:04 -0700

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22""" 

23""" 

24 

25__all__ = ["BaseRunReport", "DetailedRunReport", "SummaryRunReport"] 

26 

27import abc 

28import logging 

29 

30from astropy.table import Table 

31 

32from .wms_service import WmsStates 

33 

34_LOG = logging.getLogger(__name__) 

35 

36 

class BaseRunReport(abc.ABC):
    """The base class representing a run report.

    Parameters
    ----------
    fields : `list` [ `tuple` [ `str`, `str`]]
        The list of column specification, fields, to include in the report.
        Each field has a name and a type.
    """

    def __init__(self, fields):
        # Start with an empty table whose columns follow the given fields.
        self._table = Table(dtype=fields)
        self._msg = None

    def __eq__(self, other):
        # Let Python try the reflected comparison for foreign types instead
        # of unconditionally declaring the objects unequal.
        if not isinstance(other, BaseRunReport):
            return NotImplemented
        return all(self._table == other._table)

    def __len__(self):
        """Number of runs in the report."""
        return len(self._table)

    def __str__(self):
        lines = list(self._table.pformat_all())
        return "\n".join(lines)

    @property
    def message(self):
        """Extra information a method needs to pass to its caller (`str`)."""
        return self._msg

    def clear(self):
        """Remove all entries from the report and reset the message."""
        self._msg = None
        self._table.remove_rows(slice(len(self)))

    def sort(self, columns, ascending=True):
        """Sort the report entries according to one or more keys.

        Parameters
        ----------
        columns : `str` | `list` [ `str` ]
            The column(s) to order the report by.
        ascending : `bool`, optional
            Sort report entries in ascending order, default.

        Raises
        ------
        AttributeError
            Raised if supplied with non-existent column(s).
        """
        if isinstance(columns, str):
            columns = [columns]
        unknown_keys = set(columns) - set(self._table.colnames)
        if unknown_keys:
            # Sets are unordered; sort the names so the error message is
            # reproducible from one run to the next.
            raise AttributeError(
                f"cannot sort the report entries: column(s) {', '.join(sorted(unknown_keys))} not found"
            )
        self._table.sort(keys=columns, reverse=not ascending)

    @classmethod
    def from_table(cls, table):
        """Create a report from a table.

        Parameters
        ----------
        table : `astropy.table.Table`
            Information about a run in a tabular form.

        Returns
        -------
        inst : `lsst.ctrl.bps.bps_reports.BaseRunReport`
            A report (an instance of the class this method was called on)
            holding a copy of the provided table.
        """
        inst = cls(table.dtype.descr)
        # Replace the empty table made by the constructor with a copy so the
        # report does not share its table object with the caller.
        inst._table = table.copy()
        return inst

    @abc.abstractmethod
    def add(self, run_report, use_global_id=False):
        """Add a single run info to the report.

        Parameters
        ----------
        run_report : `lsst.ctrl.bps.WmsRunReport`
            Information for single run.
        use_global_id : `bool`, optional
            If set, use global run id. Defaults to False which means that
            the local id will be used instead.

            Only applicable in the context of a WMS using distributed job
            queues (e.g., HTCondor).
        """

131 

132 

class SummaryRunReport(BaseRunReport):
    """A summary run report."""

    def add(self, run_report, use_global_id=False):
        # Docstring inherited from the base class.

        # Flag any running workflow that might need human attention. The
        # states are checked in priority order and the first one having
        # a non-zero job count determines the flag.
        run_flag = " "
        if run_report.state == WmsStates.RUNNING:
            flagged_states = (
                (WmsStates.FAILED, "F"),
                (WmsStates.DELETED, "D"),
                (WmsStates.HELD, "H"),
            )
            for state, flag in flagged_states:
                if run_report.job_state_counts.get(state, 0):
                    run_flag = flag
                    break

        # Estimate success rate; it stays unknown when the total number of
        # jobs is missing or zero.
        total_jobs = run_report.total_number_jobs
        _LOG.debug("total_number_jobs = %s", total_jobs)
        _LOG.debug("run_report.job_state_counts = %s", run_report.job_state_counts)
        if total_jobs:
            n_succeeded = run_report.job_state_counts.get(WmsStates.SUCCEEDED, 0)
            _LOG.debug("succeeded = %s", n_succeeded)
            percent_succeeded = str(int(n_succeeded / total_jobs * 100))
        else:
            percent_succeeded = "UNK"

        self._table.add_row(
            (
                run_flag,
                run_report.state.name,
                percent_succeeded,
                run_report.global_wms_id if use_global_id else run_report.wms_id,
                run_report.operator,
                run_report.project,
                run_report.campaign,
                run_report.payload,
                run_report.run,
            )
        )

170 

171 

class DetailedRunReport(BaseRunReport):
    """A detailed run report."""

    def add(self, run_report, use_global_id=False):
        # Docstring inherited from the base class.

        # If run summary exists, use it to get the reference job counts.
        # The summary is parsed as ';'-separated 'label:count' pairs.
        by_label_expected = {}
        if run_report.run_summary:
            for part in run_report.run_summary.split(";"):
                label, count = part.split(":")
                by_label_expected[label] = int(count)

        # The first row holds the run-wide totals: one column per state
        # followed by the expected number of jobs. A state absent from the
        # counts is reported as 0, the way SummaryRunReport treats it.
        state_counts = run_report.job_state_counts
        total = ["TOTAL"]
        total.extend(state_counts.get(state, 0) for state in WmsStates)
        total.append(sum(by_label_expected.values()) if by_label_expected else run_report.total_number_jobs)
        self._table.add_row(total)

        # Use the provided job summary. If it doesn't exist, compile it from
        # information about individual jobs.
        if run_report.job_summary:
            job_summary = run_report.job_summary
        elif run_report.jobs:
            job_summary = compile_job_summary(run_report.jobs)
        else:
            id_ = run_report.global_wms_id if use_global_id else run_report.wms_id
            self._msg = f"WARNING: Job summary for run '{id_}' not available, report maybe incomplete."
            return

        if by_label_expected:
            job_order = list(by_label_expected)
        else:
            job_order = sorted(job_summary)
            self._msg = "WARNING: Could not determine order of pipeline, instead sorted alphabetically."
        for label in job_order:
            try:
                summary_counts = job_summary[label]
            except KeyError:
                # Nothing is known about this label yet; mark every state
                # count as unavailable.
                counts = dict.fromkeys(WmsStates, -1)
            else:
                # Work on a copy so the adjustment below does not modify
                # the job summary owned by the caller.
                counts = dict(summary_counts)
                if label in by_label_expected:
                    # Attribute the jobs not accounted for by any state to
                    # UNREADY so the row adds up to the expected count.
                    missing = by_label_expected[label] - sum(counts.values())
                    if missing:
                        counts[WmsStates.UNREADY] = counts.get(WmsStates.UNREADY, 0) + missing

            run = [label]
            run.extend(counts.get(state, 0) for state in WmsStates)
            run.append(by_label_expected[label] if by_label_expected else -1)
            self._table.add_row(run)

    def __str__(self):
        # First column is left-aligned, all the remaining ones right-aligned.
        alignments = ["<"] + [">"] * (len(self._table.colnames) - 1)
        lines = list(self._table.pformat_all(align=alignments))
        # Repeat the second formatted line at index 3. NOTE(review):
        # presumably this puts a copy of the header separator right below
        # the TOTAL row -- confirm against the layout of pformat_all().
        lines.insert(3, lines[1])
        return "\n".join(lines)

227 

228 

def compile_job_summary(jobs):
    """Compile job summary from information available for individual jobs.

    Parameters
    ----------
    jobs : `list` [`lsst.ctrl.bps.WmsJobReport`]
        List of reports for the individual jobs of a run.

    Returns
    -------
    job_summary : `dict` [`str`, `dict` [`lsst.ctrl.bps.WmsStates`, `int`]]
        The summary of the execution statuses for each job label in the run.
        For each job label, execution statuses are mapped to number of jobs
        having a given status.
    """
    summary = {}
    for label, labeled_jobs in group_jobs_by_label(jobs).items():
        grouped = group_jobs_by_state(labeled_jobs)
        _LOG.debug("by_label_state = %s", grouped)
        # Reduce each group of jobs to the number of its members.
        summary[label] = {state: len(members) for state, members in grouped.items()}
    return summary

252 

253 

def group_jobs_by_state(jobs):
    """Divide given jobs into groups based on their state value.

    Parameters
    ----------
    jobs : `list` [`lsst.ctrl.bps.WmsJobReport`]
        Jobs to divide into groups based on state.

    Returns
    -------
    by_state : `dict`
        Mapping of job state to a list of jobs. Every member of `WmsStates`
        has an entry; the list is empty if no job is in that state.
    """
    _LOG.debug("group_jobs_by_state: jobs=%s", jobs)
    # Each state needs its own, separate list.
    groups = {}
    for state in WmsStates:
        groups[state] = []
    for job in jobs:
        groups[job.state].append(job)
    return groups

272 

273 

def group_jobs_by_label(jobs):
    """Divide given jobs into groups based on their label value.

    Parameters
    ----------
    jobs : `list` [`lsst.ctrl.bps.WmsJobReport`]
        Jobs to divide into groups based on label.

    Returns
    -------
    by_label : `dict` [`str`, `list` [`lsst.ctrl.bps.WmsJobReport`]]
        Mapping of job label to a list of jobs having that label. Labels
        appear in the order they are first encountered and the jobs within
        each group keep their input order.
    """
    groups = {}
    for job in jobs:
        label = job.label
        if label not in groups:
            groups[label] = []
        groups[label].append(job)
    return groups