Coverage for python/lsst/ctrl/bps/wms_service.py: 79%

73 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-08 14:46 -0800

# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Base classes for working with a specific WMS"""

23 

24 

# Names exported by ``from <module> import *``; each one is a class defined
# further down in this module.
__all__ = [
    "BaseWmsService",
    "BaseWmsWorkflow",
    "WmsJobReport",
    "WmsRunReport",
    "WmsStates",
]

32 

33 

import dataclasses
import logging
from abc import ABCMeta
from enum import Enum

# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

40 

41 

class WmsStates(Enum):
    """Run and job states.

    Each member maps a state name to a distinct integer value (0 through 9).
    The trailing comment on each member describes the condition it stands
    for.
    """

    UNKNOWN = 0  # Can't determine state
    MISFIT = 1  # Determined state, but doesn't fit other states
    UNREADY = 2  # Still waiting for parents to finish
    READY = 3  # All of its parents have finished successfully
    PENDING = 4  # Ready to run, visible in batch queue
    RUNNING = 5  # Currently running
    DELETED = 6  # In the process of being deleted or already deleted
    HELD = 7  # In a hold state
    SUCCEEDED = 8  # Have completed with success status
    FAILED = 9  # Have completed with non-success status

55 

56 

@dataclasses.dataclass
class WmsJobReport:
    """WMS job information to be included in detailed report output.

    All four fields are required, in declaration order, by the generated
    ``__init__``; none of them has a default value.
    """

    wms_id: str
    name: str
    label: str
    state: WmsStates

    # Hand-written slots remove the per-instance ``__dict__``.  This only
    # works together with ``dataclass`` because no field above has a
    # class-level default (a default would clash with the slot descriptor).
    __slots__ = ("wms_id", "name", "label", "state")

67 

68 

@dataclasses.dataclass
class WmsRunReport:
    """WMS run information to be included in detailed report output.

    Every field is required, in declaration order, by the generated
    ``__init__``; none of them has a default value.
    """

    wms_id: str
    global_wms_id: str
    path: str
    label: str
    run: str
    project: str
    campaign: str
    payload: str
    operator: str
    run_summary: str
    state: WmsStates
    jobs: list
    total_number_jobs: int
    job_state_counts: dict

    # Hand-written slots remove the per-instance ``__dict__``; this is only
    # possible because no field above has a class-level default.  The slot
    # order differs slightly from the field order ("total_number_jobs" comes
    # before "jobs"); the dataclass field order is taken from the annotations
    # above, not from this tuple.
    __slots__ = (
        "wms_id",
        "global_wms_id",
        "path",
        "label",
        "run",
        "project",
        "campaign",
        "payload",
        "operator",
        "run_summary",
        "state",
        "total_number_jobs",
        "jobs",
        "job_state_counts",
    )

104 

105 

class BaseWmsService:
    """Interface for interactions with a specific WMS.

    Every method other than ``__init__`` raises `NotImplementedError` here;
    the docstrings describe the contract a WMS-specific subclass is expected
    to fulfil.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Configuration needed by the WMS service.
    """

    def __init__(self, config):
        self.config = config

    def prepare(self, config, generic_workflow, out_prefix=None):
        """Create submission for a generic workflow for a specific WMS.

        Parameters
        ----------
        config : `lsst.ctrl.bps.BpsConfig`
            BPS configuration.
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic representation of a single workflow.
        out_prefix : `str`, optional
            Prefix for all WMS output files.

        Returns
        -------
        wms_workflow : `BaseWmsWorkflow`
            Prepared WMS Workflow to submit for execution.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def submit(self, workflow):
        """Submit a single WMS workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.BaseWmsWorkflow`
            Prepared WMS Workflow to submit for execution.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def restart(self, wms_workflow_id):
        """Restart a workflow from the point of failure.

        Parameters
        ----------
        wms_workflow_id : `str`
            Id that can be used by WMS service to identify workflow that
            needs to be restarted.

        Returns
        -------
        wms_id : `str`
            Id of the restarted workflow. If restart failed, it will be set
            to None.
        run_name : `str`
            Name of the restarted workflow. If restart failed, it will be set
            to None.
        message : `str`
            A message describing any issues encountered during the restart.
            If there were no issue, an empty string is returned.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def list_submitted_jobs(self, wms_id=None, user=None, require_bps=True, pass_thru=None, is_global=False):
        """Query WMS for list of submitted WMS workflows/jobs.

        This should be a quick lookup function to create list of jobs for
        other functions.

        Parameters
        ----------
        wms_id : `int` or `str`, optional
            Id or path that can be used by WMS service to look up job.
        user : `str`, optional
            User whose submitted jobs should be listed.
        require_bps : `bool`, optional
            Whether to require jobs returned in list to be bps-submitted jobs.
        pass_thru : `str`, optional
            Information to pass through to WMS.
        is_global : `bool`, optional
            If set, all available job queues will be queried for job
            information. Defaults to False which means that only a local job
            queue will be queried for information.

            Only applicable in the context of a WMS using distributed job
            queues (e.g., HTCondor). A WMS with a centralized job queue
            (e.g. PanDA) can safely ignore it.

        Returns
        -------
        job_ids : `list` [`Any`]
            Only job ids to be used by cancel and other functions. Typically
            this means top-level jobs (i.e., not children jobs).

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def report(self, wms_workflow_id=None, user=None, hist=0, pass_thru=None, is_global=False):
        """Query WMS for status of submitted WMS workflows.

        Parameters
        ----------
        wms_workflow_id : `int` or `str`, optional
            Id that can be used by WMS service to look up status.
        user : `str`, optional
            Limit report to submissions by this particular user.
        hist : `int`, optional
            Number of days to expand report to include finished WMS workflows.
        pass_thru : `str`, optional
            Additional arguments to pass through to the specific WMS service.
        is_global : `bool`, optional
            If set, all available job queues will be queried for job
            information. Defaults to False which means that only a local job
            queue will be queried for information.

            Only applicable in the context of a WMS using distributed job
            queues (e.g., HTCondor). A WMS with a centralized job queue
            (e.g. PanDA) can safely ignore it.

        Returns
        -------
        run_reports : `list` [`lsst.ctrl.bps.WmsRunReport`]
            Status information for submitted WMS workflows.
        message : `str`
            Message to user on how to find more status information specific to
            this particular WMS.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def cancel(self, wms_id, pass_thru=None):
        """Cancel submitted workflows/jobs.

        Parameters
        ----------
        wms_id : `str`
            ID or path of job that should be canceled.
        pass_thru : `str`, optional
            Information to pass through to WMS.

        Returns
        -------
        deleted : `bool`
            Whether successful deletion or not. Currently, if any doubt or any
            individual jobs not deleted, return False.
        message : `str`
            Any message from WMS (e.g., error details).

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def run_submission_checks(self):
        """Checks to run at start if running WMS specific submission steps.

        Any exception other than NotImplementedError will halt submission.
        Submit directory may not yet exist when this is called.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def ping(self, pass_thru=None):
        """Checks whether WMS services are up, reachable, and can authenticate
        if authentication is required.

        The services to be checked are those needed for submit, report, cancel,
        restart, but ping cannot guarantee whether jobs would actually run
        successfully.

        Parameters
        ----------
        pass_thru : `str`, optional
            Information to pass through to WMS.

        Returns
        -------
        status : `int`
            0 for success, non-zero for failure.
        message : `str`
            Any message from WMS (e.g., error details).

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        # ``pass_thru`` defaults to None so the signature agrees with its
        # "optional" documentation and with cancel/report/list_submitted_jobs.
        raise NotImplementedError

284 

285 

class BaseWmsWorkflow(metaclass=ABCMeta):
    """Interface for single workflow specific to a WMS.

    The constructor stores ``name`` and ``config`` and initialises
    ``service_class``, ``run_id`` and ``submit_path`` to None.  The other
    methods raise `NotImplementedError` in this base class.

    Parameters
    ----------
    name : `str`
        Unique name of workflow.
    config : `lsst.ctrl.bps.BpsConfig`
        Generic workflow config.
    """

    def __init__(self, name, config):
        self.name = name
        self.config = config
        # Not known at construction time; all three start out unset.
        self.service_class = None
        self.run_id = None
        self.submit_path = None

    @classmethod
    def from_generic_workflow(cls, config, generic_workflow, out_prefix, service_class):
        """Create a WMS-specific workflow from a GenericWorkflow.

        Parameters
        ----------
        config : `lsst.ctrl.bps.BpsConfig`
            Configuration values needed for generating a WMS specific workflow.
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow from which to create the WMS-specific one.
        out_prefix : `str`
            Root directory to be used for WMS workflow inputs and outputs
            as well as internal WMS files.
        service_class : `str`
            Full module name of WMS service class that created this workflow.

        Returns
        -------
        wms_workflow : `lsst.ctrl.bps.BaseWmsWorkflow`
            A WMS specific workflow.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def write(self, out_prefix):
        """Write WMS files for this particular workflow.

        Parameters
        ----------
        out_prefix : `str`
            Root directory to be used for WMS workflow inputs and outputs
            as well as internal WMS files.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError