Coverage for python/lsst/ctrl/bps/wms_service.py: 81%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

71 statements  

# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Base classes for working with a specific WMS"""

23 

24 

# Names exported by ``from <module> import *``.
__all__ = [
    "BaseWmsService",
    "BaseWmsWorkflow",
    "WmsJobReport",
    "WmsRunReport",
    "WmsStates",
]


import logging
import dataclasses
from abc import ABCMeta
from enum import Enum


# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

41 

42 

class WmsStates(Enum):
    """Run and job states.

    Each member pairs a state name with a fixed integer value (0-9).
    """

    UNKNOWN = 0  # Can't determine state
    MISFIT = 1  # Determined state, but doesn't fit other states
    UNREADY = 2  # Still waiting for parents to finish
    READY = 3  # All of its parents have finished successfully
    PENDING = 4  # Ready to run, visible in batch queue
    RUNNING = 5  # Currently running
    DELETED = 6  # In the process of being deleted or already deleted
    HELD = 7  # In a hold state
    SUCCEEDED = 8  # Have completed with success status
    FAILED = 9  # Have completed with non-success status

56 

57 

@dataclasses.dataclass
class WmsJobReport:
    """WMS job information to be included in detailed report output.

    ``__slots__`` is declared explicitly, so instances hold only the four
    fields listed below.
    """

    wms_id: str  # Id of the job (presumably as assigned by the WMS -- confirm)
    name: str  # Name of the job
    label: str  # Label of the job
    state: WmsStates  # State of the job, one of `WmsStates`

    __slots__ = ('wms_id', 'name', 'label', 'state')

68 

69 

@dataclasses.dataclass
class WmsRunReport:
    """WMS run information to be included in detailed report output.

    ``__slots__`` is declared explicitly, so instances hold only the
    fields listed below.
    """

    wms_id: str  # Id of the run (presumably as assigned by the WMS -- confirm)
    global_wms_id: str  # Global counterpart of ``wms_id`` (semantics WMS-specific)
    path: str  # Path associated with the run (presumably submit dir -- confirm)
    label: str
    run: str
    project: str
    campaign: str
    payload: str
    operator: str
    run_summary: str
    state: WmsStates  # State of the run, one of `WmsStates`
    jobs: list  # presumably `WmsJobReport` entries -- confirm against producers
    total_number_jobs: int
    job_state_counts: dict  # presumably job counts keyed by state -- confirm

    # Note: slot order differs slightly from field order ('total_number_jobs'
    # precedes 'jobs'); the dataclass-generated ``__init__`` follows the field
    # order above, not this tuple.
    __slots__ = ('wms_id', 'global_wms_id', 'path', 'label', 'run', 'project', 'campaign', 'payload',
                 'operator', 'run_summary', 'state', 'total_number_jobs', 'jobs', 'job_state_counts')

91 

92 

class BaseWmsService:
    """Interface for interactions with a specific WMS.

    Every method other than ``__init__`` raises `NotImplementedError` here;
    WMS-specific subclasses are expected to override the ones they support.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Configuration needed by the WMS service.
    """

    def __init__(self, config):
        self.config = config

    def prepare(self, config, generic_workflow, out_prefix=None):
        """Create submission for a generic workflow for a specific WMS.

        Parameters
        ----------
        config : `lsst.ctrl.bps.BpsConfig`
            BPS configuration.
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic representation of a single workflow.
        out_prefix : `str`, optional
            Prefix for all WMS output files.

        Returns
        -------
        wms_workflow : `BaseWmsWorkflow`
            Prepared WMS Workflow to submit for execution.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def submit(self, workflow):
        """Submit a single WMS workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.BaseWmsWorkflow`
            Prepared WMS Workflow to submit for execution.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def restart(self, wms_workflow_id):
        """Restart a workflow from the point of failure.

        Parameters
        ----------
        wms_workflow_id : `str`
            Id that can be used by WMS service to identify workflow that
            needs to be restarted.

        Returns
        -------
        wms_id : `str` or `None`
            Id of the restarted workflow. If restart failed, it will be set
            to None.
        run_name : `str` or `None`
            Name of the restarted workflow. If restart failed, it will be set
            to None.
        message : `str`
            A message describing any issues encountered during the restart.
            If there were no issue, an empty string is returned.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def list_submitted_jobs(
        self, wms_id=None, user=None, require_bps=True, pass_thru=None, is_global=False
    ):
        """Query WMS for list of submitted WMS workflows/jobs.

        This should be a quick lookup function to create list of jobs for
        other functions.

        Parameters
        ----------
        wms_id : `int` or `str`, optional
            Id or path that can be used by WMS service to look up job.
        user : `str`, optional
            User whose submitted jobs should be listed.
        require_bps : `bool`, optional
            Whether to require jobs returned in list to be bps-submitted jobs.
        pass_thru : `str`, optional
            Information to pass through to WMS.
        is_global : `bool`, optional
            If set, all available job queues will be queried for job
            information. Defaults to False which means that only a local job
            queue will be queried for information.

            Only applicable in the context of a WMS using distributed job
            queues (e.g., HTCondor). A WMS with a centralized job queue
            (e.g. PanDA) can safely ignore it.

        Returns
        -------
        job_ids : `list` [`Any`]
            Only job ids to be used by cancel and other functions. Typically
            this means top-level jobs (i.e., not children jobs).

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def report(self, wms_workflow_id=None, user=None, hist=0, pass_thru=None, is_global=False):
        """Query WMS for status of submitted WMS workflows.

        Parameters
        ----------
        wms_workflow_id : `int` or `str`, optional
            Id that can be used by WMS service to look up status.
        user : `str`, optional
            Limit report to submissions by this particular user.
        hist : `int`, optional
            Number of days to expand report to include finished WMS workflows.
        pass_thru : `str`, optional
            Additional arguments to pass through to the specific WMS service.
        is_global : `bool`, optional
            If set, all available job queues will be queried for job
            information. Defaults to False which means that only a local job
            queue will be queried for information.

            Only applicable in the context of a WMS using distributed job
            queues (e.g., HTCondor). A WMS with a centralized job queue
            (e.g. PanDA) can safely ignore it.

        Returns
        -------
        run_reports : `list` [`lsst.ctrl.bps.WmsRunReport`]
            Status information for submitted WMS workflows.
        message : `str`
            Message to user on how to find more status information specific to
            this particular WMS.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def cancel(self, wms_id, pass_thru=None):
        """Cancel submitted workflows/jobs.

        Parameters
        ----------
        wms_id : `str`
            ID or path of job that should be canceled.
        pass_thru : `str`, optional
            Information to pass through to WMS.

        Returns
        -------
        deleted : `bool`
            Whether successful deletion or not. Currently, if any doubt or any
            individual jobs not deleted, return False.
        message : `str`
            Any message from WMS (e.g., error details).

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def run_submission_checks(self):
        """Checks to run at start if running WMS specific submission steps.

        Any exception other than NotImplementedError will halt submission.
        Submit directory may not yet exist when this is called.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

248 

249 

class BaseWmsWorkflow(metaclass=ABCMeta):
    """Interface for single workflow specific to a WMS.

    ``from_generic_workflow`` and ``write`` raise `NotImplementedError` here;
    WMS-specific subclasses are expected to override them.

    Parameters
    ----------
    name : `str`
        Unique name of workflow.
    config : `lsst.ctrl.bps.BpsConfig`
        Generic workflow config.
    """

    def __init__(self, name, config):
        self.name = name
        self.config = config
        # The remaining attributes start out unset; presumably they are
        # filled in by the WMS-specific code that creates or submits the
        # workflow -- confirm against the concrete plugins.
        self.service_class = None
        self.run_id = None
        self.submit_path = None

    @classmethod
    def from_generic_workflow(cls, config, generic_workflow, out_prefix,
                              service_class):
        """Create a WMS-specific workflow from a GenericWorkflow.

        Parameters
        ----------
        config : `lsst.ctrl.bps.BpsConfig`
            Configuration values needed for generating a WMS specific workflow.
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow from which to create the WMS-specific one.
        out_prefix : `str`
            Root directory to be used for WMS workflow inputs and outputs
            as well as internal WMS files.
        service_class : `str`
            Full module name of WMS service class that created this workflow.

        Returns
        -------
        wms_workflow : `lsst.ctrl.bps.BaseWmsWorkflow`
            A WMS specific workflow.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def write(self, out_prefix):
        """Write WMS files for this particular workflow.

        Parameters
        ----------
        out_prefix : `str`
            Root directory to be used for WMS workflow inputs and outputs
            as well as internal WMS files.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError