Coverage for python/lsst/ctrl/bps/drivers.py: 18%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

119 statements  

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Driver functions for each subcommand. 

23 

24Driver functions ensure that all setup work is done before running 

25the subcommand method. 

26""" 

27 

28 

29__all__ = [ 

30 "acquire_qgraph_driver", 

31 "cluster_qgraph_driver", 

32 "transform_driver", 

33 "prepare_driver", 

34 "submit_driver", 

35 "report_driver", 

36 "cancel_driver", 

37] 

38 

39 

40import getpass 

41import logging 

42import os 

43import re 

44import shutil 

45from collections import Iterable 

46 

47 

48from lsst.obs.base import Instrument 

49from lsst.utils import doImport 

50from lsst.utils.timer import time_this 

51 

52from . import BPS_SEARCH_ORDER, BpsConfig 

53from .pre_transform import acquire_quantum_graph, cluster_quanta 

54from .transform import transform 

55from .prepare import prepare 

56from .submit import submit 

57from .cancel import cancel 

58from .report import report 

59 

60_LOG = logging.getLogger(__name__) 

61 

62 

def _init_submission_driver(config_file, **kwargs):
    """Initialize runtime environment.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Command-line values used to override the configuration. Entries
        with falsy values (e.g. `None` or an empty string) are ignored.
        If ``runWmsSubmissionChecks`` is set to a true value, the WMS
        service class named in the configuration is imported and its
        submission checks are run.

    Returns
    -------
    config : `lsst.ctrl.bps.BpsConfig`
        Batch Processing Service configuration.

    Raises
    ------
    KeyError
        Raised if ``outCollection`` is present in the configuration, if
        ``outputRun`` or ``submitPath`` is missing, or if submission checks
        were requested but ``wmsServiceClass`` cannot be found.
    """
    # The ``collections.Iterable`` alias was removed in Python 3.10; the
    # abstract base class must be taken from ``collections.abc``. Importing
    # it here keeps this function independent of the removed alias.
    from collections.abc import Iterable

    config = BpsConfig(config_file, BPS_SEARCH_ORDER)

    # Override config with command-line values
    # Handle diffs between pipetask argument names vs bps yaml
    translation = {
        "input": "inCollection",
        "output_run": "outputRun",
        "qgraph": "qgraphFile",
        "pipeline": "pipelineYaml",
    }
    for key, value in kwargs.items():
        # Don't want to override config with None or empty string values.
        if not value:
            continue
        # pipetask argument parser converts some values to list,
        # but bps will want string.
        if not isinstance(value, str) and isinstance(value, Iterable):
            value = ",".join(value)
        # Keys without an explicit translation are converted from
        # snake_case to camelCase.
        new_key = translation.get(key, re.sub(r"_(\S)", lambda match: match.group(1).upper(), key))
        config[f".bps_cmdline.{new_key}"] = value

    # Set some initial values
    config[".bps_defined.timestamp"] = Instrument.makeCollectionTimestamp()
    if "operator" not in config:
        config[".bps_defined.operator"] = getpass.getuser()

    if "outCollection" in config:
        raise KeyError("outCollection is deprecated.  Replace all outCollection references with outputRun.")

    if "outputRun" not in config:
        raise KeyError("Must specify the output run collection using outputRun")

    if "uniqProcName" not in config:
        config[".bps_defined.uniqProcName"] = config["outputRun"].replace("/", "_")

    if "submitPath" not in config:
        raise KeyError("Must specify the submit-side run directory using submitPath")

    # If requested, run WMS plugin checks early in submission process to
    # ensure WMS has what it will need for prepare() or submit().
    if kwargs.get("runWmsSubmissionChecks", False):
        found, wms_class = config.search("wmsServiceClass")
        if not found:
            raise KeyError("Missing wmsServiceClass in bps config.  Aborting.")

        # Check that can import wms service class.
        wms_service_class = doImport(wms_class)
        wms_service = wms_service_class(config)

        try:
            wms_service.run_submission_checks()
        except NotImplementedError:
            # Allow various plugins to implement only when needed to do extra
            # checks.
            _LOG.debug("run_submission_checks is not implemented in %s.", wms_class)
    else:
        _LOG.debug("Skipping submission checks.")

    # make submit directory to contain all outputs
    submit_path = config["submitPath"]
    os.makedirs(submit_path, exist_ok=True)
    config[".bps_defined.submitPath"] = submit_path

    # save copy of configs (orig and expanded config)
    shutil.copy2(config_file, submit_path)
    with open(f"{submit_path}/{config['uniqProcName']}_config.yaml", "w") as fh:
        config.dump(fh)

    return config

143 

144 

def acquire_qgraph_driver(config_file, **kwargs):
    """Run the acquire stage: read an existing quantum graph or build one.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional values passed through to the submission initialization.

    Returns
    -------
    config : `lsst.ctrl.bps.BpsConfig`
        Updated configuration; the execution butler directory and the
        quantum graph file reported by the acquire stage are recorded in it.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        A graph representing quanta.
    """
    config = _init_submission_driver(config_file, **kwargs)
    out_prefix = config[".bps_defined.submitPath"]

    _LOG.info("Starting acquire stage (generating and/or reading quantum graph)")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Acquire stage completed"):
        graph_file, quantum_graph, butler_dir = acquire_quantum_graph(config, out_prefix=out_prefix)

    # Record where the acquire stage put its products.
    config[".bps_defined.executionButlerDir"] = butler_dir
    config[".bps_defined.runQgraphFile"] = graph_file
    return config, quantum_graph

170 

171 

def cluster_qgraph_driver(config_file, **kwargs):
    """Run the cluster stage: group quanta into clusters.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional values passed through to `acquire_qgraph_driver`.

    Returns
    -------
    config : `lsst.ctrl.bps.BpsConfig`
        Updated configuration.
    clustered_qgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A graph representing clustered quanta.
    """
    config, quantum_graph = acquire_qgraph_driver(config_file, **kwargs)

    _LOG.info("Starting cluster stage (grouping quanta into jobs)")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Cluster stage completed"):
        clustered = cluster_quanta(config, quantum_graph, config["uniqProcName"])

    out_dir = config[".bps_defined.submitPath"]

    # Optional outputs, both disabled unless enabled in the configuration.
    if config.search("saveClusteredQgraph", opt={"default": False})[1]:
        clustered.save(os.path.join(out_dir, "bps_clustered_qgraph.pickle"))
    if config.search("saveDot", opt={"default": False})[1]:
        clustered.draw(os.path.join(out_dir, "bps_clustered_qgraph.dot"))
    return config, clustered

201 

202 

def transform_driver(config_file, **kwargs):
    """Run the transform stage: create a generic workflow from clustered quanta.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional values passed through to `cluster_qgraph_driver`.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration returned by the transform stage, to be used together
        with the generic workflow.
    generic_workflow : `object`
        Generic workflow created by the transform stage.
        NOTE(review): presumably a ``lsst.ctrl.bps.GenericWorkflow``; the
        concrete type is not visible here -- confirm against ``transform``.
    """
    config, clustered = cluster_qgraph_driver(config_file, **kwargs)
    out_dir = config[".bps_defined.submitPath"]

    _LOG.info("Starting transform stage (creating generic workflow)")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Transform stage completed"):
        workflow, workflow_config = transform(config, clustered, out_dir)
        _LOG.info("Generic workflow name '%s'", workflow.name)

    # Optional outputs, both disabled unless enabled in the configuration.
    if config.search("saveGenericWorkflow", opt={"default": False})[1]:
        pickle_path = os.path.join(out_dir, "bps_generic_workflow.pickle")
        with open(pickle_path, "wb") as outfh:
            workflow.save(outfh, "pickle")
    if config.search("saveDot", opt={"default": False})[1]:
        dot_path = os.path.join(out_dir, "bps_generic_workflow.dot")
        with open(dot_path, "w") as outfh:
            workflow.draw(outfh, "dot")
    return workflow_config, workflow

236 

237 

def prepare_driver(config_file, **kwargs):
    """Run the prepare stage: create the WMS-specific form of the workflow.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional values passed through to `transform_driver`. Unless the
        caller sets it, ``runWmsSubmissionChecks`` defaults to `True`.

    Returns
    -------
    wms_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to use when creating the workflow; this is the
        configuration returned by the transform stage.
    workflow : `lsst.ctrl.bps.BaseWmsWorkflow`
        Representation of the abstract/scientific workflow specific to a given
        workflow management system.
    """
    kwargs.setdefault("runWmsSubmissionChecks", True)
    wms_workflow_config, generic_workflow = transform_driver(config_file, **kwargs)
    out_dir = wms_workflow_config[".bps_defined.submitPath"]

    _LOG.info("Starting prepare stage (creating specific implementation of workflow)")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Prepare stage completed"):
        wms_workflow = prepare(wms_workflow_config, generic_workflow, out_dir)

    print(f"Submit dir: {wms_workflow.submit_path}")
    return wms_workflow_config, wms_workflow

265 

266 

def submit_driver(config_file, **kwargs):
    """Prepare a workflow and submit it for execution.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional values passed through to `prepare_driver`. Unless the
        caller sets it, ``runWmsSubmissionChecks`` defaults to `True`.
    """
    kwargs.setdefault("runWmsSubmissionChecks", True)

    _LOG.info("Starting submission process")
    # The outer timer covers every stage up to and including the submission.
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Completed entire submission process"):
        config, workflow = prepare_driver(config_file, **kwargs)

        _LOG.info("Starting submit stage")
        with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Completed submit stage"):
            submit(config, workflow)
            _LOG.info("Run '%s' submitted for execution with id '%s'", workflow.name, workflow.run_id)

    print(f"Run Id: {workflow.run_id}")

287 

288 

def report_driver(wms_service, run_id, user, hist_days, pass_thru, is_global=False):
    """Report on jobs submitted for execution.

    This is a thin wrapper which forwards all its arguments, unchanged, to
    ``report``.

    Parameters
    ----------
    wms_service : `str`
        Name of the class.
    run_id : `str`
        A run id the report will be restricted to.
    user : `str`
        A user name the report will be restricted to.
    hist_days : `int`
        Number of days.
    pass_thru : `str`
        A string to pass directly to the WMS service class.
    is_global : `bool`, optional
        If set, all available job queues will be queried for job information.
        Defaults to False which means that only a local job queue will be
        queried for information.

        Only applicable in the context of a WMS using distributed job queues
        (e.g., HTCondor).
    """
    report(
        wms_service,
        run_id,
        user,
        hist_days,
        pass_thru,
        is_global=is_global,
    )

313 

314 

def cancel_driver(wms_service, run_id, user, require_bps, pass_thru, is_global=False):
    """Cancel submitted workflows.

    This is a thin wrapper which forwards all its arguments, unchanged, to
    ``cancel``.

    Parameters
    ----------
    wms_service : `str`
        Name of the Workload Management System service class.
    run_id : `str`
        ID or path of job that should be canceled.
    user : `str`
        User whose submitted jobs should be canceled.
    require_bps : `bool`
        Whether to require given run_id/user to be a bps submitted job.
    pass_thru : `str`
        Information to pass through to WMS.
    is_global : `bool`, optional
        If set, all available job queues will be checked for jobs to cancel.
        Defaults to False which means that only a local job queue will be
        checked.

        Only applicable in the context of a WMS using distributed job queues
        (e.g., HTCondor).
    """
    cancel(
        wms_service,
        run_id,
        user,
        require_bps,
        pass_thru,
        is_global=is_global,
    )