Coverage for python/lsst/ctrl/bps/drivers.py: 18%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

127 statements  

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Driver functions for each subcommand. 

23 

24Driver functions ensure that all setup work is done before running 

25the subcommand method. 

26""" 

27 

28 

29__all__ = [ 

30 "acquire_qgraph_driver", 

31 "cluster_qgraph_driver", 

32 "transform_driver", 

33 "prepare_driver", 

34 "submit_driver", 

35 "report_driver", 

36 "cancel_driver", 

37] 

38 

39 

40import errno 

41import getpass 

42import logging 

43import os 

44import re 

45import shutil 

46from collections import Iterable 

47from pathlib import Path 

48 

49 

50from lsst.obs.base import Instrument 

51from lsst.utils import doImport 

52from lsst.utils.timer import time_this 

53 

54from . import BPS_SEARCH_ORDER, BpsConfig 

55from .pre_transform import acquire_quantum_graph, cluster_quanta 

56from .transform import transform 

57from .prepare import prepare 

58from .submit import submit 

59from .cancel import cancel 

60from .report import report 

61 

62_LOG = logging.getLogger(__name__) 

63 

64 

def _init_submission_driver(config_file, **kwargs):
    """Initialize runtime environment.

    Builds the BPS configuration, applies command-line overrides, fills in
    a few derived values, optionally runs the WMS plugin's submission
    checks, creates the submit directory and saves copies of the original
    and expanded configuration there.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Command-line values used to override the configuration. Entries
        whose value is falsy (e.g. `None` or an empty string) are ignored.
        Non-string iterables are joined into a comma-separated string.
        The ``runWmsSubmissionChecks`` entry additionally controls whether
        the WMS service class is imported and asked to run its submission
        checks.

    Returns
    -------
    config : `lsst.ctrl.bps.BpsConfig`
        Batch Processing Service configuration.

    Raises
    ------
    KeyError
        Raised if ``outCollection`` is present in the configuration, if
        ``outputRun`` or ``submitPath`` is missing, or if submission checks
        were requested but ``wmsServiceClass`` cannot be found.
    OSError
        Raised if the submit directory already exists or cannot be created.
    """
    # Resolve the ABC from ``collections.abc``; the alias of the same name
    # in ``collections`` was removed in Python 3.10.
    from collections.abc import Iterable

    config = BpsConfig(config_file, BPS_SEARCH_ORDER)

    # Override config with command-line values.
    # Handle diffs between pipetask argument names vs bps yaml.
    translation = {
        "input": "inCollection",
        "output_run": "outputRun",
        "qgraph": "qgraphFile",
        "pipeline": "pipelineYaml",
    }
    for key, value in kwargs.items():
        # Don't want to override config with None or empty string values.
        if not value:
            continue

        # pipetask argument parser converts some values to list,
        # but bps will want string.
        if not isinstance(value, str) and isinstance(value, Iterable):
            value = ",".join(value)

        # Names without an explicit translation are converted from
        # snake_case to camelCase.
        new_key = translation.get(key, re.sub(r"_(\S)", lambda match: match.group(1).upper(), key))
        config[f".bps_cmdline.{new_key}"] = value

    # Set some initial values
    config[".bps_defined.timestamp"] = Instrument.makeCollectionTimestamp()
    if "operator" not in config:
        config[".bps_defined.operator"] = getpass.getuser()

    if "outCollection" in config:
        raise KeyError("outCollection is deprecated. Replace all outCollection references with outputRun.")

    if "outputRun" not in config:
        raise KeyError("Must specify the output run collection using outputRun")

    if "uniqProcName" not in config:
        config[".bps_defined.uniqProcName"] = config["outputRun"].replace("/", "_")

    if "submitPath" not in config:
        raise KeyError("Must specify the submit-side run directory using submitPath")

    # If requested, run WMS plugin checks early in submission process to
    # ensure WMS has what it will need for prepare() or submit().
    if kwargs.get("runWmsSubmissionChecks", False):
        found, wms_class = config.search("wmsServiceClass")
        if not found:
            raise KeyError("Missing wmsServiceClass in bps config. Aborting.")

        # Check that can import wms service class.
        wms_service_class = doImport(wms_class)
        wms_service = wms_service_class(config)

        try:
            wms_service.run_submission_checks()
        except NotImplementedError:
            # Allow various plugins to implement only when needed to do extra
            # checks.
            _LOG.debug("run_submission_checks is not implemented in %s.", wms_class)
    else:
        _LOG.debug("Skipping submission checks.")

    # Make submit directory to contain all outputs.
    submit_path = Path(config["submitPath"])
    try:
        # exist_ok=False: an already existing directory is reported as an
        # error instead of being reused.
        submit_path.mkdir(parents=True, exist_ok=False)
    except OSError as exc:
        if exc.errno == errno.EEXIST:
            reason = "Directory already exists"
        else:
            reason = exc.strerror
        # Re-raise the same exception type with a concise message and
        # without the original traceback context.
        raise type(exc)(f"cannot create submit directory '{submit_path}': {reason}") from None
    config[".bps_defined.submitPath"] = str(submit_path)

    # save copy of configs (orig and expanded config)
    shutil.copy2(config_file, submit_path)
    with open(f"{submit_path}/{config['uniqProcName']}_config.yaml", "w") as fh:
        config.dump(fh)

    return config

152 

153 

def acquire_qgraph_driver(config_file, **kwargs):
    """Run the acquire stage: obtain the quantum graph for a submission.

    The runtime environment is initialized first, then the quantum graph is
    read from a file or created from a pipeline definition.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional values forwarded to the submission initialization.

    Returns
    -------
    config : `lsst.ctrl.bps.BpsConfig`
        Updated configuration; it additionally records the execution butler
        directory and the quantum graph file reported by the acquire stage.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        A graph representing quanta.
    """
    config = _init_submission_driver(config_file, **kwargs)
    out_dir = config[".bps_defined.submitPath"]

    _LOG.info("Starting acquire stage (generating and/or reading quantum graph)")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Acquire stage completed"):
        graph_file, qgraph, butler_dir = acquire_quantum_graph(config, out_prefix=out_dir)

    # Record what the acquire stage produced so later stages can find it.
    config[".bps_defined.executionButlerDir"] = butler_dir
    config[".bps_defined.runQgraphFile"] = graph_file
    return config, qgraph

179 

180 

def cluster_qgraph_driver(config_file, **kwargs):
    """Run the cluster stage: group quanta into clusters.

    Runs the acquire stage first, clusters the resulting quantum graph and,
    depending on configuration flags, saves the clustered graph to the
    submit directory.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional values forwarded to the acquire stage driver.

    Returns
    -------
    config : `lsst.ctrl.bps.BpsConfig`
        Updated configuration.
    clustered_qgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A graph representing clustered quanta.
    """
    config, qgraph = acquire_qgraph_driver(config_file, **kwargs)

    _LOG.info("Starting cluster stage (grouping quanta into jobs)")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Cluster stage completed"):
        clustered_qgraph = cluster_quanta(config, qgraph, config["uniqProcName"])

    out_dir = config[".bps_defined.submitPath"]

    # ``search`` returns a (found, value) pair; only the value matters here
    # because a default is always supplied.
    if config.search("saveClusteredQgraph", opt={"default": False})[1]:
        pickle_path = os.path.join(out_dir, "bps_clustered_qgraph.pickle")
        clustered_qgraph.save(pickle_path)

    if config.search("saveDot", opt={"default": False})[1]:
        dot_path = os.path.join(out_dir, "bps_clustered_qgraph.dot")
        clustered_qgraph.draw(dot_path)

    return config, clustered_qgraph

210 

211 

def transform_driver(config_file, **kwargs):
    """Run the transform stage: create the generic workflow.

    Runs the cluster stage first, transforms the clustered quantum graph
    into a generic workflow and, depending on configuration flags, saves
    the workflow to the submit directory.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional values forwarded to the cluster stage driver.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to use when creating the workflow.
    generic_workflow
        Generic workflow created by the transform stage.
    """
    config, clustered_qgraph = cluster_qgraph_driver(config_file, **kwargs)
    out_dir = config[".bps_defined.submitPath"]

    _LOG.info("Starting transform stage (creating generic workflow)")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Transform stage completed"):
        generic_workflow, generic_workflow_config = transform(config, clustered_qgraph, out_dir)
        _LOG.info("Generic workflow name '%s'", generic_workflow.name)

    # Optional artifacts, each controlled by its own configuration flag.
    if config.search("saveGenericWorkflow", opt={"default": False})[1]:
        pickle_path = os.path.join(out_dir, "bps_generic_workflow.pickle")
        with open(pickle_path, "wb") as outfh:
            generic_workflow.save(outfh, "pickle")

    if config.search("saveDot", opt={"default": False})[1]:
        dot_path = os.path.join(out_dir, "bps_generic_workflow.dot")
        with open(dot_path, "w") as outfh:
            generic_workflow.draw(outfh, "dot")

    return generic_workflow_config, generic_workflow

245 

246 

def prepare_driver(config_file, **kwargs):
    """Run the prepare stage: create the WMS-specific workflow.

    Runs the transform stage first and then converts the generic workflow
    into its workflow-management-system specific implementation. WMS
    submission checks are enabled unless the caller set
    ``runWmsSubmissionChecks`` explicitly.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional values forwarded to the transform stage driver.

    Returns
    -------
    wms_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to use when creating the workflow.
    workflow : `lsst.ctrl.bps.BaseWmsWorkflow`
        Representation of the abstract/scientific workflow specific to a given
        workflow management system.
    """
    if "runWmsSubmissionChecks" not in kwargs:
        kwargs["runWmsSubmissionChecks"] = True

    generic_workflow_config, generic_workflow = transform_driver(config_file, **kwargs)
    out_dir = generic_workflow_config[".bps_defined.submitPath"]

    _LOG.info("Starting prepare stage (creating specific implementation of workflow)")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Prepare stage completed"):
        wms_workflow = prepare(generic_workflow_config, generic_workflow, out_dir)

    print(f"Submit dir: {wms_workflow.submit_path}")

    # The generic workflow configuration is returned as the WMS workflow
    # configuration without modification.
    return generic_workflow_config, wms_workflow

274 

275 

def submit_driver(config_file, **kwargs):
    """Run the whole submission: prepare the workflow and submit it.

    WMS submission checks are enabled unless the caller set
    ``runWmsSubmissionChecks`` explicitly. The id of the submitted run is
    printed at the end.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional values forwarded to the prepare stage driver.
    """
    if "runWmsSubmissionChecks" not in kwargs:
        kwargs["runWmsSubmissionChecks"] = True

    _LOG.info("Starting submission process")
    # The outer timer spans both the prepare and the submit stages; the
    # inner one measures the submit stage alone.
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Completed entire submission process"):
        wms_workflow_config, wms_workflow = prepare_driver(config_file, **kwargs)

        _LOG.info("Starting submit stage")
        with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Completed submit stage"):
            submit(wms_workflow_config, wms_workflow)
        _LOG.info("Run '%s' submitted for execution with id '%s'", wms_workflow.name, wms_workflow.run_id)

    print(f"Run Id: {wms_workflow.run_id}")

296 

297 

def report_driver(wms_service, run_id, user, hist_days, pass_thru, is_global=False):
    """Print out summary of jobs submitted for execution.

    Thin wrapper that forwards all arguments unchanged to `report`.

    Parameters
    ----------
    wms_service : `str`
        Name of the WMS service class.
    run_id : `str`
        A run id the report will be restricted to.
    user : `str`
        A user name the report will be restricted to.
    hist_days : `int`
        Number of days.
    pass_thru : `str`
        A string to pass directly to the WMS service class.
    is_global : `bool`, optional
        If set, all available job queues will be queried for job information.
        Defaults to False which means that only a local job queue will be
        queried for information.

        Only applicable in the context of a WMS using distributed job queues
        (e.g., HTCondor).
    """
    report(wms_service, run_id, user, hist_days, pass_thru, is_global=is_global)

322 

323 

def cancel_driver(wms_service, run_id, user, require_bps, pass_thru, is_global=False):
    """Cancel submitted workflows.

    Thin wrapper that forwards all arguments unchanged to `cancel`.

    Parameters
    ----------
    wms_service : `str`
        Name of the Workload Management System service class.
    run_id : `str`
        ID or path of job that should be canceled.
    user : `str`
        User whose submitted jobs should be canceled.
    require_bps : `bool`
        Whether to require given run_id/user to be a bps submitted job.
    pass_thru : `str`
        Information to pass through to WMS.
    is_global : `bool`, optional
        If set, all available job queues will be checked for jobs to cancel.
        Defaults to False which means that only a local job queue will be
        checked.

        Only applicable in the context of a WMS using distributed job queues
        (e.g., HTCondor).
    """
    cancel(wms_service, run_id, user, require_bps, pass_thru, is_global=is_global)