Coverage for python/lsst/ctrl/bps/drivers.py: 16%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Driver functions for each subcommand.
24Driver functions ensure that all setup work is done before running
25the subcommand method.
26"""
# Public API of this module: the driver functions exported by name.
__all__ = [
    "acquire_qgraph_driver",
    "cluster_qgraph_driver",
    "transform_driver",
    "prepare_driver",
    "submit_driver",
    "report_driver",
    "restart_driver",
    "cancel_driver",
]
41import errno
42import getpass
43import logging
44import os
45import re
46import shutil
47from collections.abc import Iterable
48from pathlib import Path
50from lsst.obs.base import Instrument
51from lsst.utils import doImport
52from lsst.utils.timer import time_this
54from . import BPS_DEFAULTS, BPS_SEARCH_ORDER, BpsConfig
55from .pre_transform import acquire_quantum_graph, cluster_quanta
56from .transform import transform
57from .prepare import prepare
58from .submit import submit
59from .cancel import cancel
60from .report import report
61from .restart import restart
62from .bps_utils import _dump_env_info, _dump_pkg_info
# Module-level logger used by every driver function in this file.
_LOG = logging.getLogger(__name__)
def _init_submission_driver(config_file, **kwargs):
    """Initialize runtime environment.

    Loads the configuration, layers command-line values on top of it,
    validates the settings a submission needs, optionally runs the WMS
    plugin's submission checks, creates the submit directory, and stores
    copies of the configuration and runtime information in it.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Command-line values. Every truthy value is stored in the
        ``.bps_cmdline`` section of the configuration (after its key is
        translated to the name used in bps yaml). The value of
        ``runWmsSubmissionChecks`` additionally controls whether the WMS
        service class is imported and its ``run_submission_checks()`` is
        called.

    Returns
    -------
    config : `lsst.ctrl.bps.BpsConfig`
        Batch Processing Service configuration.

    Raises
    ------
    KeyError
        Raised if ``outCollection`` is present in the configuration, if
        ``outputRun`` or ``submitPath`` is missing, or if submission checks
        were requested but ``wmsServiceClass`` cannot be found.
    OSError
        Raised if the submit directory cannot be created (including when it
        already exists).
    """
    config = BpsConfig(config_file, BPS_SEARCH_ORDER)

    _apply_cmdline_overrides(config, kwargs)
    _apply_wms_service_env_default(config)
    _define_and_validate_run_settings(config)

    # If requested, run WMS plugin checks early in submission process to
    # ensure WMS has what it will need for prepare() or submit().  This is
    # done before the submit directory is created.
    if kwargs.get("runWmsSubmissionChecks", False):
        _run_wms_submission_checks(config)
    else:
        _LOG.debug("Skipping submission checks.")

    submit_path = _create_submit_dir(config)
    _save_submission_records(config, config_file, submit_path)

    return config


def _apply_cmdline_overrides(config, cmdline_values):
    """Store truthy command-line values in the ``.bps_cmdline`` section."""
    # Handle diffs between pipetask argument names vs bps yaml.
    translation = {
        "input": "inCollection",
        "output_run": "outputRun",
        "qgraph": "qgraphFile",
        "pipeline": "pipelineYaml",
        "wms_service": "wmsServiceClass",
    }
    for key, value in cmdline_values.items():
        # Don't want to override config with None or empty string values.
        if not value:
            continue
        # pipetask argument parser converts some values to list,
        # but bps will want string.
        if not isinstance(value, str) and isinstance(value, Iterable):
            value = ",".join(value)
        # Keys without an explicit translation are converted from snake_case
        # to camelCase.
        new_key = translation.get(key, re.sub(r"_(\S)", lambda match: match.group(1).upper(), key))
        config[f".bps_cmdline.{new_key}"] = value


def _apply_wms_service_env_default(config):
    """Use BPS_WMS_SERVICE_CLASS when no other WMS service class was given."""
    # If the WMS service class was defined neither at the command line nor
    # explicitly in config file, use the value provided by the environmental
    # variable BPS_WMS_SERVICE_CLASS. If the variable is not set, stick to
    # the package default.
    env_wms_service = os.environ.get("BPS_WMS_SERVICE_CLASS", None)
    if env_wms_service is not None and "wmsServiceClass" not in config[".bps_cmdline"]:
        default_config = BpsConfig(BPS_DEFAULTS)
        # A value equal to the package default is treated as "not explicitly
        # set in the config file".
        if config["wmsServiceClass"] == default_config["wmsServiceClass"]:
            config["wmsServiceClass"] = env_wms_service


def _define_and_validate_run_settings(config):
    """Set bps-defined initial values and verify required settings exist."""
    config[".bps_defined.timestamp"] = Instrument.makeCollectionTimestamp()
    if "operator" not in config:
        config[".bps_defined.operator"] = getpass.getuser()

    if "outCollection" in config:
        raise KeyError("outCollection is deprecated. Replace all outCollection references with outputRun.")

    if "outputRun" not in config:
        raise KeyError("Must specify the output run collection using outputRun")

    if "uniqProcName" not in config:
        config[".bps_defined.uniqProcName"] = config["outputRun"].replace("/", "_")

    if "submitPath" not in config:
        raise KeyError("Must specify the submit-side run directory using submitPath")


def _run_wms_submission_checks(config):
    """Import the configured WMS service class and run its checks."""
    found, wms_class = config.search("wmsServiceClass")
    if not found:
        raise KeyError("Missing wmsServiceClass in bps config. Aborting.")

    # Check that can import wms service class.
    wms_service_class = doImport(wms_class)
    wms_service_instance = wms_service_class(config)

    try:
        wms_service_instance.run_submission_checks()
    except NotImplementedError:
        # Allow various plugins to implement only when needed to do extra
        # checks.
        _LOG.debug("run_submission_checks is not implemented in %s.", wms_class)


def _create_submit_dir(config):
    """Create the submit directory and record its path in the config."""
    submit_path = Path(config["submitPath"])
    try:
        # exist_ok=False: an already existing directory is an error.
        submit_path.mkdir(parents=True, exist_ok=False)
    except OSError as exc:
        if exc.errno == errno.EEXIST:
            reason = "Directory already exists"
        else:
            reason = exc.strerror
        # Re-raise the same exception type with a message naming the
        # directory; "from None" hides the original exception context.
        raise type(exc)(f"cannot create submit directory '{submit_path}': {reason}") from None
    config[".bps_defined.submitPath"] = str(submit_path)
    return submit_path


def _save_submission_records(config, config_file, submit_path):
    """Save the configs and runtime information in the submit directory."""
    # Save copy of configs (orig and expanded config).
    shutil.copy2(config_file, submit_path)
    with open(f"{submit_path}/{config['uniqProcName']}_config.yaml", "w") as fh:
        config.dump(fh)

    # Dump information about runtime environment and software versions in use.
    _dump_env_info(f"{submit_path}/{config['uniqProcName']}.env.info.yaml")
    _dump_pkg_info(f"{submit_path}/{config['uniqProcName']}.pkg.info.yaml")
def acquire_qgraph_driver(config_file, **kwargs):
    """Obtain the quantum graph for a run, either by reading or creating it.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional keyword arguments, forwarded unchanged to the submission
        initialization step.

    Returns
    -------
    config : `lsst.ctrl.bps.BpsConfig`
        Updated configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        A graph representing quanta.
    """
    config = _init_submission_driver(config_file, **kwargs)
    out_prefix = config[".bps_defined.submitPath"]

    _LOG.info("Starting acquire stage (generating and/or reading quantum graph)")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Acquire stage completed"):
        acquired = acquire_quantum_graph(config, out_prefix=out_prefix)
    qgraph_file, qgraph, execution_butler_dir = acquired

    # Record where the acquire stage put its products so that later stages
    # can find them in the configuration.
    config[".bps_defined.executionButlerDir"] = execution_butler_dir
    config[".bps_defined.runQgraphFile"] = qgraph_file
    return config, qgraph
def cluster_qgraph_driver(config_file, **kwargs):
    """Run the acquire stage, then group the resulting quanta into clusters.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional keyword arguments, forwarded unchanged to
        `acquire_qgraph_driver`.

    Returns
    -------
    config : `lsst.ctrl.bps.BpsConfig`
        Updated configuration.
    clustered_qgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A graph representing clustered quanta.
    """

    def _is_enabled(option):
        # Options that are absent from the configuration count as disabled.
        _, flag = config.search(option, opt={"default": False})
        return flag

    config, qgraph = acquire_qgraph_driver(config_file, **kwargs)

    _LOG.info("Starting cluster stage (grouping quanta into jobs)")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Cluster stage completed"):
        clustered_qgraph = cluster_quanta(config, qgraph, config["uniqProcName"])

    out_dir = config[".bps_defined.submitPath"]

    # Optional outputs, written to the submit directory only on request.
    if _is_enabled("saveClusteredQgraph"):
        clustered_qgraph.save(os.path.join(out_dir, "bps_clustered_qgraph.pickle"))
    if _is_enabled("saveDot"):
        clustered_qgraph.draw(os.path.join(out_dir, "bps_clustered_qgraph.dot"))
    return config, clustered_qgraph
def transform_driver(config_file, **kwargs):
    """Run the cluster stage, then create a generic workflow from its result.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional keyword arguments, forwarded unchanged to
        `cluster_qgraph_driver`.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to use when creating the workflow.
    generic_workflow : `lsst.ctrl.bps.BaseWmsWorkflow`
        Generic workflow returned by the transform stage.
        NOTE(review): the type above is carried over from the earlier
        documentation; confirm it against what ``transform`` returns.
    """

    def _is_enabled(option):
        # Options that are absent from the configuration count as disabled.
        _, flag = config.search(option, opt={"default": False})
        return flag

    config, clustered_qgraph = cluster_qgraph_driver(config_file, **kwargs)
    out_dir = config[".bps_defined.submitPath"]

    _LOG.info("Starting transform stage (creating generic workflow)")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Transform stage completed"):
        generic_workflow, generic_workflow_config = transform(config, clustered_qgraph, out_dir)
        _LOG.info("Generic workflow name '%s'", generic_workflow.name)

    # Optional outputs, written to the submit directory only on request.
    if _is_enabled("saveGenericWorkflow"):
        pickle_name = os.path.join(out_dir, "bps_generic_workflow.pickle")
        with open(pickle_name, "wb") as pickle_fh:
            generic_workflow.save(pickle_fh, "pickle")
    if _is_enabled("saveDot"):
        dot_name = os.path.join(out_dir, "bps_generic_workflow.dot")
        with open(dot_name, "w") as dot_fh:
            generic_workflow.draw(dot_fh, "dot")
    return generic_workflow_config, generic_workflow
def prepare_driver(config_file, **kwargs):
    """Create a WMS-specific implementation of the generic workflow.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional keyword arguments, forwarded to `transform_driver`.
        ``runWmsSubmissionChecks`` is set to `True` unless the caller
        provided a value for it.

    Returns
    -------
    wms_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to use when creating the workflow.
    workflow : `lsst.ctrl.bps.BaseWmsWorkflow`
        Representation of the abstract/scientific workflow specific to a given
        workflow management system.
    """
    # Turn the WMS submission checks on by default for this stage.
    kwargs.setdefault("runWmsSubmissionChecks", True)
    workflow_config, generic_workflow = transform_driver(config_file, **kwargs)
    out_dir = workflow_config[".bps_defined.submitPath"]

    _LOG.info("Starting prepare stage (creating specific implementation of workflow)")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Prepare stage completed"):
        wms_workflow = prepare(workflow_config, generic_workflow, out_dir)

    print(f"Submit dir: {wms_workflow.submit_path}")
    # The configuration returned is the very object the transform stage
    # produced; no copy is made.
    return workflow_config, wms_workflow
def submit_driver(config_file, **kwargs):
    """Run every preparation stage and submit the workflow for execution.

    Prints the run id and the run name of the submitted workflow.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional keyword arguments, forwarded to `prepare_driver`.
        ``runWmsSubmissionChecks`` is set to `True` unless the caller
        provided a value for it.
    """
    kwargs.setdefault("runWmsSubmissionChecks", True)

    # Settings shared by both timers below.
    timer_settings = {"log": _LOG, "level": logging.INFO, "prefix": None}

    _LOG.info("Starting submission process")
    # The outer timer spans the whole process: preparation and submission.
    with time_this(msg="Completed entire submission process", **timer_settings):
        workflow_config, workflow = prepare_driver(config_file, **kwargs)

        _LOG.info("Starting submit stage")
        with time_this(msg="Completed submit stage", **timer_settings):
            submit(workflow_config, workflow)
            _LOG.info("Run '%s' submitted for execution with id '%s'", workflow.name, workflow.run_id)

    print(f"Run Id: {workflow.run_id}")
    print(f"Run Name: {workflow.name}")
def restart_driver(wms_service, run_id):
    """Restart a failed workflow and print the outcome.

    Parameters
    ----------
    wms_service : `str`
        Name of the class. If `None`, the value of the environment variable
        BPS_WMS_SERVICE_CLASS is used; if that is not set either, the
        package default is used.
    run_id : `str`
        Id or path of the workflow that needs to be restarted.
    """
    if wms_service is None:
        package_default = BpsConfig(BPS_DEFAULTS)["wmsServiceClass"]
        wms_service = os.environ.get("BPS_WMS_SERVICE_CLASS", package_default)

    new_run_id, run_name, message = restart(wms_service, run_id)

    # A missing new run id means that the restart did not succeed.
    if new_run_id is None:
        print(f"Restart failed: {message}" if message else "Restart failed: Unknown error")
        return

    # Runtime information is only dumped when run_id is an existing path.
    if Path(run_id).exists():
        _dump_env_info(f"{run_id}/{run_name}.env.info.yaml")
        _dump_pkg_info(f"{run_id}/{run_name}.pkg.info.yaml")
    print(f"Run Id: {new_run_id}")
    print(f"Run Name: {run_name}")
def report_driver(wms_service, run_id, user, hist_days, pass_thru, is_global=False):
    """Print out summary of jobs submitted for execution.

    Parameters
    ----------
    wms_service : `str`
        Name of the class. If `None`, the value of the environment variable
        BPS_WMS_SERVICE_CLASS is used; if that is not set either, the
        package default is used.
    run_id : `str`
        A run id the report will be restricted to.
    user : `str`
        A user name the report will be restricted to.
    hist_days : `int`
        Number of days; handed to the report function unchanged.
        NOTE(review): presumably how far back to search for runs -- confirm.
    pass_thru : `str`
        A string to pass directly to the WMS service class.
    is_global : `bool`, optional
        If set, all available job queues will be queried for job information.
        Defaults to False which means that only a local job queue will be
        queried for information.

        Only applicable in the context of a WMS using distributed job queues
        (e.g., HTCondor).
    """
    if wms_service is None:
        package_default = BpsConfig(BPS_DEFAULTS)["wmsServiceClass"]
        wms_service = os.environ.get("BPS_WMS_SERVICE_CLASS", package_default)

    report(wms_service, run_id, user, hist_days, pass_thru, is_global=is_global)
def cancel_driver(wms_service, run_id, user, require_bps, pass_thru, is_global=False):
    """Cancel submitted workflows.

    Parameters
    ----------
    wms_service : `str`
        Name of the Workload Management System service class. If `None`,
        the value of the environment variable BPS_WMS_SERVICE_CLASS is used;
        if that is not set either, the package default is used.
    run_id : `str`
        ID or path of job that should be canceled.
    user : `str`
        User whose submitted jobs should be canceled.
    require_bps : `bool`
        Whether to require given run_id/user to be a bps submitted job.
    pass_thru : `str`
        Information to pass through to WMS.
    is_global : `bool`, optional
        If set, all available job queues will be checked for jobs to cancel.
        Defaults to False which means that only a local job queue will be
        checked.

        Only applicable in the context of a WMS using distributed job queues
        (e.g., HTCondor).
    """
    if wms_service is None:
        package_default = BpsConfig(BPS_DEFAULTS)["wmsServiceClass"]
        wms_service = os.environ.get("BPS_WMS_SERVICE_CLASS", package_default)

    cancel(wms_service, run_id, user, require_bps, pass_thru, is_global=is_global)