Coverage for python/lsst/ctrl/bps/drivers.py: 18%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Driver functions for each subcommand.
24Driver functions ensure that all setup work is done before running
25the subcommand method.
26"""
# Public interface of this module: the driver functions defined below,
# which the module docstring describes as one per subcommand.
__all__ = [
    "acquire_qgraph_driver",
    "cluster_qgraph_driver",
    "transform_driver",
    "prepare_driver",
    "submit_driver",
    "report_driver",
    "cancel_driver",
]
40import errno
41import getpass
42import logging
43import os
44import re
45import shutil
46from collections import Iterable
47from pathlib import Path
50from lsst.obs.base import Instrument
51from lsst.utils import doImport
52from lsst.utils.timer import time_this
54from . import BPS_SEARCH_ORDER, BpsConfig
55from .pre_transform import acquire_quantum_graph, cluster_quanta
56from .transform import transform
57from .prepare import prepare
58from .submit import submit
59from .cancel import cancel
60from .report import report
# Module-level logger, named after this module, used by the submission
# drivers below for progress and timing messages.
_LOG = logging.getLogger(__name__)
def _init_submission_driver(config_file, **kwargs):
    """Initialize runtime environment.

    Loads the BPS configuration, overlays the command-line values, fills
    in a few derived settings, optionally runs the WMS plugin's submission
    checks, and finally creates the submit directory in which copies of
    the original and the expanded configuration are saved.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Command-line values.  Every truthy value is stored in the
        configuration under ``.bps_cmdline.<name>``, where ``<name>`` is
        the keyword translated from the pipetask-style spelling to the
        one used in bps yaml.  Non-string iterables are joined into
        a comma-separated string first.  A truthy
        ``runWmsSubmissionChecks`` additionally enables the early WMS
        plugin checks.

    Returns
    -------
    config : `lsst.ctrl.bps.BpsConfig`
        Batch Processing Service configuration.

    Raises
    ------
    KeyError
        Raised if the configuration still uses ``outCollection``, if
        ``outputRun`` or ``submitPath`` is missing, or if submission
        checks were requested but ``wmsServiceClass`` is not configured.
    OSError
        Raised if the submit directory cannot be created, including the
        case where it already exists.
    """
    # Take ``Iterable`` from ``collections.abc``: the ABC aliases that used
    # to be reachable directly from ``collections`` were removed in
    # Python 3.10.
    from collections.abc import Iterable

    config = BpsConfig(config_file, BPS_SEARCH_ORDER)

    # Override config with command-line values
    # Handle diffs between pipetask argument names vs bps yaml
    translation = {
        "input": "inCollection",
        "output_run": "outputRun",
        "qgraph": "qgraphFile",
        "pipeline": "pipelineYaml",
    }
    for key, value in kwargs.items():
        # Don't want to override config with None or empty string values.
        if not value:
            continue

        # pipetask argument parser converts some values to list,
        # but bps will want string.
        if not isinstance(value, str) and isinstance(value, Iterable):
            value = ",".join(value)

        # Keys without an explicit translation are converted from
        # snake_case to camelCase.
        new_key = translation.get(key, re.sub(r"_(\S)", lambda match: match.group(1).upper(), key))
        config[f".bps_cmdline.{new_key}"] = value

    # Set some initial values
    config[".bps_defined.timestamp"] = Instrument.makeCollectionTimestamp()
    if "operator" not in config:
        config[".bps_defined.operator"] = getpass.getuser()

    if "outCollection" in config:
        raise KeyError("outCollection is deprecated. Replace all outCollection references with outputRun.")

    if "outputRun" not in config:
        raise KeyError("Must specify the output run collection using outputRun")

    if "uniqProcName" not in config:
        config[".bps_defined.uniqProcName"] = config["outputRun"].replace("/", "_")

    if "submitPath" not in config:
        raise KeyError("Must specify the submit-side run directory using submitPath")

    # If requested, run WMS plugin checks early in submission process to
    # ensure WMS has what it will need for prepare() or submit().
    if kwargs.get("runWmsSubmissionChecks", False):
        found, wms_class = config.search("wmsServiceClass")
        if not found:
            raise KeyError("Missing wmsServiceClass in bps config. Aborting.")

        # Check that can import wms service class.
        wms_service_class = doImport(wms_class)
        wms_service = wms_service_class(config)

        try:
            wms_service.run_submission_checks()
        except NotImplementedError:
            # Allow various plugins to implement only when needed to do extra
            # checks.
            _LOG.debug("run_submission_checks is not implemented in %s.", wms_class)
    else:
        _LOG.debug("Skipping submission checks.")

    # Make submit directory to contain all outputs.
    submit_path = Path(config["submitPath"])
    try:
        # exist_ok=False: an already existing directory is reported as an
        # error instead of being reused.
        submit_path.mkdir(parents=True, exist_ok=False)
    except OSError as exc:
        if exc.errno == errno.EEXIST:
            reason = "Directory already exists"
        else:
            reason = exc.strerror
        # Re-raise the same exception type with a friendlier message and
        # without the original traceback context.
        raise type(exc)(f"cannot create submit directory '{submit_path}': {reason}") from None
    config[".bps_defined.submitPath"] = str(submit_path)

    # save copy of configs (orig and expanded config)
    shutil.copy2(config_file, submit_path)
    with open(f"{submit_path}/{config['uniqProcName']}_config.yaml", "w") as fh:
        config.dump(fh)

    return config
def acquire_qgraph_driver(config_file, **kwargs):
    """Obtain the quantum graph for a submission.

    The runtime environment is initialized first; the quantum graph is
    then acquired with `acquire_quantum_graph`, using the submit directory
    as the output prefix.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional settings, forwarded unchanged to
        `_init_submission_driver`.

    Returns
    -------
    config : `lsst.ctrl.bps.BpsConfig`
        Configuration, updated with ``.bps_defined.executionButlerDir``
        and ``.bps_defined.runQgraphFile``.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        A graph representing quanta.
    """
    bps_config = _init_submission_driver(config_file, **kwargs)
    out_dir = bps_config[".bps_defined.submitPath"]

    _LOG.info("Starting acquire stage (generating and/or reading quantum graph)")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Acquire stage completed"):
        graph_file, graph, butler_dir = acquire_quantum_graph(bps_config, out_prefix=out_dir)

    # Record where the acquire stage left its products.
    bps_config[".bps_defined.executionButlerDir"] = butler_dir
    bps_config[".bps_defined.runQgraphFile"] = graph_file
    return bps_config, graph
def cluster_qgraph_driver(config_file, **kwargs):
    """Group quanta into clusters.

    Runs the acquire stage, clusters the resulting quantum graph with
    `cluster_quanta`, and optionally writes the clustered graph to the
    submit directory.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional settings, forwarded unchanged to
        `acquire_qgraph_driver`.

    Returns
    -------
    config : `lsst.ctrl.bps.BpsConfig`
        Updated configuration.
    clustered_qgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A graph representing clustered quanta.
    """

    def _enabled(option):
        # Look up a boolean-like option that defaults to False.
        _, flag = bps_config.search(option, opt={"default": False})
        return flag

    bps_config, graph = acquire_qgraph_driver(config_file, **kwargs)

    _LOG.info("Starting cluster stage (grouping quanta into jobs)")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Cluster stage completed"):
        clusters = cluster_quanta(bps_config, graph, bps_config["uniqProcName"])

    out_dir = bps_config[".bps_defined.submitPath"]

    # Optional artifacts: a pickle of the clustered graph and a dot drawing.
    if _enabled("saveClusteredQgraph"):
        clusters.save(os.path.join(out_dir, "bps_clustered_qgraph.pickle"))
    if _enabled("saveDot"):
        clusters.draw(os.path.join(out_dir, "bps_clustered_qgraph.dot"))

    return bps_config, clusters
def transform_driver(config_file, **kwargs):
    """Create a generic workflow from the clustered quantum graph.

    Runs the cluster stage, hands its result to `transform`, and
    optionally writes the generic workflow to the submit directory.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional settings, forwarded unchanged to
        `cluster_qgraph_driver`.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to use when creating the workflow.
    generic_workflow
        The generic workflow returned by `transform`.
        NOTE(review): the previous docstring gave the type as
        `lsst.ctrl.bps.BaseWmsWorkflow`, which looks copied from
        `prepare_driver`; confirm the actual type.
    """

    def _enabled(option):
        # Look up a boolean-like option that defaults to False.
        _, flag = bps_config.search(option, opt={"default": False})
        return flag

    bps_config, clusters = cluster_qgraph_driver(config_file, **kwargs)
    out_dir = bps_config[".bps_defined.submitPath"]

    _LOG.info("Starting transform stage (creating generic workflow)")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Transform stage completed"):
        workflow, workflow_config = transform(bps_config, clusters, out_dir)
        _LOG.info("Generic workflow name '%s'", workflow.name)

    # Optional artifacts: a pickle of the workflow and a dot drawing.
    if _enabled("saveGenericWorkflow"):
        with open(os.path.join(out_dir, "bps_generic_workflow.pickle"), "wb") as pickle_fh:
            workflow.save(pickle_fh, "pickle")
    if _enabled("saveDot"):
        with open(os.path.join(out_dir, "bps_generic_workflow.dot"), "w") as dot_fh:
            workflow.draw(dot_fh, "dot")

    return workflow_config, workflow
def prepare_driver(config_file, **kwargs):
    """Create the WMS-specific implementation of the generic workflow.

    Runs the transform stage (with WMS submission checks enabled unless
    the caller says otherwise), passes the generic workflow to `prepare`,
    and prints the submit directory of the resulting workflow.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional settings, forwarded to `transform_driver`.
        ``runWmsSubmissionChecks`` is set to `True` when not supplied.

    Returns
    -------
    wms_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to use when creating the workflow.
    workflow : `lsst.ctrl.bps.BaseWmsWorkflow`
        Representation of the abstract/scientific workflow specific to a given
        workflow management system.
    """
    # Submission checks are on by default at this stage.
    if "runWmsSubmissionChecks" not in kwargs:
        kwargs["runWmsSubmissionChecks"] = True

    workflow_config, generic = transform_driver(config_file, **kwargs)
    out_dir = workflow_config[".bps_defined.submitPath"]

    _LOG.info("Starting prepare stage (creating specific implementation of workflow)")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Prepare stage completed"):
        wms_workflow = prepare(workflow_config, generic, out_dir)

    print(f"Submit dir: {wms_workflow.submit_path}")

    # The configuration handed to the WMS is the generic workflow's one.
    return workflow_config, wms_workflow
def submit_driver(config_file, **kwargs):
    """Run every submission stage and submit the workflow for execution.

    The whole process is timed; the final `submit` call is timed
    separately inside it.  The run id is printed once everything has
    completed.

    Parameters
    ----------
    config_file : `str`
        Name of the configuration file.
    **kwargs
        Additional settings, forwarded to `prepare_driver`.
        ``runWmsSubmissionChecks`` is set to `True` when not supplied.
    """
    # Submission checks are on by default at this stage.
    if "runWmsSubmissionChecks" not in kwargs:
        kwargs["runWmsSubmissionChecks"] = True

    _LOG.info("Starting submission process")
    with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Completed entire submission process"):
        workflow_config, workflow = prepare_driver(config_file, **kwargs)

        _LOG.info("Starting submit stage")
        with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Completed submit stage"):
            submit(workflow_config, workflow)
            _LOG.info("Run '%s' submitted for execution with id '%s'", workflow.name, workflow.run_id)

    print(f"Run Id: {workflow.run_id}")
def report_driver(wms_service, run_id, user, hist_days, pass_thru, is_global=False):
    """Print out summary of jobs submitted for execution.

    Thin wrapper: all arguments are handed to `report` unchanged.

    Parameters
    ----------
    wms_service : `str`
        Name of the WMS service class.
    run_id : `str`
        A run id the report will be restricted to.
    user : `str`
        A user name the report will be restricted to.
    hist_days : `int`
        Number of days (presumably how far back in history to look for
        runs; confirm against `report`).
    pass_thru : `str`
        A string to pass directly to the WMS service class.
    is_global : `bool`, optional
        If set, all available job queues will be queried for job information.
        Defaults to False which means that only a local job queue will be
        queried for information.

        Only applicable in the context of a WMS using distributed job queues
        (e.g., HTCondor).
    """
    report(wms_service, run_id, user, hist_days, pass_thru, is_global=is_global)
def cancel_driver(wms_service, run_id, user, require_bps, pass_thru, is_global=False):
    """Cancel submitted workflows.

    Thin wrapper: all arguments are handed to `cancel` unchanged.

    Parameters
    ----------
    wms_service : `str`
        Name of the Workload Management System service class.
    run_id : `str`
        ID or path of job that should be canceled.
    user : `str`
        User whose submitted jobs should be canceled.
    require_bps : `bool`
        Whether to require given run_id/user to be a bps submitted job.
    pass_thru : `str`
        Information to pass through to WMS.
    is_global : `bool`, optional
        If set, all available job queues will be checked for jobs to cancel.
        Defaults to False which means that only a local job queue will be
        checked.

        Only applicable in the context of a WMS using distributed job queues
        (e.g., HTCondor).
    """
    cancel(wms_service, run_id, user, require_bps, pass_thru, is_global=is_global)