# NOTE: Removed stray HTML coverage-report navigation text ("Coverage for
# python/lsst/ctrl/bps/transform.py: 9%", "Shortcuts on this page", keyboard
# shortcut legend) that was accidentally captured when this module was
# extracted from a coverage report.
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Driver for the transformation of a QuantumGraph into a generic workflow.
23"""
25import logging
26import math
27import os
28import re
29import dataclasses
31from lsst.utils.timer import time_this
33from . import (
34 DEFAULT_MEM_RETRIES,
35 BpsConfig,
36 GenericWorkflow,
37 GenericWorkflowJob,
38 GenericWorkflowFile,
39 GenericWorkflowExec,
40)
41from .bps_utils import (
42 save_qg_subgraph,
43 WhenToSaveQuantumGraphs,
44 create_job_quantum_graph_filename,
45 _create_execution_butler
46)
# All available job attributes (every dataclass field of GenericWorkflowJob).
_ATTRS_ALL = frozenset([field.name for field in dataclasses.fields(GenericWorkflowJob)])

# Job attributes that need to be set to their maximal value in the cluster.
_ATTRS_MAX = frozenset({
    "memory_multiplier",
    "number_of_retries",
    "request_cpus",
    "request_memory",
    "request_memory_max",
})

# Job attributes that need to be set to sum of their values in the cluster.
_ATTRS_SUM = frozenset({
    "request_disk",
    "request_walltime",
})

# Job attributes that do not fall into a specific category; they are
# handled individually rather than by the aggregation rules above.
_ATTRS_MISC = frozenset({
    "cmdline",
    "cmdvals",
    "environment",
    "pre_cmdline",
    "post_cmdline",
    "profile",
    "attrs",
})

# Attributes that need to be the same for each quanta in the cluster.
_ATTRS_UNIVERSAL = frozenset(_ATTRS_ALL - (_ATTRS_MAX | _ATTRS_MISC | _ATTRS_SUM))

_LOG = logging.getLogger(__name__)
def transform(config, cqgraph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany GenericWorkflow.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _LOG.info("Creating execution butler in '%s'", execution_butler_dir)
        # Bug fix: the logging module has no LEVEL attribute; logging.LEVEL
        # raised AttributeError here. Log the timing message at INFO.
        with time_this(log=_LOG, level=logging.INFO, prefix=None,
                       msg="Creating execution butler completed"):
            _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)

    # Prefer the name stored in the clustered graph; otherwise fall back to
    # the mandatory uniqProcName config entry.
    if cqgraph.name is not None:
        name = cqgraph.name
    else:
        _, name = config.search("uniqProcName", opt={"required": True})

    generic_workflow = create_generic_workflow(config, cqgraph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    return generic_workflow, generic_workflow_config
def update_job(config, job):
    """Update given job with workflow attribute and profile values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to which the attributes and profile values should be added.
    """
    search_key = f".site.{job.compute_site}.profile.condor"
    if search_key not in config:
        return

    # Entries whose names start with "+" become job attributes (without the
    # leading "+"); everything else is stored as a profile value.
    for name, value in config[search_key].items():
        if name.startswith("+"):
            job.attrs[name[1:]] = value
        else:
            job.profile[name] = value
def add_workflow_init_nodes(config, qgraph, generic_workflow):
    """Add nodes to workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    of the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Build a small workflow containing the task and file nodes needed to
    # initialize the pipeline execution, then splice it in as a source so
    # it runs before everything already in the workflow.
    run_qgraph_gwfile = generic_workflow.get_file("runQgraphFile")
    init_workflow = create_init_workflow(config, qgraph, run_qgraph_gwfile)
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)
def create_init_workflow(config, qgraph, qgraph_gwfile):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    # Look up config values as the special "pipetaskInit" task; variable
    # replacement is deferred until _enhance_command runs on the job.
    search_opt = {"curvals": {"curr_pipetask": "pipetaskInit"},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # create job for executing --init-only
    gwjob = GenericWorkflowJob("pipetaskInit")

    job_values = _get_job_values(config, search_opt, "runQuantumCommand")
    job_values["name"] = "pipetaskInit"
    job_values["label"] = "pipetaskInit"

    # Adjust job attributes values if necessary.
    _handle_job_values(job_values, gwjob)

    # Pick a node id for each task (not quantum!) to avoid reading the entire
    # quantum graph during the initialization stage.
    node_ids = []
    for task in qgraph.iterTaskGraph():
        task_def = qgraph.findTaskDefByLabel(task.label)
        node = next(iter(qgraph.getNodesForTask(task_def)))
        node_ids.append(node.nodeId)
    gwjob.cmdvals["qgraphId"] = qgraph.graphID
    gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in node_ids]))

    # Update job with workflow attribute and profile values.
    update_job(config, gwjob)

    # The init job reads both the run QuantumGraph and the butler config.
    init_workflow.add_job(gwjob)
    butler_gwfile = _get_butler_gwfile(config, config["submitPath"])
    init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
    _enhance_command(config, init_workflow, gwjob)

    return init_workflow
def _enhance_command(config, generic_workflow, gwjob):
    """Enhance command line with env and file placeholders
    and gather command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    """
    _LOG.debug("gwjob given to _enhance_command: %s", gwjob)

    search_opt = {"curvals": {"curr_pipetask": gwjob.label},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    # Change qgraph variable to match whether using run or per-job qgraph
    # Note: these are lookup keys, not actual physical filenames.
    _, when_save = config.search("whenSaveJobQgraph",
                                 {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    uses_run_qgraph = (
        WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER
        or gwjob.name == "pipetaskInit"
    )
    if uses_run_qgraph:
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    else:
        # Needed unique file keys for per-job QuantumGraphs.
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")

    # Replace files with special placeholders
    job_files = list(generic_workflow.get_job_inputs(gwjob.name))
    job_files.extend(generic_workflow.get_job_outputs(gwjob.name))
    for gwfile in job_files:
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save dict of other values needed to complete command line.
    # (Be careful to not replace env variables as they may
    # be different in compute job.)
    search_opt["replaceVars"] = True
    for placeholder in re.findall(r"{([^}]+)}", gwjob.arguments):
        if placeholder not in gwjob.cmdvals:
            _, gwjob.cmdvals[placeholder] = config.search(placeholder, opt=search_opt)

    # backwards compatibility
    _, use_lazy_commands = config.search("useLazyCommands", opt={"default": True})
    if not use_lazy_commands:
        gwjob.arguments = _fill_arguments(config, generic_workflow, gwjob.arguments, gwjob.cmdvals)
274def _fill_arguments(config, generic_workflow, arguments, cmdvals):
275 """Replace placeholders in command line string in job.
277 Parameters
278 ----------
279 config : `lsst.ctrl.bps.BpsConfig`
280 Bps configuration.
281 generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
282 Generic workflow containing the job.
283 arguments : `str`
284 String containing placeholders.
285 cmdvals : `dict` [`str`, `Any`]
286 Any command line values that can be used to replace placeholders.
288 Returns
289 -------
290 arguments : `str`
291 Command line with FILE and ENV placeholders replaced.
292 """
293 # Replace file placeholders
294 _, use_shared = config.search("bpsUseShared", opt={"default": False})
295 for file_key in re.findall(r"<FILE:([^>]+)>", arguments):
296 gwfile = generic_workflow.get_file(file_key)
297 if gwfile.wms_transfer and not use_shared or not gwfile.job_shared:
298 uri = os.path.basename(gwfile.src_uri)
299 else:
300 uri = gwfile.src_uri
301 arguments = arguments.replace(f"<FILE:{file_key}>", uri)
303 # Replace env placeholder with submit-side values
304 arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments)
305 arguments = os.path.expandvars(arguments)
307 # Replace remaining vars
308 arguments = arguments.format(**cmdvals)
310 return arguments
def _get_butler_gwfile(config, prefix):
    """Get butler location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "NEVER":
        # No execution butler: jobs point at the configured butler repo.
        _, butler_config = config.search("butlerConfig")
        wms_transfer, job_access_remote, job_shared = False, True, True
    else:
        # Execution butler: the directory under prefix is transferred to jobs.
        _, butler_config = config.search(".bps_defined.executionButlerDir")
        butler_config = os.path.join(prefix, butler_config)
        wms_transfer, job_access_remote, job_shared = True, False, False

    return GenericWorkflowFile("butlerConfig",
                               src_uri=butler_config,
                               wms_transfer=wms_transfer,
                               job_access_remote=job_access_remote,
                               job_shared=job_shared)
def _get_qgraph_gwfile(config, gwjob, run_qgraph_file, prefix):
    """Get qgraph location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which determining QuantumGraph file.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location (may not include filename).
    """
    _, when_save = config.search("whenSaveJobQgraph",
                                 {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        # No per-job files; every job reads the full run QuantumGraph.
        return run_qgraph_file

    # Per-job QuantumGraph file, keyed uniquely by job name.
    return GenericWorkflowFile(f"qgraphFile_{gwjob.name}",
                               src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
                               wms_transfer=True,
                               job_access_remote=True,
                               job_shared=True)
def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the bps config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching config.
    cmd_line_key : `str` or None
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [`str`, `Any`]
        A mapping between job attributes and their values (None when the
        attribute is not found in the config).
    """
    job_values = {}
    for attr in _ATTRS_ALL:
        found, value = config.search(attr, opt=search_opt)
        job_values[attr] = value if found else None

    # If the automatic memory scaling is enabled (i.e. the memory multiplier
    # is set and it is a positive number greater than 1.0), adjust number
    # of retries when necessary. If the memory multiplier is invalid, disable
    # automatic memory scaling.
    if job_values["memory_multiplier"] is not None:
        if math.ceil(float(job_values["memory_multiplier"])) > 1:
            if job_values["number_of_retries"] is None:
                job_values["number_of_retries"] = DEFAULT_MEM_RETRIES
        else:
            job_values["memory_multiplier"] = None

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            # Bug fix: cmdline.split(" ", 1) raised ValueError for a command
            # with no arguments; partition yields an empty args string
            # instead, which the existing "if args" check already handles.
            cmd, _, args = cmdline.partition(" ")
            job_values["executable"] = GenericWorkflowExec(os.path.basename(cmd), cmd, False)
            if args:
                job_values["arguments"] = args

    return job_values
def _handle_job_values(quantum_job_values, gwjob, attributes=_ATTRS_ALL):
    """Set the job attributes in the cluster to their correct values.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, Any]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes: `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_ALL.
    """
    _LOG.debug("Call to _handle_job_values")
    # Apply each category-specific rule in turn; every handler filters the
    # given attributes down to the category it owns.
    for handler in (_handle_job_values_universal,
                    _handle_job_values_max,
                    _handle_job_values_sum):
        handler(quantum_job_values, gwjob, attributes)
def _handle_job_values_universal(quantum_job_values, gwjob, attributes=_ATTRS_UNIVERSAL):
    """Handle job attributes that must have the same value for every quantum
    in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, Any]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes: `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_UNIVERSAL.
    """
    for attr in _ATTRS_UNIVERSAL & set(attributes):
        _LOG.debug("Handling job %s (job=%s, quantum=%s)", attr, getattr(gwjob, attr),
                   quantum_job_values.get(attr, "MISSING"))
        # Skip attributes the quantum did not supply.
        if attr not in quantum_job_values:
            continue
        quantum_value = quantum_job_values[attr]
        current_value = getattr(gwjob, attr)
        if not current_value:
            # Nothing set yet on the job; take the quantum's value.
            setattr(gwjob, attr, quantum_value)
        elif current_value != quantum_value:
            # A conflicting value within one cluster is a hard error.
            _LOG.error("Inconsistent value for %s in Cluster %s Quantum Number %s\n"
                       "Current cluster value: %s\n"
                       "Quantum value: %s",
                       attr, gwjob.name, quantum_job_values.get("qgraphNodeId", "MISSING"),
                       current_value, quantum_value)
            raise RuntimeError(f"Inconsistent value for {attr} in cluster {gwjob.name}.")
def _handle_job_values_max(quantum_job_values, gwjob, attributes=_ATTRS_MAX):
    """Handle job attributes that should be set to their maximum value in
    the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes: `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_MAX.
    """
    for attr in _ATTRS_MAX & set(attributes):
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            # Attribute not supplied for this quantum; keep job value as is.
            continue
        else:
            # Update when the job has no value yet, or when the quantum's
            # (non-None) value is larger than the current one.
            needs_update = False
            if current_value is None:
                if quantum_value is not None:
                    needs_update = True
            else:
                if quantum_value is not None and current_value < quantum_value:
                    needs_update = True
            if needs_update:
                setattr(gwjob, attr, quantum_value)

                # When updating memory requirements for a job, check if memory
                # autoscaling is enabled. If it is, always use the memory
                # multiplier and the number of retries which comes with the
                # quantum.
                #
                # Note that as a result, the quantum with the biggest memory
                # requirements will determine whether the memory autoscaling
                # will be enabled (or disabled) depending on the value of its
                # memory multiplier.
                if attr == "request_memory":
                    # NOTE(review): assumes "memory_multiplier" (and, when it
                    # is set, "number_of_retries") are present whenever
                    # "request_memory" is — true for dicts produced by
                    # _get_job_values; confirm for any other caller.
                    gwjob.memory_multiplier = quantum_job_values["memory_multiplier"]
                    if gwjob.memory_multiplier is not None:
                        gwjob.number_of_retries = quantum_job_values["number_of_retries"]
def _handle_job_values_sum(quantum_job_values, gwjob, attributes=_ATTRS_SUM):
    """Handle job attributes that are the sum of their values in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes: `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_SUM.
    """
    for attr in _ATTRS_SUM & set(attributes):
        # Consistency fix: like the universal and max handlers, skip
        # attributes missing from the quantum values instead of raising
        # KeyError.
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        current_value = getattr(gwjob, attr)
        if not current_value:
            setattr(gwjob, attr, quantum_value)
        else:
            setattr(gwjob, attr, current_value + quantum_value)
def create_generic_workflow(config, cqgraph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether saving per-job QuantumGraph files in the loop.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    save_per_job_qgraph = WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.TRANSFORM

    generic_workflow = GenericWorkflow(name)

    # Save full run QuantumGraph for use by jobs
    generic_workflow.add_file(GenericWorkflowFile("runQgraphFile",
                                                  src_uri=config["runQgraphFile"],
                                                  wms_transfer=True,
                                                  job_access_remote=True,
                                                  job_shared=True))

    for cluster in cqgraph.clusters():
        _LOG.debug("Loop over clusters: %s, %s", cluster, type(cluster))
        _LOG.debug("cqgraph: name=%s, len=%s, label=%s, ids=%s", cluster.name,
                   len(cluster.qgraph_node_ids), cluster.label, cluster.qgraph_node_ids)

        gwjob = GenericWorkflowJob(cluster.name)

        # First get job values from cluster or cluster config
        search_opt = {"curvals": {},
                      "replaceVars": False,
                      "expandEnvVars": False,
                      "replaceEnvVars": True,
                      "required": False}

        # If some config values are set for this cluster
        if cluster.label in config["cluster"]:
            _LOG.debug("config['cluster'][%s] = %s", cluster.label, config["cluster"][cluster.label])
            cluster_job_values = _get_job_values(config["cluster"][cluster.label], search_opt,
                                                "runQuantumCommand")
        else:
            cluster_job_values = {}

        cluster_job_values['name'] = cluster.name
        cluster_job_values['label'] = cluster.label
        cluster_job_values['quanta_counts'] = cluster.quanta_counts
        cluster_job_values['tags'] = cluster.tags
        _LOG.debug("cluster_job_values = %s", cluster_job_values)
        _handle_job_values(cluster_job_values, gwjob, cluster_job_values.keys())

        # For purposes of whether to continue searching for a value is whether
        # the value evaluates to False.
        unset_attributes = {attr for attr in _ATTRS_ALL if not getattr(gwjob, attr)}

        _LOG.debug("unset_attributes=%s", unset_attributes)
        _LOG.debug("set=%s", _ATTRS_ALL - unset_attributes)

        # For job info not defined at cluster level, attempt to get job info
        # either common or aggregate for all Quanta in cluster.
        for node_id in iter(cluster.qgraph_node_ids):
            _LOG.debug("node_id=%s", node_id)
            qnode = cqgraph.get_quantum_node(node_id)
            search_opt['curvals'] = {"curr_pipetask": qnode.taskDef.label}
            quantum_job_values = _get_job_values(config, search_opt, "runQuantumCommand")
            _handle_job_values(quantum_job_values, gwjob, unset_attributes)

        # Update job with workflow attribute and profile values.
        update_job(config, gwjob)
        qgraph_gwfile = _get_qgraph_gwfile(config, gwjob, generic_workflow.get_file("runQgraphFile"),
                                           config["submitPath"])
        butler_gwfile = _get_butler_gwfile(config, config["submitPath"])

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        gwjob.cmdvals["qgraphId"] = cqgraph.qgraph.graphID
        gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in
                                                         cluster.qgraph_node_ids]))
        _enhance_command(config, generic_workflow, gwjob)

        # If writing per-job QuantumGraph files during TRANSFORM stage,
        # write it now while in memory.
        if save_per_job_qgraph:
            save_qg_subgraph(cqgraph.qgraph, qgraph_gwfile.src_uri, cluster.qgraph_node_ids)

    # Create job dependencies.
    for parent in cqgraph.clusters():
        for child in cqgraph.successors(parent):
            generic_workflow.add_job_relationships(parent.name, child.name)

    # Add initial workflow.
    # Bug fix: previously config.get("runInit", "{default: False}") used a
    # truthy *string* as the default, so runInit effectively defaulted to
    # enabled. Use the module's config.search convention with a real False.
    _, run_init = config.search("runInit", opt={"default": False})
    if run_init:
        add_workflow_init_nodes(config, cqgraph.qgraph, generic_workflow)

    generic_workflow.run_attrs.update({"bps_isjob": "True",
                                       "bps_project": config["project"],
                                       "bps_campaign": config["campaign"],
                                       "bps_run": generic_workflow.name,
                                       "bps_operator": config["operator"],
                                       "bps_payload": config["payloadName"],
                                       "bps_runsite": config["computeSite"]})

    # Add final job
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow
def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    # Start from a copy of the submit config, then record the workflow's
    # identity and output location.
    workflow_config = BpsConfig(config)
    workflow_config["workflowName"] = config["uniqProcName"]
    workflow_config["workflowPath"] = prefix
    return workflow_config
def add_final_job(config, generic_workflow, prefix):
    """Add final workflow job depending upon configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    prefix : `str`
        Directory in which to output final script.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")

    search_opt = {"searchobj": config[".executionButler"], "default": None}
    if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
        # create gwjob
        gwjob = GenericWorkflowJob("mergeExecutionButler")
        gwjob.label = "mergeExecutionButler"

        # Fill any unset job attributes from the executionButler config.
        job_values = _get_job_values(config, search_opt, None)
        for attr in _ATTRS_ALL:
            if not getattr(gwjob, attr) and job_values.get(attr, None):
                setattr(gwjob, attr, job_values[attr])

        update_job(config, gwjob)

        # Create script and add command line to job.
        gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)

        # Determine inputs from command line.
        for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
            gwfile = generic_workflow.get_file(file_key)
            generic_workflow.add_job_inputs(gwjob.name, gwfile)

        _enhance_command(config, generic_workflow, gwjob)

        # Put transfer repo job in appropriate location in workflow.
        if when_merge.upper() == "ALWAYS":
            # add as special final job
            generic_workflow.add_final(gwjob)
        elif when_merge.upper() == "SUCCESS":
            # add as regular sink node
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            # Bug fix: the message named a nonexistent key (when_merge);
            # report the actual config key, whenMerge.
            raise ValueError(f"Invalid value for executionButler.whenMerge {when_merge}")
def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Directory in which to output final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    search_opt = {'replaceVars': False, 'replaceEnvVars': False, 'expandEnvVars': False,
                  'searchobj': config['executionButler']}

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w") as fh:
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)
        print("set -x", file=fh)

        # The script takes the central butler config and the execution
        # butler directory as positional arguments.
        print("butlerConfig=$1", file=fh)
        print("executionButlerDir=$2", file=fh)

        # Write each numbered config command (.executionButler.command1,
        # command2, ...) into the script until one is missing.
        i = 1
        found, command = config.search(f".executionButler.command{i}", opt=search_opt)
        while found:
            # Temporarily replace any env vars so formatter doesn't try to
            # replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be args to script and
            # set to env vars
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in command string
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env place holders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)

            print(command, file=fh)
            i += 1
            found, command = config.search(f".executionButler.command{i}", opt=search_opt)
    # Make the generated script executable.
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"
def add_final_job_as_sink(generic_workflow, final_job):
    """Add final job as the single sink for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as new sink node depending upon all previous sink nodes.
    """
    # Capture the current sink nodes (no outgoing edges) before adding the
    # new job, then make the final job depend on every one of them.
    sinks = [node for node in generic_workflow if generic_workflow.out_degree(node) == 0]
    _LOG.debug("gw_sinks = %s", sinks)

    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(sinks, final_job.name)