# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Driver for the transformation of a QuantumGraph into a generic workflow.
"""

import dataclasses
import logging
import os
import re
import time

from . import BpsConfig, GenericWorkflow, GenericWorkflowJob, GenericWorkflowFile, GenericWorkflowExec
from .bps_utils import (save_qg_subgraph, WhenToSaveQuantumGraphs, create_job_quantum_graph_filename,
                        _create_execution_butler)

_LOG = logging.getLogger(__name__)


def transform(config, clustered_quantum_graph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    clustered_quantum_graph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany the GenericWorkflow.
    """
    if "name" in clustered_quantum_graph.graph and clustered_quantum_graph.graph["name"] is not None:
        name = clustered_quantum_graph.graph["name"]
    else:
        _, name = config.search("uniqProcName", opt={"required": True})

    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _LOG.info("Creating execution butler")
        stime = time.time()
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)
        _LOG.info("Creating execution butler took %.2f seconds", time.time() - stime)

    generic_workflow = create_generic_workflow(config, clustered_quantum_graph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    return generic_workflow, generic_workflow_config
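
# A minimal usage sketch (variable names hypothetical; assumes a populated
# BpsConfig and a ClusteredQuantumGraph produced by the earlier clustering
# stage):
#
#     generic_workflow, generic_workflow_config = transform(
#         config, clustered_qgraph, submit_path)
#     _LOG.debug("transformed run %s", generic_workflow.name)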


def update_job(config, job):
    """Update given job with workflow attribute and profile values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to which the attributes and profile values should be added.
    """
    key = f".site.{job.compute_site}.profile.condor"

    if key in config:
        # Use a separate loop variable so the section key is not shadowed.
        for subkey, val in config[key].items():
            if subkey.startswith("+"):
                job.attrs[subkey[1:]] = val
            else:
                job.profile[subkey] = val
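
# A hedged example of the config shape update_job reads (site name and
# values hypothetical). A leading "+" routes an entry to job.attrs; any
# other entry lands in job.profile:
#
#     site:
#       example_site:
#         profile:
#           condor:
#             "+bps_job_label": payload   # -> job.attrs["bps_job_label"]
#             periodic_remove: "..."      # -> job.profile["periodic_remove"]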


def add_workflow_init_nodes(config, generic_workflow):
    """Add nodes to workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to
    any jobs in the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Create a workflow graph that will have task and file nodes necessary
    # for initializing the pipeline execution.
    init_workflow = create_init_workflow(config, generic_workflow.get_file("runQgraphFile"))
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)
    old_run_summary = generic_workflow.run_attrs.get("bps_run_summary", "")
    init_summary = init_workflow.run_attrs.get("bps_run_summary", "")
    generic_workflow.run_attrs["bps_run_summary"] = ";".join(x for x in [init_summary, old_run_summary] if x)


def create_init_workflow(config, qgraph_gwfile):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    search_opt = {"curvals": {"curr_pipetask": "pipetaskInit"},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # Create job for executing --init-only.
    gwjob = GenericWorkflowJob("pipetaskInit")
    gwjob.label = "pipetaskInit"

    job_values = _get_job_values(config, search_opt, "runQuantumCommand")

    # Handle universal values.
    _handle_job_values_universal(job_values, gwjob)

    # Handle aggregate values.
    _handle_job_values_aggregate(job_values, gwjob)

    # Save summary of Quanta in job.
    gwjob.tags["quanta_summary"] = "pipetaskInit:1"

    # Update job with workflow attribute and profile values.
    update_job(config, gwjob)

    init_workflow.add_job(gwjob)
    butler_gwfile = _get_butler_gwfile(config, config["submitPath"])
    init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
    init_workflow.run_attrs["bps_run_summary"] = gwjob.tags["quanta_summary"]
    _enhance_command(config, init_workflow, gwjob)

    return init_workflow


def _enhance_command(config, generic_workflow, gwjob):
    """Enhance command line with env and file placeholders
    and gather command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    """
    search_opt = {"curvals": {"curr_pipetask": gwjob.label},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    # Change qgraph variable to match whether using the run or per-job qgraph.
    # Note: these are lookup keys, not actual physical filenames.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    elif gwjob.name == "pipetaskInit":
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    else:  # Need unique file keys for per-job QuantumGraphs.
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")

    # Replace files with special placeholders.
    for gwfile in generic_workflow.get_job_inputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
    for gwfile in generic_workflow.get_job_outputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save dict of other values needed to complete the command line.
    # (Be careful not to replace env variables as they may
    # be different in the compute job.)
    search_opt["replaceVars"] = True

    for key in re.findall(r"{([^}]+)}", gwjob.arguments):
        if key not in gwjob.cmdvals:
            _, gwjob.cmdvals[key] = config.search(key, opt=search_opt)

    # Backwards compatibility.
    _, use_lazy_commands = config.search("useLazyCommands", opt={"default": True})
    if not use_lazy_commands:
        gwjob.arguments = _fill_arguments(config, generic_workflow, gwjob.arguments, gwjob.cmdvals)
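
# An illustrative walk-through of the placeholder rewriting above (job name
# and arguments hypothetical). For per-job QuantumGraphs and a job named
# "visit_903342", an arguments string of
#     "-b {butlerConfig} -g {qgraphFile} ..."
# first becomes
#     "-b {butlerConfig} -g {qgraphFile_visit_903342} ..."
# and, because both files are registered as job inputs, ends up as
#     "-b <FILE:butlerConfig> -g <FILE:qgraphFile_visit_903342> ..."
# leaving <FILE:...> markers for the WMS plugin (or _fill_arguments) to
# resolve into concrete paths later.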


def _fill_arguments(config, generic_workflow, arguments, cmdvals):
    """Replace placeholders in command line string in job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow containing the job.
    arguments : `str`
        String containing placeholders.
    cmdvals : `dict` [`str`, `Any`]
        Any command line values that can be used to replace placeholders.

    Returns
    -------
    arguments : `str`
        Command line with FILE and ENV placeholders replaced.
    """
    # Replace file placeholders.
    _, use_shared = config.search("bpsUseShared", opt={"default": False})
    for file_key in re.findall(r"<FILE:([^>]+)>", arguments):
        gwfile = generic_workflow.get_file(file_key)
        # Use the basename only when the WMS transfers the file and the job
        # cannot simply read it from a shared location.
        if gwfile.wms_transfer and (not use_shared or not gwfile.job_shared):
            uri = os.path.basename(gwfile.src_uri)
        else:
            uri = gwfile.src_uri
        arguments = arguments.replace(f"<FILE:{file_key}>", uri)

    # Replace env placeholders with submit-side values.
    arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments)
    arguments = os.path.expandvars(arguments)

    # Replace remaining vars.
    arguments = arguments.format(**cmdvals)

    return arguments
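
# A sketch of the substitution order in _fill_arguments (paths and values
# hypothetical): given arguments
#     "<FILE:butlerConfig> <ENV:HOME>/repo {qgraphId}"
# the <FILE:...> marker is replaced by either the basename or the full
# src_uri (depending on bpsUseShared and the file's job_shared flag),
# <ENV:HOME> becomes $HOME and is expanded submit-side by
# os.path.expandvars, and the remaining {qgraphId} is filled from cmdvals
# via str.format.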


def _get_butler_gwfile(config, prefix):
    """Get butler location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "NEVER":
        _, butler_config = config.search("butlerConfig")
        wms_transfer = False
        job_access_remote = True
        job_shared = True
    else:
        _, butler_config = config.search(".bps_defined.executionButlerDir")
        butler_config = os.path.join(prefix, butler_config)
        wms_transfer = True
        job_access_remote = False
        job_shared = False

    gwfile = GenericWorkflowFile("butlerConfig",
                                 src_uri=butler_config,
                                 wms_transfer=wms_transfer,
                                 job_access_remote=job_access_remote,
                                 job_shared=job_shared)

    return gwfile


def _get_qgraph_gwfile(config, gwjob, run_qgraph_file, prefix):
    """Get qgraph location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which to determine the QuantumGraph file.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of the QuantumGraph location (may not include
        filename).
    """
    per_job_qgraph_file = True
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        per_job_qgraph_file = False

    qgraph_gwfile = None
    if per_job_qgraph_file:
        qgraph_gwfile = GenericWorkflowFile(f"qgraphFile_{gwjob.name}",
                                            src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
                                            wms_transfer=True,
                                            job_access_remote=True,
                                            job_shared=True)
    else:
        qgraph_gwfile = run_qgraph_file

    return qgraph_gwfile


def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the bps config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching config.
    cmd_line_key : `str` or `None`
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [`str`, `Any`]
        Mapping of job attribute names to the values found in the config
        (`None` for attributes not found).
    """
    special_values = ['name', 'label', 'cmdline', 'pre_cmdline', 'post_cmdline']

    job_values = {}
    for field in dataclasses.fields(GenericWorkflowJob):
        if field.name not in special_values:
            # Variable names in yaml are camel case instead of snake case.
            yaml_name = re.sub(r"_(\S)", lambda match: match.group(1).upper(), field.name)
            found, value = config.search(yaml_name, opt=search_opt)
            if not found and '_' in field.name:
                # Just in case someone used snake case:
                found, value = config.search(field.name, opt=search_opt)
            if found:
                job_values[field.name] = value
            else:
                job_values[field.name] = None

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            cmd_parts = cmdline.split(" ", 1)
            job_values["executable"] = cmd_parts[0]
            if len(cmd_parts) > 1:
                job_values["arguments"] = cmd_parts[1]

    return job_values
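
# The snake_case-to-camelCase conversion above can be checked in isolation;
# for example, the dataclass field name "request_memory" is looked up in the
# config as "requestMemory":
#
#     >>> re.sub(r"_(\S)", lambda match: match.group(1).upper(),
#     ...        "request_memory")
#     'requestMemory'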


def _handle_job_values_universal(quantum_job_values, gwjob):
    """Handle job values that must be the same for every PipelineTask in
    the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    """
    universal_values = ["arguments", "compute_site"]
    for key in universal_values:
        current_value = getattr(gwjob, key)
        if not current_value:
            setattr(gwjob, key, quantum_job_values[key])
        elif current_value != quantum_job_values[key]:
            _LOG.error("Inconsistent value for %s in "
                       "Cluster %s Quantum Number %s\n"
                       "Current cluster value: %s\n"
                       "Quantum value: %s",
                       key, gwjob.name, quantum_job_values.get("qgraphNodeId", "MISSING"), current_value,
                       quantum_job_values[key])
            raise RuntimeError(f"Inconsistent value for {key} in cluster {gwjob.name}.")

    # Handle cmdline specially.
    if not gwjob.executable:
        gwjob.executable = GenericWorkflowExec(os.path.basename(quantum_job_values['executable']),
                                               quantum_job_values['executable'], False)
    elif quantum_job_values['executable'] != gwjob.executable.src_uri:
        _LOG.error("Inconsistent value for executable in "
                   "Cluster %s Quantum Number %s\n"
                   "Current cluster value: %s\n"
                   "Quantum value: %s",
                   gwjob.name, quantum_job_values.get("qgraphNodeId", "MISSING"),
                   gwjob.executable.src_uri, quantum_job_values['executable'])
        raise RuntimeError(f"Inconsistent value for executable in cluster {gwjob.name}.")


def _handle_job_values_aggregate(quantum_job_values, gwjob):
    """Handle job values that are aggregates of values from the
    PipelineTasks in the QuantumGraph.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    """
    values_max = ['request_cpus', 'request_memory']
    values_sum = ['request_disk', 'request_walltime']

    for key in values_max:
        current_value = getattr(gwjob, key)
        if not current_value:
            setattr(gwjob, key, quantum_job_values[key])
        else:
            setattr(gwjob, key, max(current_value, quantum_job_values[key]))

    for key in values_sum:
        current_value = getattr(gwjob, key)
        if not current_value:
            setattr(gwjob, key, quantum_job_values[key])
        else:
            setattr(gwjob, key, current_value + quantum_job_values[key])
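
# An illustrative aggregation (numbers hypothetical): clustering two Quanta
# whose per-quantum values are request_memory=2048/4096 and
# request_walltime=300/600 produces a job with request_memory=4096 (max of
# the values) and request_walltime=900 (sum of the values).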


def create_generic_workflow(config, clustered_quanta_graph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    clustered_quanta_graph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether to save per-job QuantumGraph files within the loop.
    save_per_job_qgraph = False
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.TRANSFORM:
        save_per_job_qgraph = True

    generic_workflow = GenericWorkflow(name)

    # Save full run QuantumGraph for use by jobs.
    generic_workflow.add_file(GenericWorkflowFile("runQgraphFile",
                                                  src_uri=config["runQgraphFile"],
                                                  wms_transfer=True,
                                                  job_access_remote=True,
                                                  job_shared=True))

    qgraph = clustered_quanta_graph.graph["qgraph"]
    task_labels = [task.label for task in qgraph.iterTaskGraph()]
    run_label_counts = dict.fromkeys(task_labels, 0)
    for node_name, data in clustered_quanta_graph.nodes(data=True):
        _LOG.debug("clustered_quanta_graph: node_name=%s, len(cluster)=%s, label=%s, ids=%s", node_name,
                   len(data["qgraph_node_ids"]), data["label"], data["qgraph_node_ids"][:4])
        gwjob = GenericWorkflowJob(node_name)
        if "tags" in data:
            gwjob.tags = data["tags"]
        if "label" in data:
            gwjob.label = data["label"]
        # Getting labels in pipeline order.
        label_counts = dict.fromkeys(task_labels, 0)

        # Get job info either common or aggregate for all Quanta in cluster.
        for node_id in data["qgraph_node_ids"]:
            qnode = qgraph.getQuantumNodeByNodeId(node_id)
            label_counts[qnode.taskDef.label] += 1

            search_opt = {"curvals": {"curr_pipetask": qnode.taskDef.label},
                          "replaceVars": False,
                          "expandEnvVars": False,
                          "replaceEnvVars": True,
                          "required": False}

            quantum_job_values = _get_job_values(config, search_opt, "runQuantumCommand")

            # Handle universal values.
            _handle_job_values_universal(quantum_job_values, gwjob)

            # Handle aggregate values.
            _handle_job_values_aggregate(quantum_job_values, gwjob)

        # Save summary of Quanta in job.
        gwjob.tags["quanta_summary"] = ";".join([f"{k}:{v}" for k, v in label_counts.items() if v])
        # Add job quanta counts to the run totals.
        for key in task_labels:
            run_label_counts[key] += label_counts[key]

        # Update job with workflow attribute and profile values.
        update_job(config, gwjob)
        qgraph_gwfile = _get_qgraph_gwfile(config, gwjob, generic_workflow.get_file("runQgraphFile"),
                                           config["submitPath"])
        butler_gwfile = _get_butler_gwfile(config, config["submitPath"])

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        gwjob.cmdvals["qgraphId"] = data["qgraph_node_ids"][0].buildId
        gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in
                                                         data["qgraph_node_ids"]]))
        _enhance_command(config, generic_workflow, gwjob)

        # If writing per-job QuantumGraph files during the TRANSFORM stage,
        # write them now while the full graph is in memory.
        if save_per_job_qgraph:
            save_qg_subgraph(qgraph, qgraph_gwfile.src_uri, data["qgraph_node_ids"])

    # Save run's Quanta summary.
    run_summary = ";".join([f"{k}:{v}" for k, v in run_label_counts.items()])
    generic_workflow.run_attrs["bps_run_summary"] = run_summary

    # Create job dependencies.
    for node_name in clustered_quanta_graph.nodes():
        for child in clustered_quanta_graph.successors(node_name):
            generic_workflow.add_job_relationships(node_name, child)

    # Add initial workflow.
    _, run_init = config.search("runInit", opt={"default": False})
    if run_init:
        add_workflow_init_nodes(config, generic_workflow)

    generic_workflow.run_attrs.update({"bps_isjob": "True",
                                       "bps_project": config["project"],
                                       "bps_campaign": config["campaign"],
                                       "bps_run": generic_workflow.name,
                                       "bps_operator": config["operator"],
                                       "bps_payload": config["payloadName"],
                                       "bps_runsite": config["computeSite"]})

    # Add final job.
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow
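
# The resulting "bps_run_summary" run attribute is a semicolon-separated
# list of label:count pairs in pipeline order, e.g. (labels and counts
# hypothetical):
#     "isr:10;characterizeImage:10;calibrate:10"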


def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    generic_workflow_config = BpsConfig(config)
    generic_workflow_config["workflowName"] = config["uniqProcName"]
    generic_workflow_config["workflowPath"] = prefix
    return generic_workflow_config


def add_final_job(config, generic_workflow, prefix):
    """Add final workflow job depending upon configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the final job should be added.
    prefix : `str`
        Directory in which to output final script.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")

    search_opt = {"searchobj": config[".executionButler"], "default": None}
    if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
        # Create gwjob.
        gwjob = GenericWorkflowJob("mergeExecutionButler")
        gwjob.label = "mergeExecutionButler"

        job_values = _get_job_values(config, search_opt, None)
        for field in dataclasses.fields(GenericWorkflowJob):
            if not getattr(gwjob, field.name) and job_values[field.name]:
                setattr(gwjob, field.name, job_values[field.name])

        update_job(config, gwjob)

        # Create script and add command line to job.
        gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)

        # Determine inputs from command line.
        for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
            gwfile = generic_workflow.get_file(file_key)
            generic_workflow.add_job_inputs(gwjob.name, [gwfile])

        _enhance_command(config, generic_workflow, gwjob)

        # Put transfer repo job in appropriate location in workflow.
        if when_merge.upper() == "ALWAYS":
            # Add as special final job.
            generic_workflow.add_final(gwjob)
        elif when_merge.upper() == "SUCCESS":
            # Add as regular sink node.
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            raise ValueError(f"Invalid value for executionButler.whenMerge: {when_merge}")

        generic_workflow.run_attrs["bps_run_summary"] += ";mergeExecutionButler:1"


def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    prefix : `str`
        Directory in which to output final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    search_opt = {'replaceVars': False, 'replaceEnvVars': False, 'expandEnvVars': False}

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w") as fh:
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)
        print("set -x", file=fh)

        print("butlerConfig=$1", file=fh)
        print("executionButlerDir=$2", file=fh)

        i = 1
        found, command = config.search(f".executionButler.command{i}", opt=search_opt)
        while found:
            # Temporarily replace any env vars so the formatter doesn't try
            # to replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be args to the script
            # and set to env vars.
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in command string.
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env placeholders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)

            print(command, file=fh)
            i += 1
            found, command = config.search(f".executionButler.command{i}", opt=search_opt)
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"
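
# Assuming an executionButler command such as (hypothetical)
#     command1: "${BUTLER_BIN}/butler transfer-datasets {executionButlerDir} {butlerConfig}"
# the generated final_job.bash would look roughly like:
#
#     #!/bin/bash
#
#     set -e
#     set -x
#     butlerConfig=$1
#     executionButlerDir=$2
#     ${BUTLER_BIN}/butler transfer-datasets ${executionButlerDir} ${butlerConfig}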


def add_final_job_as_sink(generic_workflow, final_job):
    """Add final job as the single sink for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the final job should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as new sink node depending upon all previous sink nodes.
    """
    # Find sink nodes of generic workflow graph.
    gw_sinks = [n for n in generic_workflow if generic_workflow.out_degree(n) == 0]
    _LOG.debug("gw_sinks = %s", gw_sinks)

    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(gw_sinks, final_job.name)