# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Driver for the transformation of a QuantumGraph into a generic workflow.
"""

import dataclasses
import logging
import math
import os
import re
import time

from . import (
    DEFAULT_MEM_RETRIES,
    BpsConfig,
    GenericWorkflow,
    GenericWorkflowJob,
    GenericWorkflowFile,
    GenericWorkflowExec,
)
from .bps_utils import (
    save_qg_subgraph,
    WhenToSaveQuantumGraphs,
    create_job_quantum_graph_filename,
    _create_execution_butler,
)

_LOG = logging.getLogger(__name__)
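
# A minimal usage sketch (an assumption for illustration: this driver is
# normally invoked by the bps submit machinery, and the variable names below
# are illustrative only):
#
#     generic_workflow, gw_config = transform(config, clustered_qgraph, submit_path)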


def transform(config, clustered_quantum_graph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    clustered_quantum_graph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany the GenericWorkflow.
    """
    if "name" in clustered_quantum_graph.graph and clustered_quantum_graph.graph["name"] is not None:
        name = clustered_quantum_graph.graph["name"]
    else:
        _, name = config.search("uniqProcName", opt={"required": True})

    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _LOG.info("Creating execution butler")
        stime = time.time()
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)
        _LOG.info("Creating execution butler took %.2f seconds", time.time() - stime)

    generic_workflow = create_generic_workflow(config, clustered_quantum_graph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    return generic_workflow, generic_workflow_config


def update_job(config, job):
    """Update given job with workflow attribute and profile values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to which the attributes and profile values should be added.
    """
    key = f".site.{job.compute_site}.profile.condor"

    if key in config:
        for subkey, val in config[key].items():
            if subkey.startswith("+"):
                job.attrs[subkey[1:]] = val
            else:
                job.profile[subkey] = val


def add_workflow_init_nodes(config, generic_workflow):
    """Add nodes to workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    of the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Create a workflow graph that will have task and file nodes necessary for
    # initializing the pipeline execution.
    init_workflow = create_init_workflow(config, generic_workflow.get_file("runQgraphFile"))
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)
    old_run_summary = generic_workflow.run_attrs.get("bps_run_summary", "")
    init_summary = init_workflow.run_attrs.get("bps_run_summary", "")
    generic_workflow.run_attrs["bps_run_summary"] = ";".join(x for x in [init_summary, old_run_summary] if x)


def create_init_workflow(config, qgraph_gwfile):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    search_opt = {"curvals": {"curr_pipetask": "pipetaskInit"},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # Create job for executing --init-only.
    gwjob = GenericWorkflowJob("pipetaskInit")
    gwjob.label = "pipetaskInit"

    job_values = _get_job_values(config, search_opt, "runQuantumCommand")

    # Handle universal values.
    _handle_job_values_universal(job_values, gwjob)

    # Handle aggregate values.
    _handle_job_values_aggregate(job_values, gwjob)

    # Save summary of Quanta in job.
    gwjob.tags["quanta_summary"] = "pipetaskInit:1"

    # Update job with workflow attribute and profile values.
    update_job(config, gwjob)

    init_workflow.add_job(gwjob)
    butler_gwfile = _get_butler_gwfile(config, config["submitPath"])
    init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
    init_workflow.run_attrs["bps_run_summary"] = gwjob.tags["quanta_summary"]
    _enhance_command(config, init_workflow, gwjob)

    return init_workflow


def _enhance_command(config, generic_workflow, gwjob):
    """Enhance command line with env and file placeholders
    and gather command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    """
    search_opt = {"curvals": {"curr_pipetask": gwjob.label},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    # Change qgraph variable to match whether using run or per-job qgraph.
    # Note: these are lookup keys, not actual physical filenames.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    elif gwjob.name == "pipetaskInit":
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    else:  # Need unique file keys for per-job QuantumGraphs.
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")

    # Replace files with special placeholders.
    for gwfile in generic_workflow.get_job_inputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
    for gwfile in generic_workflow.get_job_outputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save dict of other values needed to complete command line.
    # (Be careful not to replace env variables as they may
    # be different in the compute job.)
    search_opt["replaceVars"] = True

    for key in re.findall(r"{([^}]+)}", gwjob.arguments):
        if key not in gwjob.cmdvals:
            _, gwjob.cmdvals[key] = config.search(key, opt=search_opt)

    # Backwards compatibility.
    _, use_lazy_commands = config.search("useLazyCommands", opt={"default": True})
    if not use_lazy_commands:
        gwjob.arguments = _fill_arguments(config, generic_workflow, gwjob.arguments, gwjob.cmdvals)


def _fill_arguments(config, generic_workflow, arguments, cmdvals):
    """Replace placeholders in command line string in job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow containing the job.
    arguments : `str`
        String containing placeholders.
    cmdvals : `dict` [`str`, `Any`]
        Any command line values that can be used to replace placeholders.

    Returns
    -------
    arguments : `str`
        Command line with FILE and ENV placeholders replaced.
    """
    # Replace file placeholders.
    _, use_shared = config.search("bpsUseShared", opt={"default": False})
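    # When the WMS transfers a file and it is not on a shared filesystem, the
    # job sees only the basename in its local working directory; otherwise the
    # original URI is usable as-is.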
    for file_key in re.findall(r"<FILE:([^>]+)>", arguments):
        gwfile = generic_workflow.get_file(file_key)
        if (gwfile.wms_transfer and not use_shared) or not gwfile.job_shared:
            uri = os.path.basename(gwfile.src_uri)
        else:
            uri = gwfile.src_uri
        arguments = arguments.replace(f"<FILE:{file_key}>", uri)

    # Replace env placeholders with submit-side values.
    arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments)
    arguments = os.path.expandvars(arguments)

    # Replace remaining vars.
    arguments = arguments.format(**cmdvals)

    return arguments


def _get_butler_gwfile(config, prefix):
    """Get butler location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "NEVER":
        _, butler_config = config.search("butlerConfig")
        wms_transfer = False
        job_access_remote = True
        job_shared = True
    else:
        _, butler_config = config.search(".bps_defined.executionButlerDir")
        butler_config = os.path.join(prefix, butler_config)
        wms_transfer = True
        job_access_remote = False
        job_shared = False

    gwfile = GenericWorkflowFile("butlerConfig",
                                 src_uri=butler_config,
                                 wms_transfer=wms_transfer,
                                 job_access_remote=job_access_remote,
                                 job_shared=job_shared)

    return gwfile


def _get_qgraph_gwfile(config, gwjob, run_qgraph_file, prefix):
    """Get qgraph location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which to determine the QuantumGraph file.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of the QuantumGraph location (may not include
        filename).
    """
    per_job_qgraph_file = True
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        per_job_qgraph_file = False

    qgraph_gwfile = None
    if per_job_qgraph_file:
        qgraph_gwfile = GenericWorkflowFile(f"qgraphFile_{gwjob.name}",
                                            src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
                                            wms_transfer=True,
                                            job_access_remote=True,
                                            job_shared=True)
    else:
        qgraph_gwfile = run_qgraph_file

    return qgraph_gwfile


def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the bps config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching config.
    cmd_line_key : `str` or `None`
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [`str`, `Any`]
        A mapping between job attributes and their values.
    """
    special_values = ["name", "label", "cmdline", "pre_cmdline", "post_cmdline"]

    job_values = {}
    for field in dataclasses.fields(GenericWorkflowJob):
        if field.name not in special_values:
            # Variable names in yaml are camel case instead of snake case.
            yaml_name = re.sub(r"_(\S)", lambda match: match.group(1).upper(), field.name)
            found, value = config.search(yaml_name, opt=search_opt)
            if not found and "_" in field.name:
                # Just in case someone used snake case:
                found, value = config.search(field.name, opt=search_opt)
            if found:
                job_values[field.name] = value
            else:
                job_values[field.name] = None

    # If the automatic memory scaling is enabled (i.e., the memory multiplier
    # is set and it is a positive number greater than 1.0), adjust the number
    # of retries when necessary. If the memory multiplier is invalid, disable
    # automatic memory scaling.
    if job_values["memory_multiplier"] is not None:
        if math.ceil(float(job_values["memory_multiplier"])) > 1:
            if job_values["number_of_retries"] is None:
                job_values["number_of_retries"] = DEFAULT_MEM_RETRIES
        else:
            job_values["memory_multiplier"] = None

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            cmd_parts = cmdline.split(" ", 1)
            job_values["executable"] = cmd_parts[0]
            if len(cmd_parts) > 1:
                job_values["arguments"] = cmd_parts[1]

    return job_values


def _handle_job_values_universal(quantum_job_values, gwjob):
    """Handle job values that must have the same value for every PipelineTask
    in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    """
    universal_values = ["arguments", "compute_site"]
    for key in universal_values:
        current_value = getattr(gwjob, key)
        if not current_value:
            setattr(gwjob, key, quantum_job_values[key])
        elif current_value != quantum_job_values[key]:
            _LOG.error("Inconsistent value for %s in "
                       "Cluster %s Quantum Number %s\n"
                       "Current cluster value: %s\n"
                       "Quantum value: %s",
                       key, gwjob.name, quantum_job_values.get("qgraphNodeId", "MISSING"), current_value,
                       quantum_job_values[key])
            raise RuntimeError(f"Inconsistent value for {key} in cluster {gwjob.name}.")
    # Handle cmdline specially.
    if not gwjob.executable:
        gwjob.executable = GenericWorkflowExec(os.path.basename(quantum_job_values["executable"]),
                                               quantum_job_values["executable"], False)
    elif quantum_job_values["executable"] != gwjob.executable.src_uri:
        _LOG.error("Inconsistent value for executable in "
                   "Cluster %s Quantum Number %s\n"
                   "Current cluster value: %s\n"
                   "Quantum value: %s",
                   gwjob.name, quantum_job_values.get("qgraphNodeId", "MISSING"),
                   gwjob.executable.src_uri, quantum_job_values["executable"])
        raise RuntimeError(f"Inconsistent value for executable in cluster {gwjob.name}.")


def _handle_job_values_aggregate(quantum_job_values, gwjob):
    """Handle job values that are aggregates of values from the PipelineTasks
    in the QuantumGraph.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    """
    values_max = ["memory_multiplier", "number_of_retries", "request_cpus", "request_memory"]
    values_sum = ["request_disk", "request_walltime"]

    for key in values_max:
        current_value = getattr(gwjob, key)
        quantum_value = quantum_job_values[key]

        needs_update = False
        if current_value is None:
            if quantum_value is not None:
                needs_update = True
        else:
            if quantum_value is not None and current_value < quantum_value:
                needs_update = True
        if needs_update:
            setattr(gwjob, key, quantum_value)

            # When updating memory requirements for a job, check if memory
            # autoscaling is enabled. If it is, always use the memory
            # multiplier and the number of retries which come with the
            # quantum.
            #
            # Note that as a result, the quantum with the biggest memory
            # requirements will determine whether the memory autoscaling
            # will be enabled (or disabled) depending on the value of its
            # memory multiplier.
            if key == "request_memory":
                gwjob.memory_multiplier = quantum_job_values["memory_multiplier"]
                if gwjob.memory_multiplier is not None:
                    gwjob.number_of_retries = quantum_job_values["number_of_retries"]

    for key in values_sum:
        current_value = getattr(gwjob, key)
        if not current_value:
            setattr(gwjob, key, quantum_job_values[key])
        else:
            setattr(gwjob, key, current_value + quantum_job_values[key])


def create_generic_workflow(config, clustered_quanta_graph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    clustered_quanta_graph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether saving per-job QuantumGraph files in the loop.
    save_per_job_qgraph = False
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.TRANSFORM:
        save_per_job_qgraph = True

    generic_workflow = GenericWorkflow(name)

    # Save full run QuantumGraph for use by jobs.
    generic_workflow.add_file(GenericWorkflowFile("runQgraphFile",
                                                  src_uri=config["runQgraphFile"],
                                                  wms_transfer=True,
                                                  job_access_remote=True,
                                                  job_shared=True))

    qgraph = clustered_quanta_graph.graph["qgraph"]
    task_labels = [task.label for task in qgraph.iterTaskGraph()]
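    # Tally quanta per task label across the entire run for the run summary
    # attribute saved after the loop.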
    run_label_counts = dict.fromkeys(task_labels, 0)
    for node_name, data in clustered_quanta_graph.nodes(data=True):
        _LOG.debug("clustered_quanta_graph: node_name=%s, len(cluster)=%s, label=%s, ids=%s", node_name,
                   len(data["qgraph_node_ids"]), data["label"], data["qgraph_node_ids"][:4])
        gwjob = GenericWorkflowJob(node_name)
        if "tags" in data:
            gwjob.tags = data["tags"]
        if "label" in data:
            gwjob.label = data["label"]
        # Getting labels in pipeline order.
        label_counts = dict.fromkeys(task_labels, 0)

        # Get job info either common or aggregate for all Quanta in cluster.
        for node_id in data["qgraph_node_ids"]:
            qnode = qgraph.getQuantumNodeByNodeId(node_id)
            label_counts[qnode.taskDef.label] += 1

            search_opt = {"curvals": {"curr_pipetask": qnode.taskDef.label},
                          "replaceVars": False,
                          "expandEnvVars": False,
                          "replaceEnvVars": True,
                          "required": False}

            quantum_job_values = _get_job_values(config, search_opt, "runQuantumCommand")

            # Handle universal values.
            _handle_job_values_universal(quantum_job_values, gwjob)

            # Handle aggregate values.
            _handle_job_values_aggregate(quantum_job_values, gwjob)

        # Save summary of Quanta in job.
        gwjob.tags["quanta_summary"] = ";".join([f"{k}:{v}" for k, v in label_counts.items() if v])
        # Save job quanta counts to run totals.
        for key in task_labels:
            run_label_counts[key] += label_counts[key]

        # Update job with workflow attribute and profile values.
        update_job(config, gwjob)
        qgraph_gwfile = _get_qgraph_gwfile(config, gwjob, generic_workflow.get_file("runQgraphFile"),
                                           config["submitPath"])
        butler_gwfile = _get_butler_gwfile(config, config["submitPath"])

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        gwjob.cmdvals["qgraphId"] = data["qgraph_node_ids"][0].buildId
        gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in
                                                         data["qgraph_node_ids"]]))
        _enhance_command(config, generic_workflow, gwjob)

        # If writing per-job QuantumGraph files during TRANSFORM stage,
        # write it now while in memory.
        if save_per_job_qgraph:
            save_qg_subgraph(qgraph, qgraph_gwfile.src_uri, data["qgraph_node_ids"])
    # Save run's Quanta summary.
    run_summary = ";".join([f"{k}:{v}" for k, v in run_label_counts.items()])
    generic_workflow.run_attrs["bps_run_summary"] = run_summary

    # Create job dependencies.
    for node_name in clustered_quanta_graph.nodes():
        for child in clustered_quanta_graph.successors(node_name):
            generic_workflow.add_job_relationships(node_name, child)

    # Add initial workflow.
    if config.get("runInit", False):
        add_workflow_init_nodes(config, generic_workflow)

    generic_workflow.run_attrs.update({"bps_isjob": "True",
                                       "bps_project": config["project"],
                                       "bps_campaign": config["campaign"],
                                       "bps_run": generic_workflow.name,
                                       "bps_operator": config["operator"],
                                       "bps_payload": config["payloadName"],
                                       "bps_runsite": config["computeSite"]})

    # Add final job.
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow


def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    generic_workflow_config = BpsConfig(config)
    generic_workflow_config["workflowName"] = config["uniqProcName"]
    generic_workflow_config["workflowPath"] = prefix
    return generic_workflow_config


def add_final_job(config, generic_workflow, prefix):
    """Add final workflow job depending upon configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the final job should be added.
    prefix : `str`
        Directory in which to output final script.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")

    search_opt = {"searchobj": config[".executionButler"], "default": None}
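    # Only add the merge job when an execution butler is both created and
    # meant to be merged back into the central repo.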
    if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
        # Create gwjob.
        gwjob = GenericWorkflowJob("mergeExecutionButler")
        gwjob.label = "mergeExecutionButler"

        job_values = _get_job_values(config, search_opt, None)
        for field in dataclasses.fields(GenericWorkflowJob):
            if not getattr(gwjob, field.name) and job_values[field.name]:
                setattr(gwjob, field.name, job_values[field.name])

        update_job(config, gwjob)

        # Create script and add command line to job.
        gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)

        # Determine inputs from command line.
        for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
            gwfile = generic_workflow.get_file(file_key)
            generic_workflow.add_job_inputs(gwjob.name, gwfile)

        _enhance_command(config, generic_workflow, gwjob)

        # Put transfer repo job in appropriate location in workflow.
        if when_merge.upper() == "ALWAYS":
            # Add as special final job.
            generic_workflow.add_final(gwjob)
        elif when_merge.upper() == "SUCCESS":
            # Add as regular sink node.
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            raise ValueError(f"Invalid value for executionButler.whenMerge: {when_merge}")

        generic_workflow.run_attrs["bps_run_summary"] += ";mergeExecutionButler:1"


def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    prefix : `str`
        Directory in which to output final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    search_opt = {"replaceVars": False, "replaceEnvVars": False, "expandEnvVars": False}

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w") as fh:
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)
        print("set -x", file=fh)

        print("butlerConfig=$1", file=fh)
        print("executionButlerDir=$2", file=fh)
        i = 1
        found, command = config.search(f".executionButler.command{i}", opt=search_opt)
        while found:
            # Temporarily replace any env vars so formatter doesn't try to
            # replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be args to script and
            # set to env vars.
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in command string.
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env placeholders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)

            print(command, file=fh)
            i += 1
            found, command = config.search(f".executionButler.command{i}", opt=search_opt)
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"


def add_final_job_as_sink(generic_workflow, final_job):
    """Add final job as the single sink for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the final job should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as new sink node depending upon all previous sink nodes.
    """
    # Find sink nodes of generic workflow graph.
    gw_sinks = [n for n in generic_workflow if generic_workflow.out_degree(n) == 0]
    _LOG.debug("gw_sinks = %s", gw_sinks)

    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(gw_sinks, final_job.name)