1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Driver for the transformation of a QuantumGraph into a generic workflow."""
25import logging
26import math
27import os
28import re
29import time
30import dataclasses
32from . import (
33 DEFAULT_MEM_RETRIES,
34 BpsConfig,
35 GenericWorkflow,
36 GenericWorkflowJob,
37 GenericWorkflowFile,
38 GenericWorkflowExec,
39)
40from .bps_utils import (
41 save_qg_subgraph,
42 WhenToSaveQuantumGraphs,
43 create_job_quantum_graph_filename,
44 _create_execution_butler
45)
# Module-level logger for this transform driver.
_LOG = logging.getLogger(__name__)
def transform(config, clustered_quantum_graph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    clustered_quantum_graph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany GenericWorkflow.
    """
    # Prefer a name carried on the clustered graph; otherwise fall back to
    # the unique process name from the configuration.
    name = clustered_quantum_graph.graph.get("name")
    if name is None:
        _, name = config.search("uniqProcName", opt={"required": True})

    # Optionally build the execution butler during this (TRANSFORM) stage.
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _LOG.info("Creating execution butler")
        start = time.time()
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)
        _LOG.info("Creating execution butler took %.2f seconds", time.time() - start)

    generic_workflow = create_generic_workflow(config, clustered_quantum_graph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)
    return generic_workflow, generic_workflow_config
def update_job(config, job):
    """Update given job with workflow attribute and profile values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to which the attributes and profile values should be added.
    """
    search_key = f".site.{job.compute_site}.profile.condor"
    if search_key in config:
        # Entries prefixed with "+" become job attributes; everything else
        # is stored as a WMS profile setting.  (The loop variable used to
        # shadow the config lookup key, which obscured the logic.)
        for subkey, val in config[search_key].items():
            if subkey.startswith("+"):
                job.attrs[subkey[1:]] = val
            else:
                job.profile[subkey] = val
def add_workflow_init_nodes(config, qgraph, generic_workflow):
    """Add nodes to workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    of the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Build a small workflow with the job(s) and file(s) needed to
    # initialize the pipeline execution and prepend it to the main one.
    init_workflow = create_init_workflow(config, qgraph, generic_workflow.get_file("runQgraphFile"))
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)

    # Prepend the init job summary to the existing run summary, dropping
    # whichever pieces are empty.
    summaries = [init_workflow.run_attrs.get("bps_run_summary", ""),
                 generic_workflow.run_attrs.get("bps_run_summary", "")]
    generic_workflow.run_attrs["bps_run_summary"] = ';'.join(s for s in summaries if s)
def create_init_workflow(config, qgraph, qgraph_gwfile):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    # Search config under the special "pipetaskInit" pipetask label; env
    # variables are kept as placeholders so they are expanded later with
    # the compute-node environment.
    search_opt = {"curvals": {"curr_pipetask": "pipetaskInit"},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # create job for executing --init-only
    gwjob = GenericWorkflowJob("pipetaskInit")
    gwjob.label = "pipetaskInit"

    job_values = _get_job_values(config, search_opt, "runQuantumCommand")

    # Handle universal values (same for every quantum: arguments, site,
    # executable).
    _handle_job_values_universal(job_values, gwjob)

    # Handle aggregate values (max/sum across quanta: memory, disk, etc.).
    _handle_job_values_aggregate(job_values, gwjob)

    # Pick a node id for each task (not quantum!) to avoid reading the entire
    # quantum graph during the initialization stage.
    node_ids = []
    for task in qgraph.iterTaskGraph():
        task_def = qgraph.findTaskDefByLabel(task.label)
        node = next(iter(qgraph.getNodesForTask(task_def)))
        node_ids.append(node.nodeId)
    gwjob.cmdvals["qgraphId"] = qgraph.graphID
    gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in node_ids]))

    # Save summary of Quanta in job.
    gwjob.tags["quanta_summary"] = "pipetaskInit:1"

    # Update job with workflow attribute and profile values.
    update_job(config, gwjob)

    init_workflow.add_job(gwjob)
    butler_gwfile = _get_butler_gwfile(config, config["submitPath"])
    init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
    init_workflow.run_attrs["bps_run_summary"] = gwjob.tags["quanta_summary"]
    # Finalize the command line (placeholder substitution, cmdvals).
    _enhance_command(config, init_workflow, gwjob)

    return init_workflow
def _enhance_command(config, generic_workflow, gwjob):
    """Enhance command line with env and file placeholders
    and gather command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    """
    # Env variables are kept as placeholders here so they get expanded with
    # the compute-node environment, not the submit-node one.
    search_opt = {"curvals": {"curr_pipetask": gwjob.label},
                  "replaceVars": False,
                  "expandEnvVars": False,
                  "replaceEnvVars": True,
                  "required": False}

    # Change qgraph variable to match whether using run or per-job qgraph
    # Note: these are lookup keys, not actual physical filenames.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    elif gwjob.name == "pipetaskInit":
        # The init job always runs on the full run QuantumGraph.
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    else:  # Needed unique file keys for per-job QuantumGraphs
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")

    # Replace files with special placeholders
    for gwfile in generic_workflow.get_job_inputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
    for gwfile in generic_workflow.get_job_outputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save dict of other values needed to complete command line.
    # (Be careful to not replace env variables as they may
    # be different in compute job.)
    search_opt["replaceVars"] = True
    # Any "{key}" still left in the arguments is resolved from the config
    # and stashed in cmdvals for substitution at run (or fill) time.
    for key in re.findall(r"{([^}]+)}", gwjob.arguments):
        if key not in gwjob.cmdvals:
            _, gwjob.cmdvals[key] = config.search(key, opt=search_opt)

    # backwards compatibility: optionally resolve everything now instead of
    # leaving lazy placeholders for the WMS plugin.
    _, use_lazy_commands = config.search("useLazyCommands", opt={"default": True})
    if not use_lazy_commands:
        gwjob.arguments = _fill_arguments(config, generic_workflow, gwjob.arguments, gwjob.cmdvals)
250def _fill_arguments(config, generic_workflow, arguments, cmdvals):
251 """Replace placeholders in command line string in job.
253 Parameters
254 ----------
255 config : `lsst.ctrl.bps.BpsConfig`
256 Bps configuration.
257 generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
258 Generic workflow containing the job.
259 arguments : `str`
260 String containing placeholders.
261 cmdvals : `dict` [`str`, `Any`]
262 Any command line values that can be used to replace placeholders.
264 Returns
265 -------
266 arguments : `str`
267 Command line with FILE and ENV placeholders replaced.
268 """
269 # Replace file placeholders
270 _, use_shared = config.search("bpsUseShared", opt={"default": False})
271 for file_key in re.findall(r"<FILE:([^>]+)>", arguments):
272 gwfile = generic_workflow.get_file(file_key)
273 if gwfile.wms_transfer and not use_shared or not gwfile.job_shared:
274 uri = os.path.basename(gwfile.src_uri)
275 else:
276 uri = gwfile.src_uri
277 arguments = arguments.replace(f"<FILE:{file_key}>", uri)
279 # Replace env placeholder with submit-side values
280 arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments)
281 arguments = os.path.expandvars(arguments)
283 # Replace remaining vars
284 arguments = arguments.format(**cmdvals)
286 return arguments
def _get_butler_gwfile(config, prefix):
    """Get butler location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "NEVER":
        # No execution butler: jobs use the main repo config directly.
        _, butler_config = config.search("butlerConfig")
        transfer, remote_access, shared = False, True, True
    else:
        # Execution butler lives under the submit directory and must be
        # transferred to each job.
        _, butler_config = config.search(".bps_defined.executionButlerDir")
        butler_config = os.path.join(prefix, butler_config)
        transfer, remote_access, shared = True, False, False

    return GenericWorkflowFile("butlerConfig",
                               src_uri=butler_config,
                               wms_transfer=transfer,
                               job_access_remote=remote_access,
                               job_shared=shared)
def _get_qgraph_gwfile(config, gwjob, run_qgraph_file, prefix):
    """Get qgraph location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which determining QuantumGraph file.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location (may not include filename).
    """
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        # Per-job QuantumGraphs are never written, so every job shares the
        # single full-run QuantumGraph file.
        return run_qgraph_file

    # Otherwise each job gets its own uniquely keyed QuantumGraph file.
    return GenericWorkflowFile(f"qgraphFile_{gwjob.name}",
                               src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
                               wms_transfer=True,
                               job_access_remote=True,
                               job_shared=True)
def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the bps config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching config.
    cmd_line_key : `str` or None
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [`str`, `Any`]
        A mapping between job attributes and their values.
    """
    # Fields handled elsewhere (or not meant to be set from the config).
    special_values = ['name', 'label', 'cmdline', 'pre_cmdline', 'post_cmdline']

    job_values = {}
    for field in dataclasses.fields(GenericWorkflowJob):
        if field.name not in special_values:
            # Variable names in yaml are camel case instead of snake case.
            yaml_name = re.sub(r"_(\S)", lambda match: match.group(1).upper(), field.name)
            found, value = config.search(yaml_name, opt=search_opt)
            if not found and '_' in field.name:
                # Just in case someone used snake case:
                found, value = config.search(field.name, opt=search_opt)
            if found:
                job_values[field.name] = value
            else:
                job_values[field.name] = None

    # If the automatic memory scaling is enabled (i.e. the memory multiplier
    # is set and it is a positive number greater than 1.0), adjust number
    # of retries when necessary. If the memory multiplier is invalid, disable
    # automatic memory scaling.
    if job_values["memory_multiplier"] is not None:
        # math.ceil makes any multiplier strictly greater than 1.0 count as
        # enabled (e.g. 1.2 -> 2); exactly 1.0 or less disables scaling.
        if math.ceil(float(job_values["memory_multiplier"])) > 1:
            if job_values["number_of_retries"] is None:
                job_values["number_of_retries"] = DEFAULT_MEM_RETRIES
        else:
            job_values["memory_multiplier"] = None

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            # First token is the executable; the remainder (if any) are
            # its arguments.
            cmd_parts = cmdline.split(" ", 1)
            job_values["executable"] = cmd_parts[0]
            if len(cmd_parts) > 1:
                job_values["arguments"] = cmd_parts[1]

    return job_values
420def _handle_job_values_universal(quantum_job_values, gwjob):
421 """Handle job values that must be same value for every PipelineTask in
422 cluster.
424 Parameters
425 ----------
426 quantum_job_values : `dict` [`str`, `Any`]
427 Job values for running single Quantum.
428 gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
429 Generic workflow job in which to store the universal values.
430 """
431 universal_values = ["arguments", "compute_site"]
432 for key in universal_values:
433 current_value = getattr(gwjob, key)
434 if not current_value:
435 setattr(gwjob, key, quantum_job_values[key])
436 elif current_value != quantum_job_values[key]:
437 _LOG.error("Inconsistent value for %s in "
438 "Cluster %s Quantum Number %s\n"
439 "Current cluster value: %s\n"
440 "Quantum value: %s",
441 key, gwjob.name, quantum_job_values.get("qgraphNodeId", "MISSING"), current_value,
442 quantum_job_values[key])
443 raise RuntimeError(f"Inconsistent value for {key} in cluster {gwjob.name}.")
445 # Handle cmdline special
446 if not gwjob.executable:
447 gwjob.executable = GenericWorkflowExec(os.path.basename(quantum_job_values['executable']),
448 quantum_job_values['executable'], False)
449 elif quantum_job_values['executable'] != gwjob.executable.src_uri:
450 _LOG.error("Inconsistent value for %s in "
451 "Cluster %s Quantum Number %s\n"
452 "Current cluster value: %s\n"
453 "Quantum value: %s",
454 key, gwjob.name, quantum_job_values.get("executable", "MISSING"), gwjob.executable.src_uri,
455 quantum_job_values[key])
456 raise RuntimeError(f"Inconsistent value for {key} in cluster {gwjob.name}.")
459def _handle_job_values_aggregate(quantum_job_values, gwjob):
460 """Handle job values that are aggregate of values from PipelineTasks
461 in QuantumGraph.
463 Parameters
464 ----------
465 quantum_job_values : `dict` [`str`, `Any`]
466 Job values for running single Quantum.
467 gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
468 Generic workflow job in which to store the aggregate values.
469 """
470 values_max = ["memory_multiplier", "number_of_retries", "request_cpus", "request_memory"]
471 values_sum = ["request_disk", "request_walltime"]
473 for key in values_max:
474 current_value = getattr(gwjob, key)
475 quantum_value = quantum_job_values[key]
477 needs_update = False
478 if current_value is None:
479 if quantum_value is not None:
480 needs_update = True
481 else:
482 if quantum_value is not None and current_value < quantum_value:
483 needs_update = True
484 if needs_update:
485 setattr(gwjob, key, quantum_value)
487 # When updating memory requirements for a job, check if memory
488 # autoscaling is enabled. If it is, always use the memory
489 # multiplier and the number of retries which comes with the
490 # quantum.
491 #
492 # Note that as a result, the quantum with the biggest memory
493 # requirements will determine whether the memory autoscaling
494 # will be enabled (or disabled) depending on the value of its
495 # memory multiplier.
496 if key == "request_memory":
497 gwjob.memory_multiplier = quantum_job_values["memory_multiplier"]
498 if gwjob.memory_multiplier is not None:
499 gwjob.number_of_retries = quantum_job_values["number_of_retries"]
501 for key in values_sum:
502 current_value = getattr(gwjob, key)
503 if not current_value:
504 setattr(gwjob, key, quantum_job_values[key])
505 else:
506 setattr(gwjob, key, current_value + quantum_job_values[key])
def create_generic_workflow(config, clustered_quanta_graph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    clustered_quanta_graph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether saving per-job QuantumGraph files in the loop.
    save_per_job_qgraph = False
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.TRANSFORM:
        save_per_job_qgraph = True

    generic_workflow = GenericWorkflow(name)

    # Save full run QuantumGraph for use by jobs
    generic_workflow.add_file(GenericWorkflowFile("runQgraphFile",
                                                  src_uri=config["runQgraphFile"],
                                                  wms_transfer=True,
                                                  job_access_remote=True,
                                                  job_shared=True))

    qgraph = clustered_quanta_graph.graph["qgraph"]
    # Task labels in pipeline order, used to keep quanta counts ordered.
    task_labels = [task.label for task in qgraph.iterTaskGraph()]
    run_label_counts = dict.fromkeys(task_labels, 0)
    # One generic workflow job per cluster node.
    for node_name, data in clustered_quanta_graph.nodes(data=True):
        _LOG.debug("clustered_quanta_graph: node_name=%s, len(cluster)=%s, label=%s, ids=%s", node_name,
                   len(data["qgraph_node_ids"]), data["label"], data["qgraph_node_ids"][:4])
        gwjob = GenericWorkflowJob(node_name)
        if "tags" in data:
            gwjob.tags = data["tags"]
        if "label" in data:
            gwjob.label = data["label"]
        # Getting labels in pipeline order.
        label_counts = dict.fromkeys(task_labels, 0)

        # Get job info either common or aggregate for all Quanta in cluster.
        for node_id in data["qgraph_node_ids"]:
            qnode = qgraph.getQuantumNodeByNodeId(node_id)
            label_counts[qnode.taskDef.label] += 1

            # Env variables stay as placeholders so they get the
            # compute-node values at run time.
            search_opt = {"curvals": {"curr_pipetask": qnode.taskDef.label},
                          "replaceVars": False,
                          "expandEnvVars": False,
                          "replaceEnvVars": True,
                          "required": False}

            quantum_job_values = _get_job_values(config, search_opt, "runQuantumCommand")

            # Handle universal values.
            _handle_job_values_universal(quantum_job_values, gwjob)

            # Handle aggregate values.
            _handle_job_values_aggregate(quantum_job_values, gwjob)

        # Save summary of Quanta in job.
        gwjob.tags["quanta_summary"] = ";".join([f"{k}:{v}" for k, v in label_counts.items() if v])
        # Save job quanta counts to run
        for key in task_labels:
            run_label_counts[key] += label_counts[key]

        # Update job with workflow attribute and profile values.
        update_job(config, gwjob)
        qgraph_gwfile = _get_qgraph_gwfile(config, gwjob, generic_workflow.get_file("runQgraphFile"),
                                           config["submitPath"])
        butler_gwfile = _get_butler_gwfile(config, config["submitPath"])

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        gwjob.cmdvals["qgraphId"] = data["qgraph_node_ids"][0].buildId
        gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in
                                                         data["qgraph_node_ids"]]))
        _enhance_command(config, generic_workflow, gwjob)

        # If writing per-job QuantumGraph files during TRANSFORM stage,
        # write it now while in memory.
        if save_per_job_qgraph:
            save_qg_subgraph(qgraph, qgraph_gwfile.src_uri, data["qgraph_node_ids"])

    # Save run's Quanta summary
    run_summary = ";".join([f"{k}:{v}" for k, v in run_label_counts.items()])
    generic_workflow.run_attrs["bps_run_summary"] = run_summary

    # Create job dependencies.
    for node_name in clustered_quanta_graph.nodes():
        for child in clustered_quanta_graph.successors(node_name):
            generic_workflow.add_job_relationships(node_name, child)

    # Add initial workflow.
    # NOTE(review): the default "{default: False}" is a truthy string if
    # returned literally; verify BpsConfig.get treats it as a default spec
    # rather than a plain fallback value.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, qgraph, generic_workflow)

    generic_workflow.run_attrs.update({"bps_isjob": "True",
                                       "bps_project": config["project"],
                                       "bps_campaign": config["campaign"],
                                       "bps_run": generic_workflow.name,
                                       "bps_operator": config["operator"],
                                       "bps_payload": config["payloadName"],
                                       "bps_runsite": config["computeSite"]})

    # Add final job
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow
def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    # Start from a copy of the submit config, then record the workflow's
    # name and output location.
    gw_config = BpsConfig(config)
    gw_config["workflowName"] = config["uniqProcName"]
    gw_config["workflowPath"] = prefix
    return gw_config
def add_final_job(config, generic_workflow, prefix):
    """Add final workflow job depending upon configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    prefix : `str`
        Directory in which to output final script.

    Raises
    ------
    ValueError
        Raised if executionButler.whenMerge has an unsupported value.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")

    search_opt = {"searchobj": config[".executionButler"], "default": None}
    # Only add a merge job when an execution butler is both created and
    # meant to be merged back.
    if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
        # create gwjob
        gwjob = GenericWorkflowJob("mergeExecutionButler")
        gwjob.label = "mergeExecutionButler"

        job_values = _get_job_values(config, search_opt, None)
        # Copy config-provided values onto the job, never overwriting a
        # value the job already has.
        for field in dataclasses.fields(GenericWorkflowJob):
            if not getattr(gwjob, field.name) and job_values.get(field.name, None):
                setattr(gwjob, field.name, job_values[field.name])

        update_job(config, gwjob)

        # Create script and add command line to job.
        gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)

        # Determine inputs from command line.
        for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
            gwfile = generic_workflow.get_file(file_key)
            generic_workflow.add_job_inputs(gwjob.name, gwfile)

        _enhance_command(config, generic_workflow, gwjob)

        # Put transfer repo job in appropriate location in workflow.
        if when_merge.upper() == "ALWAYS":
            # add as special final job
            generic_workflow.add_final(gwjob)
        elif when_merge.upper() == "SUCCESS":
            # add as regular sink node
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            raise ValueError(f"Invalid value for executionButler.when_merge {when_merge}")

        # NOTE(review): assumes "bps_run_summary" was already set (done by
        # create_generic_workflow); a KeyError here would mean it was not.
        generic_workflow.run_attrs["bps_run_summary"] += ";mergeExecutionButler:1"
def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Directory in which to output final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    # Keep config variables and env vars unexpanded while reading the raw
    # command templates.
    search_opt = {'replaceVars': False, 'replaceEnvVars': False, 'expandEnvVars': False}

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w") as fh:
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)
        print("set -x", file=fh)

        # The script receives the original butler repo and the execution
        # butler directory as its two positional arguments.
        print("butlerConfig=$1", file=fh)
        print("executionButlerDir=$2", file=fh)

        # Commands are numbered consecutively in the config
        # (.executionButler.command1, command2, ...); stop at the first gap.
        i = 1
        found, command = config.search(f".executionButler.command{i}", opt=search_opt)
        while found:
            # Temporarily replace any env vars so formatter doesn't try to
            # replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be args to script and
            # set to env vars
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in command string
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env place holders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)

            print(command, file=fh)
            i += 1
            found, command = config.search(f".executionButler.command{i}", opt=search_opt)
    # Make the generated script executable.
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"
def add_final_job_as_sink(generic_workflow, final_job):
    """Add final job as the single sink for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as new sink node depending upon all previous sink nodes.
    """
    # Collect the current sink nodes (no outgoing edges) before inserting
    # the final job, then make each of them a parent of the final job.
    current_sinks = [node for node in generic_workflow
                     if generic_workflow.out_degree(node) == 0]
    _LOG.debug("gw_sinks = %s", current_sinks)

    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(current_sinks, final_job.name)