# NOTE: This file was recovered from an HTML coverage report page for
# python/lsst/ctrl/bps/transform.py (reported 9% coverage); the report's
# navigation hot-key text has been removed.
# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Driver for the transformation of a QuantumGraph into a generic workflow.
23"""
25import logging
26import math
27import os
28import re
29import dataclasses
31from lsst.utils.timer import time_this
33from . import (
34 DEFAULT_MEM_RETRIES,
35 BpsConfig,
36 GenericWorkflow,
37 GenericWorkflowJob,
38 GenericWorkflowFile,
39 GenericWorkflowExec,
40)
41from .bps_utils import (
42 save_qg_subgraph,
43 WhenToSaveQuantumGraphs,
44 create_job_quantum_graph_filename,
45 _create_execution_butler
46)
# All available job attributes (every field of GenericWorkflowJob).
_ATTRS_ALL = frozenset([field.name for field in dataclasses.fields(GenericWorkflowJob)])

# Job attributes that need to be set to their maximal value in the cluster.
_ATTRS_MAX = frozenset({
    "memory_multiplier",
    "number_of_retries",
    "request_cpus",
    "request_memory",
})

# Job attributes that need to be set to sum of their values in the cluster.
_ATTRS_SUM = frozenset({
    "request_disk",
    "request_walltime",
})

# Job attributes that do not fall into a specific category; they are
# handled individually elsewhere (e.g. when building command lines).
_ATTRS_MISC = frozenset({
    "cmdline",
    "cmdvals",
    "environment",
    "pre_cmdline",
    "post_cmdline",
    "profile",
    "attrs",
})

# Attributes that need to be the same for each quanta in the cluster.
_ATTRS_UNIVERSAL = frozenset(_ATTRS_ALL - (_ATTRS_MAX | _ATTRS_MISC | _ATTRS_SUM))

_LOG = logging.getLogger(__name__)
def transform(config, cqgraph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany GenericWorkflow.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _LOG.info("Creating execution butler in '%s'", execution_butler_dir)
        # Bug fix: logging.LEVEL does not exist (it raised AttributeError
        # whenever whenCreate == TRANSFORM); use logging.INFO to match the
        # level of the surrounding log messages.
        with time_this(log=_LOG, level=logging.INFO, prefix=None,
                       msg="Creating execution butler completed"):
            _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)

    # Prefer the name carried by the clustered quantum graph; fall back to
    # the unique processing name from the config.
    if cqgraph.name is not None:
        name = cqgraph.name
    else:
        _, name = config.search("uniqProcName", opt={"required": True})

    generic_workflow = create_generic_workflow(config, cqgraph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    return generic_workflow, generic_workflow_config
def update_job(config, job):
    """Update given job with workflow attribute and profile values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to which the attributes and profile values should be added.
    """
    section_key = f".site.{job.compute_site}.profile.condor"

    if section_key in config:
        # Entries whose name starts with "+" are job attributes; all
        # others are profile settings.
        for name, value in config[section_key].items():
            if name.startswith("+"):
                job.attrs[name[1:]] = value
            else:
                job.profile[name] = value
def add_workflow_init_nodes(config, qgraph, generic_workflow):
    """Add nodes to workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    of the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Build a small workflow containing the task and file nodes needed to
    # initialize the pipeline execution, then splice it in front of the
    # existing workflow.
    run_qgraph_gwfile = generic_workflow.get_file("runQgraphFile")
    init_workflow = create_init_workflow(config, qgraph, run_qgraph_gwfile)
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)
def create_init_workflow(config, qgraph, qgraph_gwfile):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    search_opt = {
        "curvals": {"curr_pipetask": "pipetaskInit"},
        "replaceVars": False,
        "expandEnvVars": False,
        "replaceEnvVars": True,
        "required": False,
    }

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # Job that runs the pipeline initialization (--init-only).
    gwjob = GenericWorkflowJob("pipetaskInit")

    job_values = _get_job_values(config, search_opt, "runQuantumCommand")
    job_values["name"] = "pipetaskInit"
    job_values["label"] = "pipetaskInit"

    # Adjust job attribute values if necessary.
    _handle_job_values(job_values, gwjob)

    # Pick one node id per task (not per quantum!) so the init job does not
    # need to read the entire quantum graph.
    node_ids = [
        next(iter(qgraph.getNodesForTask(qgraph.findTaskDefByLabel(task.label)))).nodeId
        for task in qgraph.iterTaskGraph()
    ]
    gwjob.cmdvals["qgraphId"] = qgraph.graphID
    gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{nid.number}" for nid in node_ids]))

    # Fold in site-level attribute and profile values.
    update_job(config, gwjob)

    init_workflow.add_job(gwjob)
    butler_gwfile = _get_butler_gwfile(config, config["submitPath"])
    init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
    _enhance_command(config, init_workflow, gwjob)

    return init_workflow
def _enhance_command(config, generic_workflow, gwjob):
    """Enhance command line with env and file placeholders
    and gather command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    """
    _LOG.debug("gwjob given to _enhance_command: %s", gwjob)

    search_opt = {
        "curvals": {"curr_pipetask": gwjob.label},
        "replaceVars": False,
        "expandEnvVars": False,
        "replaceEnvVars": True,
        "required": False,
    }

    # Choose which QuantumGraph lookup key the command line should use:
    # the full-run graph when per-job graphs are never written or for the
    # init job, otherwise a key unique to this job.  Note: these are lookup
    # keys, not actual physical filenames.
    _, when_save = config.search("whenSaveJobQgraph",
                                 {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if (WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER
            or gwjob.name == "pipetaskInit"):
        qgraph_key = "{runQgraphFile}"
    else:
        qgraph_key = f"{{qgraphFile_{gwjob.name}}}"
    gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", qgraph_key)

    # Swap special placeholders in for the job's input and output files.
    job_files = list(generic_workflow.get_job_inputs(gwjob.name))
    job_files.extend(generic_workflow.get_job_outputs(gwjob.name))
    for gwfile in job_files:
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save dict of other values needed to complete the command line.
    # (Be careful not to replace env variables as they may be different
    # in the compute job.)
    search_opt["replaceVars"] = True
    for key in re.findall(r"{([^}]+)}", gwjob.arguments):
        if key not in gwjob.cmdvals:
            _, gwjob.cmdvals[key] = config.search(key, opt=search_opt)

    # Backwards compatibility: optionally fill everything in right away.
    _, use_lazy_commands = config.search("useLazyCommands", opt={"default": True})
    if not use_lazy_commands:
        gwjob.arguments = _fill_arguments(config, generic_workflow, gwjob.arguments, gwjob.cmdvals)
273def _fill_arguments(config, generic_workflow, arguments, cmdvals):
274 """Replace placeholders in command line string in job.
276 Parameters
277 ----------
278 config : `lsst.ctrl.bps.BpsConfig`
279 Bps configuration.
280 generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
281 Generic workflow containing the job.
282 arguments : `str`
283 String containing placeholders.
284 cmdvals : `dict` [`str`, `Any`]
285 Any command line values that can be used to replace placeholders.
287 Returns
288 -------
289 arguments : `str`
290 Command line with FILE and ENV placeholders replaced.
291 """
292 # Replace file placeholders
293 _, use_shared = config.search("bpsUseShared", opt={"default": False})
294 for file_key in re.findall(r"<FILE:([^>]+)>", arguments):
295 gwfile = generic_workflow.get_file(file_key)
296 if gwfile.wms_transfer and not use_shared or not gwfile.job_shared:
297 uri = os.path.basename(gwfile.src_uri)
298 else:
299 uri = gwfile.src_uri
300 arguments = arguments.replace(f"<FILE:{file_key}>", uri)
302 # Replace env placeholder with submit-side values
303 arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments)
304 arguments = os.path.expandvars(arguments)
306 # Replace remaining vars
307 arguments = arguments.format(**cmdvals)
309 return arguments
def _get_butler_gwfile(config, prefix):
    """Get butler location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "NEVER":
        # No execution butler: jobs access the shared data repository
        # directly; nothing to transfer.
        _, butler_location = config.search("butlerConfig")
        wms_transfer, job_access_remote, job_shared = False, True, True
    else:
        # Jobs receive their own transferred copy of the execution butler.
        _, butler_location = config.search(".bps_defined.executionButlerDir")
        butler_location = os.path.join(prefix, butler_location)
        wms_transfer, job_access_remote, job_shared = True, False, False

    return GenericWorkflowFile("butlerConfig",
                               src_uri=butler_location,
                               wms_transfer=wms_transfer,
                               job_access_remote=job_access_remote,
                               job_shared=job_shared)
def _get_qgraph_gwfile(config, gwjob, run_qgraph_file, prefix):
    """Get qgraph location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which determining QuantumGraph file.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location (may not include filename).
    """
    _, when_save = config.search("whenSaveJobQgraph",
                                 {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.NEVER:
        # No per-job QuantumGraphs are ever written; every job reads the
        # full run graph.
        return run_qgraph_file

    # Per-job QuantumGraph file, transferred to the job.
    return GenericWorkflowFile(f"qgraphFile_{gwjob.name}",
                               src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
                               wms_transfer=True,
                               job_access_remote=True,
                               job_shared=True)
def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the bps config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching config.
    cmd_line_key : `str` or None
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [`str`, `Any`]
        A mapping between job attributes and their values.
    """
    job_values = {}
    for attr in _ATTRS_ALL:
        found, value = config.search(attr, opt=search_opt)
        job_values[attr] = value if found else None

    # If the automatic memory scaling is enabled (i.e. the memory multiplier
    # is set and it is a positive number greater than 1.0), adjust number
    # of retries when necessary. If the memory multiplier is invalid, disable
    # automatic memory scaling.
    if job_values["memory_multiplier"] is not None:
        if math.ceil(float(job_values["memory_multiplier"])) > 1:
            if job_values["number_of_retries"] is None:
                job_values["number_of_retries"] = DEFAULT_MEM_RETRIES
        else:
            job_values["memory_multiplier"] = None

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            # Robustness fix: use partition instead of split(" ", 1) so a
            # command line consisting of a bare executable with no
            # arguments does not raise ValueError.
            cmd, _, args = cmdline.partition(" ")
            job_values["executable"] = GenericWorkflowExec(os.path.basename(cmd), cmd, False)
            if args:
                job_values["arguments"] = args

    return job_values
def _handle_job_values(quantum_job_values, gwjob, attributes=_ATTRS_ALL):
    """Set the job attributes in the cluster to their correct values.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, Any]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_ALL.
    """
    _LOG.debug("Call to _handle_job_values")
    # Each handler selects its own subset of the requested attributes.
    for handler in (_handle_job_values_universal,
                    _handle_job_values_max,
                    _handle_job_values_sum):
        handler(quantum_job_values, gwjob, attributes)
def _handle_job_values_universal(quantum_job_values, gwjob, attributes=_ATTRS_UNIVERSAL):
    """Handle job attributes that must have the same value for every quantum
    in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, Any]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_UNIVERSAL.
    """
    for attr in _ATTRS_UNIVERSAL & set(attributes):
        _LOG.debug("Handling job %s (job=%s, quantum=%s)", attr, getattr(gwjob, attr),
                   quantum_job_values.get(attr, "MISSING"))
        if attr not in quantum_job_values:
            continue
        quantum_value = quantum_job_values[attr]
        cluster_value = getattr(gwjob, attr)
        if not cluster_value:
            # First (or only) value seen for this attribute.
            setattr(gwjob, attr, quantum_value)
        elif cluster_value != quantum_value:
            # Quanta within a cluster must agree on universal attributes.
            _LOG.error("Inconsistent value for %s in Cluster %s Quantum Number %s\n"
                       "Current cluster value: %s\n"
                       "Quantum value: %s",
                       attr, gwjob.name, quantum_job_values.get("qgraphNodeId", "MISSING"),
                       cluster_value, quantum_value)
            raise RuntimeError(f"Inconsistent value for {attr} in cluster {gwjob.name}.")
def _handle_job_values_max(quantum_job_values, gwjob, attributes=_ATTRS_MAX):
    """Handle job attributes that should be set to their maximum value in
    the in cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes: `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTR_MAX.
    """
    for attr in _ATTRS_MAX & set(attributes):
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            # Attribute not provided for this quantum; keep cluster value.
            continue
        else:
            # Update when this quantum's value is the first non-None value
            # seen, or is strictly larger than the cluster's current value.
            needs_update = False
            if current_value is None:
                if quantum_value is not None:
                    needs_update = True
            else:
                if quantum_value is not None and current_value < quantum_value:
                    needs_update = True
            if needs_update:
                setattr(gwjob, attr, quantum_value)

                # When updating memory requirements for a job, check if memory
                # autoscaling is enabled. If it is, always use the memory
                # multiplier and the number of retries which comes with the
                # quantum.
                #
                # Note that as a result, the quantum with the biggest memory
                # requirements will determine whether the memory autoscaling
                # will be enabled (or disabled) depending on the value of its
                # memory multiplier.
                if attr == "request_memory":
                    # NOTE(review): direct indexing assumes
                    # "memory_multiplier" / "number_of_retries" are present
                    # whenever "request_memory" is — true for dicts built by
                    # _get_job_values; confirm for any other caller.
                    gwjob.memory_multiplier = quantum_job_values["memory_multiplier"]
                    if gwjob.memory_multiplier is not None:
                        gwjob.number_of_retries = quantum_job_values["number_of_retries"]
def _handle_job_values_sum(quantum_job_values, gwjob, attributes=_ATTRS_SUM):
    """Handle job attributes that are the sum of their values in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_SUM.
    """
    for attr in _ATTRS_SUM & set(attributes):
        addend = quantum_job_values[attr]
        running_total = getattr(gwjob, attr)
        # A falsy current value (None or 0) means nothing accumulated yet.
        setattr(gwjob, attr, addend if not running_total else running_total + addend)
def create_generic_workflow(config, cqgraph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether saving per-job QuantumGraph files in the loop.
    save_per_job_qgraph = False
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    if WhenToSaveQuantumGraphs[when_save.upper()] == WhenToSaveQuantumGraphs.TRANSFORM:
        save_per_job_qgraph = True

    generic_workflow = GenericWorkflow(name)

    # Save full run QuantumGraph for use by jobs.
    generic_workflow.add_file(GenericWorkflowFile("runQgraphFile",
                                                  src_uri=config["runQgraphFile"],
                                                  wms_transfer=True,
                                                  job_access_remote=True,
                                                  job_shared=True))

    # One generic workflow job per cluster.
    for cluster in cqgraph.clusters():
        _LOG.debug("Loop over clusters: %s, %s", cluster, type(cluster))
        _LOG.debug("cqgraph: name=%s, len=%s, label=%s, ids=%s", cluster.name,
                   len(cluster.qgraph_node_ids), cluster.label, cluster.qgraph_node_ids)

        gwjob = GenericWorkflowJob(cluster.name)

        # First get job values from cluster or cluster config.
        search_opt = {"curvals": {},
                      "replaceVars": False,
                      "expandEnvVars": False,
                      "replaceEnvVars": True,
                      "required": False}

        # If some config values are set for this cluster.
        if cluster.label in config["cluster"]:
            _LOG.debug("config['cluster'][%s] = %s", cluster.label, config["cluster"][cluster.label])
            cluster_job_values = _get_job_values(config["cluster"][cluster.label], search_opt,
                                                "runQuantumCommand")
        else:
            cluster_job_values = {}

        cluster_job_values['name'] = cluster.name
        cluster_job_values['label'] = cluster.label
        cluster_job_values['quanta_counts'] = cluster.quanta_counts
        cluster_job_values['tags'] = cluster.tags
        _LOG.debug("cluster_job_values = %s", cluster_job_values)
        # Only the keys actually present are handled here; remaining
        # attributes are filled in from the per-quantum values below.
        _handle_job_values(cluster_job_values, gwjob, cluster_job_values.keys())

        # For purposes of whether to continue searching for a value is whether
        # the value evaluates to False.
        unset_attributes = {attr for attr in _ATTRS_ALL if not getattr(gwjob, attr)}

        _LOG.debug("unset_attributes=%s", unset_attributes)
        _LOG.debug("set=%s", _ATTRS_ALL - unset_attributes)

        # For job info not defined at cluster level, attempt to get job info
        # either common or aggregate for all Quanta in cluster.
        for node_id in iter(cluster.qgraph_node_ids):
            _LOG.debug("node_id=%s", node_id)
            qnode = cqgraph.get_quantum_node(node_id)
            search_opt['curvals'] = {"curr_pipetask": qnode.taskDef.label}
            quantum_job_values = _get_job_values(config, search_opt, "runQuantumCommand")
            _handle_job_values(quantum_job_values, gwjob, unset_attributes)

        # Update job with workflow attribute and profile values.
        update_job(config, gwjob)
        qgraph_gwfile = _get_qgraph_gwfile(config, gwjob, generic_workflow.get_file("runQgraphFile"),
                                           config["submitPath"])
        butler_gwfile = _get_butler_gwfile(config, config["submitPath"])

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        gwjob.cmdvals["qgraphId"] = cqgraph.qgraph.graphID
        gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in
                                                         cluster.qgraph_node_ids]))
        _enhance_command(config, generic_workflow, gwjob)

        # If writing per-job QuantumGraph files during TRANSFORM stage,
        # write it now while in memory.
        if save_per_job_qgraph:
            save_qg_subgraph(cqgraph.qgraph, qgraph_gwfile.src_uri, cluster.qgraph_node_ids)

    # Create job dependencies.
    for parent in cqgraph.clusters():
        for child in cqgraph.successors(parent):
            generic_workflow.add_job_relationships(parent.name, child.name)

    # Add initial workflow.
    # NOTE(review): the default passed to config.get is the literal string
    # "{default: False}", which would be truthy under plain dict semantics;
    # presumably BpsConfig.get interprets this syntax specially — confirm.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, cqgraph.qgraph, generic_workflow)

    # Run-level attributes passed through to the WMS.
    generic_workflow.run_attrs.update({"bps_isjob": "True",
                                       "bps_project": config["project"],
                                       "bps_campaign": config["campaign"],
                                       "bps_run": generic_workflow.name,
                                       "bps_operator": config["operator"],
                                       "bps_payload": config["payloadName"],
                                       "bps_runsite": config["computeSite"]})

    # Add final job.
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow
def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    # Start from a copy of the submit config, then record the workflow's
    # name and where its files live.
    gw_config = BpsConfig(config)
    gw_config["workflowName"] = config["uniqProcName"]
    gw_config["workflowPath"] = prefix
    return gw_config
def add_final_job(config, generic_workflow, prefix):
    """Add final workflow job depending upon configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    prefix : `str`
        Directory in which to output final script.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")

    search_opt = {"searchobj": config[".executionButler"], "default": None}
    if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
        # Create the job that merges the execution butler back into the
        # central data repository.
        gwjob = GenericWorkflowJob("mergeExecutionButler")
        gwjob.label = "mergeExecutionButler"

        # Fill in any configured job attributes (no command line key;
        # the command is generated below instead).
        job_values = _get_job_values(config, search_opt, None)
        for attr in _ATTRS_ALL:
            if not getattr(gwjob, attr) and job_values.get(attr, None):
                setattr(gwjob, attr, job_values[attr])

        update_job(config, gwjob)

        # Create script and add command line to job.
        gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)

        # Determine inputs from command line.
        for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
            gwfile = generic_workflow.get_file(file_key)
            # Consistency fix: pass files as a list, matching the other
            # add_job_inputs call sites in this module.
            generic_workflow.add_job_inputs(gwjob.name, [gwfile])

        _enhance_command(config, generic_workflow, gwjob)

        # Put transfer repo job in appropriate location in workflow.
        if when_merge.upper() == "ALWAYS":
            # Run even if payload jobs failed: add as special final job.
            generic_workflow.add_final(gwjob)
        elif when_merge.upper() == "SUCCESS":
            # Run only after all payload jobs succeed: add as regular
            # sink node.
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            # Bug fix: the message now names the actual config key
            # (whenMerge) instead of the non-existent when_merge.
            raise ValueError(f"Invalid value for executionButler.whenMerge {when_merge}")
def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Directory in which to output final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    search_opt = {'replaceVars': False, 'replaceEnvVars': False, 'expandEnvVars': False,
                  'searchobj': config['executionButler']}

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w") as fh:
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)
        print("set -x", file=fh)

        # The script's two positional arguments become env variables the
        # commands below can reference.
        print("butlerConfig=$1", file=fh)
        print("executionButlerDir=$2", file=fh)

        # Emit one script line per numbered command (command1, command2,
        # ...) found in the executionButler config section.
        i = 1
        found, command = config.search(f".executionButler.command{i}", opt=search_opt)
        while found:
            # Temporarily replace any env vars so formatter doesn't try to
            # replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be args to script and
            # set to env vars.
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in command string (toggle replaceVars
            # on only for this formatting call).
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env place holders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)

            print(command, file=fh)
            i += 1
            found, command = config.search(f".executionButler.command{i}", opt=search_opt)
    # Make the generated script executable.
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"
def add_final_job_as_sink(generic_workflow, final_job):
    """Add final job as the single sink for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as new sink node depending upon all previous sink nodes.
    """
    # The current sink nodes are the jobs with no outgoing edges; the
    # final job becomes a child of every one of them.
    sinks = [job for job in generic_workflow if generic_workflow.out_degree(job) == 0]
    _LOG.debug("gw_sinks = %s", sinks)

    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(sinks, final_job.name)