Coverage for python/lsst/ctrl/bps/transform.py: 8%
313 statements
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Driver for the transformation of a QuantumGraph into a generic workflow.
23"""
25import logging
26import math
27import os
28import re
29import dataclasses
30import copy
32from lsst.daf.butler.core.utils import time_this
34from . import (
35 DEFAULT_MEM_RETRIES,
36 BpsConfig,
37 GenericWorkflow,
38 GenericWorkflowJob,
39 GenericWorkflowFile,
40 GenericWorkflowExec,
41)
42from .bps_utils import (
43 save_qg_subgraph,
44 WhenToSaveQuantumGraphs,
45 create_job_quantum_graph_filename,
46 _create_execution_butler
47)
49# All available job attributes.
50_ATTRS_ALL = frozenset([field.name for field in dataclasses.fields(GenericWorkflowJob)])
52# Job attributes that need to be set to their maximal value in the cluster.
53_ATTRS_MAX = frozenset({
54 "memory_multiplier",
55 "number_of_retries",
56 "request_cpus",
57 "request_memory",
58})
60# Job attributes that need to be set to the sum of their values in the cluster.
61_ATTRS_SUM = frozenset({
62 "request_disk",
63 "request_walltime",
64})
66# Job attributes that do not fall into a specific category.
67_ATTRS_MISC = frozenset({
68 "cmdline",
69 "cmdvals",
70 "environment",
71 "pre_cmdline",
72 "post_cmdline",
73 "profile",
74 "attrs",
75})
77# Attributes that need to be the same for each quantum in the cluster.
78_ATTRS_UNIVERSAL = frozenset(_ATTRS_ALL - (_ATTRS_MAX | _ATTRS_MISC | _ATTRS_SUM))
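# Illustrative sketch (not part of the original module): shows how frozenset
# categories like the ones above partition a dataclass's fields, using a small
# hypothetical dataclass instead of GenericWorkflowJob.
def _example_attribute_partition():
    import dataclasses

    @dataclasses.dataclass
    class _ExampleJob:
        label: str = ""
        request_memory: int = 0
        request_disk: int = 0
        cmdline: str = ""

    all_attrs = frozenset(field.name for field in dataclasses.fields(_ExampleJob))
    max_attrs = frozenset({"request_memory"})
    sum_attrs = frozenset({"request_disk"})
    misc_attrs = frozenset({"cmdline"})
    # Anything not claimed by a max/sum/misc rule must match across the cluster.
    return all_attrs - (max_attrs | sum_attrs | misc_attrs)  # -> frozenset({"label"})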
80_LOG = logging.getLogger(__name__)
83def transform(config, cqgraph, prefix):
84 """Transform a ClusteredQuantumGraph to a GenericWorkflow.
86 Parameters
87 ----------
88 config : `lsst.ctrl.bps.BpsConfig`
89 BPS configuration.
90 cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
91 A clustered quantum graph to transform into a generic workflow.
92 prefix : `str`
93 Root path for any output files.
95 Returns
96 -------
97 generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
98 The generic workflow transformed from the clustered quantum graph.
99 generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
100 Configuration to accompany GenericWorkflow.
101 """
102 _, when_create = config.search(".executionButler.whenCreate")
103 if when_create.upper() == "TRANSFORM":
104 _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
105 _LOG.info("Creating execution butler in '%s'", execution_butler_dir)
106 with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Creating execution butler completed"):
107 _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)
109 if cqgraph.name is not None:
110 name = cqgraph.name
111 else:
112 _, name = config.search("uniqProcName", opt={"required": True})
114 generic_workflow = create_generic_workflow(config, cqgraph, name, prefix)
115 generic_workflow_config = create_generic_workflow_config(config, prefix)
117 return generic_workflow, generic_workflow_config
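# Usage sketch (illustrative only, not part of the original module): how a submission
# driver could invoke transform(); the submit path here is hypothetical and the
# BpsConfig and ClusteredQuantumGraph are assumed to come from earlier stages.
def _example_transform_call(config, cqgraph):
    submit_prefix = "/path/to/submit"  # hypothetical submit directory
    generic_workflow, generic_workflow_config = transform(config, cqgraph, submit_prefix)
    return generic_workflow, generic_workflow_config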
120def add_workflow_init_nodes(config, qgraph, generic_workflow):
121 """Add nodes to workflow graph that perform initialization steps.
123 Assumes that all of the initialization should be executed prior to any
124 of the current workflow.
126 Parameters
127 ----------
128 config : `lsst.ctrl.bps.BpsConfig`
129 BPS configuration.
130 qgraph : `lsst.pipe.base.graph.QuantumGraph`
131 The quantum graph the generic workflow represents.
132 generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
133 Generic workflow to which the initialization steps should be added.
134 """
135 # Create a workflow graph that will have task and file nodes necessary for
136 # initializing the pipeline execution
137 init_workflow = create_init_workflow(config, qgraph, generic_workflow.get_file("runQgraphFile"))
138 _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
139 generic_workflow.add_workflow_source(init_workflow)
142def create_init_workflow(config, qgraph, qgraph_gwfile):
143 """Create workflow for running initialization job(s).
145 Parameters
146 ----------
147 config : `lsst.ctrl.bps.BpsConfig`
148 BPS configuration.
149 qgraph : `lsst.pipe.base.graph.QuantumGraph`
150 The quantum graph the generic workflow represents.
151 qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
152 File object for the full run QuantumGraph file.
154 Returns
155 -------
156 init_workflow : `lsst.ctrl.bps.GenericWorkflow`
157 GenericWorkflow consisting of job(s) to initialize workflow.
158 """
159 _LOG.debug("creating init subgraph")
160 _LOG.debug("creating init task input(s)")
161 search_opt = {"curvals": {"curr_pipetask": "pipetaskInit"},
162 "replaceVars": False,
163 "expandEnvVars": False,
164 "replaceEnvVars": True,
165 "required": False}
167 init_workflow = GenericWorkflow("init")
168 init_workflow.add_file(qgraph_gwfile)
170 # create job for executing --init-only
171 gwjob = GenericWorkflowJob("pipetaskInit")
173 job_values = _get_job_values(config, search_opt, "runQuantumCommand")
174 job_values["name"] = "pipetaskInit"
175 job_values["label"] = "pipetaskInit"
177 # Adjust job attribute values if necessary.
178 _handle_job_values(job_values, gwjob)
180 # Pick a node id for each task (not quantum!) to avoid reading the entire
181 # quantum graph during the initialization stage.
182 node_ids = []
183 for task in qgraph.iterTaskGraph():
184 task_def = qgraph.findTaskDefByLabel(task.label)
185 node = next(iter(qgraph.getNodesForTask(task_def)))
186 node_ids.append(node.nodeId)
187 gwjob.cmdvals["qgraphId"] = qgraph.graphID
188 gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in node_ids]))
190 init_workflow.add_job(gwjob)
192 # Lookup butler values
193 _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
194 _, butler_config = config.search("butlerConfig", opt=search_opt)
195 _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)
196 prefix = config["submitPath"]
197 butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)
199 init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
200 _enhance_command(config, init_workflow, gwjob, {})
202 return init_workflow
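# Illustrative sketch (not part of the original module): the "qgraphNodeId" command
# value above is a comma-separated, sorted list of node-id numbers rendered as
# strings; hypothetical integers stand in for real QuantumGraph node ids.
def _example_qgraph_node_id_cmdval():
    node_numbers = [12, 3, 47]  # hypothetical node-id numbers, one per task
    return ",".join(sorted(f"{number}" for number in node_numbers))  # -> "12,3,47" (string sort)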
205def _enhance_command(config, generic_workflow, gwjob, cached_job_values):
206 """Enhance command line with env and file placeholders
207 and gather command line values.
209 Parameters
210 ----------
211 config : `lsst.ctrl.bps.BpsConfig`
212 BPS configuration.
213 generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
214 Generic workflow that contains the job.
215 gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
216 Generic workflow job to which the updated executable, arguments,
217 and values should be saved.
218 cached_job_values : `dict` [`str`, `dict` [`str`, `Any`]]
219 Cached values common across jobs with the same label. Updated if values
220 aren't already saved for the given gwjob's label.
221 """
222 _LOG.debug("gwjob given to _enhance_command: %s", gwjob)
224 search_opt = {"curvals": {"curr_pipetask": gwjob.label},
225 "replaceVars": False,
226 "expandEnvVars": False,
227 "replaceEnvVars": True,
228 "required": False}
230 if gwjob.label not in cached_job_values:
231 cached_job_values[gwjob.label] = {}
232 # Allowing whenSaveJobQgraph and useLazyCommands per pipetask label.
233 key = "whenSaveJobQgraph"
234 _, when_save = config.search(key, opt=search_opt)
235 cached_job_values[gwjob.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]
237 key = "useLazyCommands"
238 search_opt["default"] = True
239 _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
240 del search_opt["default"]
242 # Change qgraph variable to match whether using run or per-job qgraph
243 # Note: these are lookup keys, not actual physical filenames.
244 if cached_job_values[gwjob.label]["whenSaveJobQgraph"] == WhenToSaveQuantumGraphs.NEVER:
245 gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
246 elif gwjob.name == "pipetaskInit":
247 gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
248 else: # Needed unique file keys for per-job QuantumGraphs
249 gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")
251 # Replace files with special placeholders
252 for gwfile in generic_workflow.get_job_inputs(gwjob.name):
253 gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
254 for gwfile in generic_workflow.get_job_outputs(gwjob.name):
255 gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
257 # Save dict of other values needed to complete the command line.
258 # (Be careful not to replace env variables as they may
259 # be different in the compute job.)
260 search_opt["replaceVars"] = True
262 for key in re.findall(r"{([^}]+)}", gwjob.arguments):
263 if key not in gwjob.cmdvals:
264 if key not in cached_job_values[gwjob.label]:
265 _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
266 gwjob.cmdvals[key] = cached_job_values[gwjob.label][key]
268 # backwards compatibility
269 if not cached_job_values[gwjob.label]["useLazyCommands"]:
270 if "bpsUseShared" not in cached_job_values[gwjob.label]:
271 key = "bpsUseShared"
272 search_opt["default"] = True
273 _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
274 del search_opt["default"]
276 gwjob.arguments = _fill_arguments(cached_job_values[gwjob.label]["bpsUseShared"], generic_workflow,
277 gwjob.arguments, gwjob.cmdvals)
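# Illustrative sketch (not part of the original module): the placeholder rewriting
# done above, applied to a hypothetical command-line template. Input-file placeholders
# become <FILE:...> markers and the remaining {curly} keys are what still need cmdvals.
def _example_placeholder_rewrite():
    import re
    arguments = "pipetask run -b {butlerConfig} -g {qgraphFile} --qgraph-id {qgraphId}"
    arguments = arguments.replace("{qgraphFile}", "{qgraphFile_job1}")  # per-job qgraph key
    for file_name in ("butlerConfig", "qgraphFile_job1"):  # hypothetical job input files
        arguments = arguments.replace(f"{{{file_name}}}", f"<FILE:{file_name}>")
    remaining_keys = re.findall(r"{([^}]+)}", arguments)  # -> ["qgraphId"]
    return arguments, remaining_keys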
280def _fill_arguments(use_shared, generic_workflow, arguments, cmdvals):
281 """Replace placeholders in command line string in job.
283 Parameters
284 ----------
285 use_shared : `bool`
286 Whether using shared filesystem.
287 generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
288 Generic workflow containing the job.
289 arguments : `str`
290 String containing placeholders.
291 cmdvals : `dict` [`str`, `Any`]
292 Any command line values that can be used to replace placeholders.
294 Returns
295 -------
296 arguments : `str`
297 Command line with FILE and ENV placeholders replaced.
298 """
299 # Replace file placeholders
300 for file_key in re.findall(r"<FILE:([^>]+)>", arguments):
301 gwfile = generic_workflow.get_file(file_key)
302 if not gwfile.wms_transfer:
303 # Must assume a full URI if the file appears on the command line
304 # and the WMS is not responsible for transferring it.
305 uri = gwfile.src_uri
306 elif use_shared:
307 if gwfile.job_shared:
308 # Have shared filesystems and jobs can share file.
309 uri = gwfile.src_uri
310 else:
311 # Taking advantage of inside knowledge. Not future-proof.
312 # Temporary fix until there is a job wrapper that pulls files
313 # within the job.
314 if gwfile.name == "butlerConfig" and os.path.splitext(gwfile.src_uri)[1] != ".yaml":
315 uri = "butler.yaml"
316 else:
317 uri = os.path.basename(gwfile.src_uri)
318 else: # Using push transfer
319 uri = os.path.basename(gwfile.src_uri)
321 arguments = arguments.replace(f"<FILE:{file_key}>", uri)
323 # Replace env placeholder with submit-side values
324 arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments)
325 arguments = os.path.expandvars(arguments)
327 # Replace remaining vars
328 arguments = arguments.format(**cmdvals)
330 return arguments
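# Illustrative sketch (not part of the original module): the final substitution steps
# used above, applied to a hypothetical argument string (no GenericWorkflow needed).
def _example_fill_arguments():
    import os
    import re
    arguments = "pipetask run -b <FILE:butlerConfig> -o <ENV:HOME>/out --qgraph-id {qgraphId}"
    arguments = arguments.replace("<FILE:butlerConfig>", "butler.yaml")  # file placeholder
    arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments)  # env placeholder -> $HOME
    arguments = os.path.expandvars(arguments)  # expand with submit-side value
    arguments = arguments.format(qgraphId="1234-abcd")  # remaining cmdvals (hypothetical)
    return arguments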
333def _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir):
334 """Get butler location to be used by job.
336 Parameters
337 ----------
338 prefix : `str`
339 Root path for any output files.
340 when_create : `str`
341 When to create the execution butler; used to determine whether the job
342 is using an execution butler or not.
343 butler_config : `str`
344 Location of the central butler repository's config file.
345 execution_butler_dir : `str`
346 Location of execution butler repository.
348 Returns
349 -------
350 gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
351 Representation of butler location.
352 """
353 if when_create.upper() == "NEVER":
354 wms_transfer = False
355 job_access_remote = True
356 job_shared = True
357 else:
358 butler_config = execution_butler_dir
359 if not butler_config.startswith("/"):
360 butler_config = f"{prefix}/{butler_config}"
361 wms_transfer = True
362 job_access_remote = False
363 job_shared = False
365 gwfile = GenericWorkflowFile("butlerConfig",
366 src_uri=butler_config,
367 wms_transfer=wms_transfer,
368 job_access_remote=job_access_remote,
369 job_shared=job_shared)
371 return gwfile
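# Illustrative sketch (not part of the original module): when an execution butler is
# used, a relative repository path is anchored under the submit prefix, as in the
# branch above; the paths here are hypothetical.
def _example_execution_butler_path():
    prefix = "/path/to/submit"          # hypothetical submit directory
    execution_butler_dir = "EXEC_REPO"  # hypothetical relative repo location
    if not execution_butler_dir.startswith("/"):
        execution_butler_dir = f"{prefix}/{execution_butler_dir}"
    return execution_butler_dir  # -> "/path/to/submit/EXEC_REPO"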
374def _get_qgraph_gwfile(config, save_qgraph_per_job, gwjob, run_qgraph_file, prefix):
375 """Get qgraph location to be used by job.
377 Parameters
378 ----------
379 config : `lsst.ctrl.bps.BpsConfig`
380 Bps configuration.
381 save_qgraph_per_job : `lsst.ctrl.bps.bps_utils.WhenToSaveQuantumGraphs`
382 At what submission stage to save per-job qgraph files (or NEVER).
383 gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
384 Job for which the QuantumGraph file is being determined.
385 run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
386 File representation of the full run QuantumGraph.
387 prefix : `str`
388 Path prefix for any files written.
390 Returns
391 -------
392 gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
393 Representation of the QuantumGraph location (per-job or full-run file).
394 """
395 qgraph_gwfile = None
396 if save_qgraph_per_job != WhenToSaveQuantumGraphs.NEVER:
397 qgraph_gwfile = GenericWorkflowFile(f"qgraphFile_{gwjob.name}",
398 src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
399 wms_transfer=True,
400 job_access_remote=True,
401 job_shared=True)
402 else:
403 qgraph_gwfile = run_qgraph_file
405 return qgraph_gwfile
408def _get_job_values(config, search_opt, cmd_line_key):
409 """Gather generic workflow job values from the bps config.
411 Parameters
412 ----------
413 config : `lsst.ctrl.bps.BpsConfig`
414 Bps configuration.
415 search_opt : `dict` [`str`, `Any`]
416 Search options to be used when searching config.
417 cmd_line_key : `str` or None
418 Which command line key to search for (e.g., "runQuantumCommand").
420 Returns
421 -------
422 job_values : `dict` [`str`, `Any`]
423 A mapping between job attributes and their values.
424 """
425 job_values = {}
426 for attr in _ATTRS_ALL:
427 # Variable names in yaml are camel case instead of snake case.
428 yaml_name = re.sub(r"_(\S)", lambda match: match.group(1).upper(), attr)
429 found, value = config.search(yaml_name, opt=search_opt)
430 if found:
431 job_values[attr] = value
432 else:
433 job_values[attr] = None
435 # If the automatic memory scaling is enabled (i.e. the memory multiplier
436 # is set and it is a positive number greater than 1.0), adjust number
437 # of retries when necessary. If the memory multiplier is invalid, disable
438 # automatic memory scaling.
439 if job_values["memory_multiplier"] is not None:
440 if math.ceil(float(job_values["memory_multiplier"])) > 1:
441 if job_values["number_of_retries"] is None:
442 job_values["number_of_retries"] = DEFAULT_MEM_RETRIES
443 else:
444 job_values["memory_multiplier"] = None
446 if cmd_line_key:
447 found, cmdline = config.search(cmd_line_key, opt=search_opt)
448 # Make sure cmdline isn't None as that could be sent in as a
449 # default value in search_opt.
450 if found and cmdline:
451 cmd, args = cmdline.split(" ", 1)
452 job_values["executable"] = GenericWorkflowExec(os.path.basename(cmd), cmd, False)
453 if args:
454 job_values["arguments"] = args
456 return job_values
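# Illustrative sketch (not part of the original module): the snake_case-to-camelCase
# conversion used above when mapping job attribute names to config (yaml) keys.
def _example_yaml_name():
    import re
    attr = "request_memory"
    return re.sub(r"_(\S)", lambda match: match.group(1).upper(), attr)  # -> "requestMemory"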
459def _handle_job_values(quantum_job_values, gwjob, attributes=_ATTRS_ALL):
460 """Set the job attributes in the cluster to their correct values.
462 Parameters
463 ----------
464 quantum_job_values : `dict` [`str`, `Any`]
465 Job values for running single Quantum.
466 gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
467 Generic workflow job in which to store the values.
468 attributes : `Iterable` [`str`], optional
469 Job attributes to be set in the job following different rules.
470 The default value is _ATTRS_ALL.
471 """
472 _LOG.debug("Call to _handle_job_values")
473 _handle_job_values_universal(quantum_job_values, gwjob, attributes)
474 _handle_job_values_max(quantum_job_values, gwjob, attributes)
475 _handle_job_values_sum(quantum_job_values, gwjob, attributes)
478def _handle_job_values_universal(quantum_job_values, gwjob, attributes=_ATTRS_UNIVERSAL):
479 """Handle job attributes that must have the same value for every quantum
480 in the cluster.
482 Parameters
483 ----------
484 quantum_job_values : `dict` [`str`, `Any`]
485 Job values for running single Quantum.
486 gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
487 Generic workflow job in which to store the universal values.
488 attributes : `Iterable` [`str`], optional
489 Job attributes to be set in the job following different rules.
490 The default value is _ATTRS_UNIVERSAL.
491 """
492 for attr in _ATTRS_UNIVERSAL & set(attributes):
493 _LOG.debug("Handling job %s (job=%s, quantum=%s)", attr, getattr(gwjob, attr),
494 quantum_job_values.get(attr, "MISSING"))
495 current_value = getattr(gwjob, attr)
496 try:
497 quantum_value = quantum_job_values[attr]
498 except KeyError:
499 continue
500 else:
501 if not current_value:
502 setattr(gwjob, attr, quantum_value)
503 elif current_value != quantum_value:
504 _LOG.error("Inconsistent value for %s in Cluster %s Quantum Number %s\n"
505 "Current cluster value: %s\n"
506 "Quantum value: %s",
507 attr, gwjob.name, quantum_job_values.get("qgraphNodeId", "MISSING"), current_value,
508 quantum_value)
509 raise RuntimeError(f"Inconsistent value for {attr} in cluster {gwjob.name}.")
512def _handle_job_values_max(quantum_job_values, gwjob, attributes=_ATTRS_MAX):
513 """Handle job attributes that should be set to their maximum value in
514 the cluster.
516 Parameters
517 ----------
518 quantum_job_values : `dict` [`str`, `Any`]
519 Job values for running single Quantum.
520 gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
521 Generic workflow job in which to store the aggregate values.
522 attributes : `Iterable` [`str`], optional
523 Job attributes to be set in the job following different rules.
524 The default value is _ATTRS_MAX.
525 """
526 for attr in _ATTRS_MAX & set(attributes):
527 current_value = getattr(gwjob, attr)
528 try:
529 quantum_value = quantum_job_values[attr]
530 except KeyError:
531 continue
532 else:
533 needs_update = False
534 if current_value is None:
535 if quantum_value is not None:
536 needs_update = True
537 else:
538 if quantum_value is not None and current_value < quantum_value:
539 needs_update = True
540 if needs_update:
541 setattr(gwjob, attr, quantum_value)
543 # When updating memory requirements for a job, check if memory
544 # autoscaling is enabled. If it is, always use the memory
545 # multiplier and the number of retries which comes with the
546 # quantum.
547 #
548 # Note that as a result, the quantum with the biggest memory
549 # requirements will determine whether the memory autoscaling
550 # will be enabled (or disabled) depending on the value of its
551 # memory multiplier.
552 if attr == "request_memory":
553 gwjob.memory_multiplier = quantum_job_values["memory_multiplier"]
554 if gwjob.memory_multiplier is not None:
555 gwjob.number_of_retries = quantum_job_values["number_of_retries"]
558def _handle_job_values_sum(quantum_job_values, gwjob, attributes=_ATTRS_SUM):
559 """Handle job attributes that are the sum of their values in the cluster.
561 Parameters
562 ----------
563 quantum_job_values : `dict` [`str`, `Any`]
564 Job values for running single Quantum.
565 gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
566 Generic workflow job in which to store the aggregate values.
567 attributes : `Iterable` [`str`], optional
568 Job attributes to be set in the job following different rules.
569 The default value is _ATTRS_SUM.
570 """
571 for attr in _ATTRS_SUM & set(attributes):
572 current_value = getattr(gwjob, attr)
573 if not current_value:
574 setattr(gwjob, attr, quantum_job_values[attr])
575 else:
576 setattr(gwjob, attr, current_value + quantum_job_values[attr])
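# Illustrative sketch (not part of the original module): how the helpers above fold
# per-quantum values into a single cluster job, using hypothetical resource numbers.
def _example_aggregate_cluster_values():
    gwjob = GenericWorkflowJob("exampleCluster")
    quanta = [
        {"request_memory": 2048, "memory_multiplier": None, "number_of_retries": None,
         "request_disk": 1024, "request_walltime": 600},
        {"request_memory": 4096, "memory_multiplier": None, "number_of_retries": None,
         "request_disk": 2048, "request_walltime": 300},
    ]
    for quantum_job_values in quanta:
        _handle_job_values_max(quantum_job_values, gwjob)  # keep the larger memory request
        _handle_job_values_sum(quantum_job_values, gwjob)  # accumulate disk and walltime
    return gwjob.request_memory, gwjob.request_disk, gwjob.request_walltime  # -> (4096, 3072, 900)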
579def create_generic_workflow(config, cqgraph, name, prefix):
580 """Create a generic workflow from a ClusteredQuantumGraph such that it
581 has information needed for WMS (e.g., command lines).
583 Parameters
584 ----------
585 config : `lsst.ctrl.bps.BpsConfig`
586 BPS configuration.
587 cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
588 ClusteredQuantumGraph for running a specific pipeline on a specific
589 payload.
590 name : `str`
591 Name for the workflow (typically unique).
592 prefix : `str`
593 Root path for any output files.
595 Returns
596 -------
597 generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
598 Generic workflow for the given ClusteredQuantumGraph + config.
599 """
600 # Determine whether to save per-job QuantumGraph files in the loop.
601 _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
602 save_qgraph_per_job = WhenToSaveQuantumGraphs[when_save.upper()]
604 search_opt = {"replaceVars": False,
605 "expandEnvVars": False,
606 "replaceEnvVars": True,
607 "required": False}
609 # Lookup butler values once
610 _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
611 _, butler_config = config.search("butlerConfig", opt=search_opt)
612 _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)
614 generic_workflow = GenericWorkflow(name)
616 # Save full run QuantumGraph for use by jobs
617 generic_workflow.add_file(GenericWorkflowFile("runQgraphFile",
618 src_uri=config["runQgraphFile"],
619 wms_transfer=True,
620 job_access_remote=True,
621 job_shared=True))
623 # Cache pipetask-specific or more generic job values to minimize the number
624 # of config searches.
625 cached_job_values = {}
626 cached_pipetask_values = {}
628 for cluster in cqgraph.clusters():
629 _LOG.debug("Loop over clusters: %s, %s", cluster, type(cluster))
630 _LOG.debug("cqgraph: name=%s, len=%s, label=%s, ids=%s", cluster.name,
631 len(cluster.qgraph_node_ids), cluster.label, cluster.qgraph_node_ids)
633 gwjob = GenericWorkflowJob(cluster.name)
635 # First get job values from cluster or cluster config
636 search_opt["curvals"] = {"curr_cluster": cluster.label}
638 # If some config values are set for this cluster
639 if cluster.label not in cached_job_values:
640 _LOG.debug("config['cluster'][%s] = %s", cluster.label, config["cluster"][cluster.label])
641 cached_job_values[cluster.label] = {}
643 # Allowing whenSaveJobQgraph and useLazyCommands per cluster label.
644 key = "whenSaveJobQgraph"
645 _, when_save = config.search(key, opt=search_opt)
646 cached_job_values[cluster.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]
648 key = "useLazyCommands"
649 search_opt["default"] = True
650 _, cached_job_values[cluster.label][key] = config.search(key, opt=search_opt)
651 del search_opt["default"]
653 if cluster.label in config["cluster"]:
654 # Don't want to get global defaults here so only look in
655 # cluster section.
656 cached_job_values[cluster.label].update(_get_job_values(config["cluster"][cluster.label],
657 search_opt, "runQuantumCommand"))
658 cluster_job_values = copy.copy(cached_job_values[cluster.label])
660 cluster_job_values['name'] = cluster.name
661 cluster_job_values['label'] = cluster.label
662 cluster_job_values['quanta_counts'] = cluster.quanta_counts
663 cluster_job_values['tags'] = cluster.tags
664 _LOG.debug("cluster_job_values = %s", cluster_job_values)
665 _handle_job_values(cluster_job_values, gwjob, cluster_job_values.keys())
667 # The test for whether to continue searching for a value is whether
668 # the value evaluates to False.
669 unset_attributes = {attr for attr in _ATTRS_ALL if not getattr(gwjob, attr)}
671 _LOG.debug("unset_attributes=%s", unset_attributes)
672 _LOG.debug("set=%s", _ATTRS_ALL - unset_attributes)
674 # For job info not defined at the cluster level, attempt to get job info
675 # that is either common to or aggregated over all Quanta in the cluster.
676 for node_id in iter(cluster.qgraph_node_ids):
677 _LOG.debug("node_id=%s", node_id)
678 qnode = cqgraph.get_quantum_node(node_id)
680 if qnode.taskDef.label not in cached_pipetask_values:
681 search_opt['curvals'] = {"curr_pipetask": qnode.taskDef.label}
682 cached_pipetask_values[qnode.taskDef.label] = _get_job_values(config, search_opt,
683 "runQuantumCommand")
685 _handle_job_values(cached_pipetask_values[qnode.taskDef.label], gwjob, unset_attributes)
687 # Update job with workflow attribute and profile values.
688 qgraph_gwfile = _get_qgraph_gwfile(config, save_qgraph_per_job, gwjob,
689 generic_workflow.get_file("runQgraphFile"), prefix)
690 butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)
692 generic_workflow.add_job(gwjob)
693 generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
695 gwjob.cmdvals["qgraphId"] = cqgraph.qgraph.graphID
696 gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id.number}" for node_id in
697 cluster.qgraph_node_ids]))
698 _enhance_command(config, generic_workflow, gwjob, cached_job_values)
700 # If writing per-job QuantumGraph files during the TRANSFORM stage,
701 # write them now while the full graph is in memory.
702 if save_qgraph_per_job == WhenToSaveQuantumGraphs.TRANSFORM:
703 save_qg_subgraph(cqgraph.qgraph, qgraph_gwfile.src_uri, cluster.qgraph_node_ids)
705 # Create job dependencies.
706 for parent in cqgraph.clusters():
707 for child in cqgraph.successors(parent):
708 generic_workflow.add_job_relationships(parent.name, child.name)
710 # Add initial workflow.
711 if config.get("runInit", "{default: False}"):
712 add_workflow_init_nodes(config, cqgraph.qgraph, generic_workflow)
714 generic_workflow.run_attrs.update({"bps_isjob": "True",
715 "bps_project": config["project"],
716 "bps_campaign": config["campaign"],
717 "bps_run": generic_workflow.name,
718 "bps_operator": config["operator"],
719 "bps_payload": config["payloadName"],
720 "bps_runsite": config["computeSite"]})
722 # Add final job
723 add_final_job(config, generic_workflow, prefix)
725 return generic_workflow
728def create_generic_workflow_config(config, prefix):
729 """Create generic workflow configuration.
731 Parameters
732 ----------
733 config : `lsst.ctrl.bps.BpsConfig`
734 Bps configuration.
735 prefix : `str`
736 Root path for any output files.
738 Returns
739 -------
740 generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
741 Configuration accompanying the GenericWorkflow.
742 """
743 generic_workflow_config = BpsConfig(config)
744 generic_workflow_config["workflowName"] = config["uniqProcName"]
745 generic_workflow_config["workflowPath"] = prefix
746 return generic_workflow_config
749def add_final_job(config, generic_workflow, prefix):
750 """Add final workflow job depending upon configuration.
752 Parameters
753 ----------
754 config : `lsst.ctrl.bps.BpsConfig`
755 Bps configuration.
756 generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
757 Generic workflow to which the final job should be added.
758 prefix : `str`
759 Directory in which to output final script.
760 """
761 _, when_create = config.search(".executionButler.whenCreate")
762 _, when_merge = config.search(".executionButler.whenMerge")
764 search_opt = {"searchobj": config[".executionButler"], "default": None}
765 if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
766 # create gwjob
767 gwjob = GenericWorkflowJob("mergeExecutionButler")
768 gwjob.label = "mergeExecutionButler"
770 job_values = _get_job_values(config, search_opt, None)
771 for attr in _ATTRS_ALL:
772 if not getattr(gwjob, attr) and job_values.get(attr, None):
773 setattr(gwjob, attr, job_values[attr])
775 # Create script and add command line to job.
776 gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)
778 # Determine inputs from command line.
779 for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
780 gwfile = generic_workflow.get_file(file_key)
781 generic_workflow.add_job_inputs(gwjob.name, gwfile)
783 _enhance_command(config, generic_workflow, gwjob, {})
785 # Put transfer repo job in appropriate location in workflow.
786 if when_merge.upper() == "ALWAYS":
787 # add as special final job
788 generic_workflow.add_final(gwjob)
789 elif when_merge.upper() == "SUCCESS":
790 # add as regular sink node
791 add_final_job_as_sink(generic_workflow, gwjob)
792 else:
793 raise ValueError(f"Invalid value for executionButler.when_merge {when_merge}")
796def _create_final_command(config, prefix):
797 """Create the command and shell script for the final job.
799 Parameters
800 ----------
801 config : `lsst.ctrl.bps.BpsConfig`
802 Bps configuration.
803 prefix : `str`
804 Directory in which to output final script.
806 Returns
807 -------
808 executable : `lsst.ctrl.bps.GenericWorkflowExec`
809 Executable object for the final script.
810 arguments : `str`
811 Command line needed to call the final script.
812 """
813 search_opt = {'replaceVars': False, 'replaceEnvVars': False, 'expandEnvVars': False,
814 'searchobj': config['executionButler']}
816 script_file = os.path.join(prefix, "final_job.bash")
817 with open(script_file, "w") as fh:
818 print("#!/bin/bash\n", file=fh)
819 print("set -e", file=fh)
820 print("set -x", file=fh)
822 print("butlerConfig=$1", file=fh)
823 print("executionButlerDir=$2", file=fh)
825 i = 1
826 found, command = config.search(f".executionButler.command{i}", opt=search_opt)
827 while found:
828 # Temporarily replace any env vars so formatter doesn't try to
829 # replace them.
830 command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)
832 # executionButlerDir and butlerConfig will be args to the script and
833 # set as env vars.
834 command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
835 command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")
837 # Replace all other vars in command string
838 search_opt["replaceVars"] = True
839 command = config.formatter.format(command, config, search_opt)
840 search_opt["replaceVars"] = False
842 # Replace any temporary env placeholders.
843 command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)
845 print(command, file=fh)
846 i += 1
847 found, command = config.search(f".executionButler.command{i}", opt=search_opt)
848 os.chmod(script_file, 0o755)
849 executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)
851 _, orig_butler = config.search("butlerConfig")
852 # The execution butler was saved as butlerConfig in the workflow.
853 return executable, f"{orig_butler} <FILE:butlerConfig>"
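# Illustrative sketch (not part of the original module): the <BPSTMP:...> round trip
# used above protects shell ${...} variables from the formatter; the command string
# here is hypothetical.
def _example_bpstmp_round_trip():
    import re
    command = "some_command ${executionButlerDir} {butlerConfig}"
    command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)             # hide shell vars
    command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")  # becomes a script arg
    command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)              # restore shell vars
    return command  # -> "some_command ${executionButlerDir} ${butlerConfig}"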
856def add_final_job_as_sink(generic_workflow, final_job):
857 """Add final job as the single sink for the workflow.
859 Parameters
860 ----------
861 generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
862 Generic workflow to which the final job should be added as a sink.
863 final_job : `lsst.ctrl.bps.GenericWorkflowJob`
864 Job to add as new sink node depending upon all previous sink nodes.
865 """
866 # Find sink nodes of generic workflow graph.
867 gw_sinks = [n for n in generic_workflow if generic_workflow.out_degree(n) == 0]
868 _LOG.debug("gw_sinks = %s", gw_sinks)
870 generic_workflow.add_job(final_job)
871 generic_workflow.add_job_relationships(gw_sinks, final_job.name)
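# Illustrative sketch (not part of the original module): sink nodes are those with no
# outgoing edges, which is what the out_degree test above selects; shown here with a
# plain networkx DiGraph and hypothetical job names standing in for the GenericWorkflow.
def _example_find_sinks():
    import networkx
    graph = networkx.DiGraph([("jobA", "jobB"), ("jobA", "jobC")])
    return [node for node in graph if graph.out_degree(node) == 0]  # -> ["jobB", "jobC"]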