Coverage for python/lsst/ctrl/bps/transform.py: 8% of 334 statements (coverage.py v6.4.1, created at 2022-06-08 00:59 -0700)
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Driver for the transformation of a QuantumGraph into a generic workflow.
23"""
25import copy
26import dataclasses
27import logging
28import math
29import os
30import re
32from lsst.utils.logging import VERBOSE
33from lsst.utils.timer import time_this, timeMethod
35from . import (
36 DEFAULT_MEM_RETRIES,
37 BpsConfig,
38 GenericWorkflow,
39 GenericWorkflowExec,
40 GenericWorkflowFile,
41 GenericWorkflowJob,
42)
43from .bps_utils import (
44 WhenToSaveQuantumGraphs,
45 _create_execution_butler,
46 create_job_quantum_graph_filename,
47 save_qg_subgraph,
48)
# All available job attributes, derived from the GenericWorkflowJob dataclass
# so the set automatically tracks additions/removals on that class.
_ATTRS_ALL = frozenset([field.name for field in dataclasses.fields(GenericWorkflowJob)])

# Job attributes that need to be set to their maximal value in the cluster.
_ATTRS_MAX = frozenset(
    {
        "memory_multiplier",
        "number_of_retries",
        "request_cpus",
        "request_memory",
        "request_memory_max",
    }
)

# Job attributes that need to be set to sum of their values in the cluster.
_ATTRS_SUM = frozenset(
    {
        "request_disk",
        "request_walltime",
    }
)

# Job attributes that do not fall into a specific aggregation category
# (handled individually, e.g., command lines, environment, profiles).
_ATTRS_MISC = frozenset(
    {
        "cmdline",
        "cmdvals",
        "environment",
        "pre_cmdline",
        "post_cmdline",
        "profile",
        "attrs",
    }
)

# Attributes that need to be the same for each quanta in the cluster
# (everything not covered by the max/sum/misc categories above).
_ATTRS_UNIVERSAL = frozenset(_ATTRS_ALL - (_ATTRS_MAX | _ATTRS_MISC | _ATTRS_SUM))

_LOG = logging.getLogger(__name__)
@timeMethod(logger=_LOG, logLevel=VERBOSE)
def transform(config, cqgraph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany GenericWorkflow.
    """
    # Create the execution butler now if configured for the TRANSFORM stage.
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _LOG.info("Creating execution butler in '%s'", execution_butler_dir)
        with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Creating execution butler completed"):
            _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)

    # Prefer the name stored in the clustered graph; fall back to the
    # required uniqProcName config entry.
    if cqgraph.name is not None:
        name = cqgraph.name
    else:
        _, name = config.search("uniqProcName", opt={"required": True})

    generic_workflow = create_generic_workflow(config, cqgraph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    return generic_workflow, generic_workflow_config
def add_workflow_init_nodes(config, qgraph, generic_workflow):
    """Add nodes to workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    of the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Build a small workflow holding the task and file nodes needed to
    # initialize the pipeline execution, then prepend it to the workflow.
    run_qgraph_gwfile = generic_workflow.get_file("runQgraphFile")
    init_workflow = create_init_workflow(config, qgraph, run_qgraph_gwfile)
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)
def create_init_workflow(config, qgraph, qgraph_gwfile):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    # Config lookups scoped to the special "pipetaskInit" pipetask; defer
    # variable/env-var expansion so values can be filled in later (they may
    # differ on the compute side).
    search_opt = {
        "curvals": {"curr_pipetask": "pipetaskInit"},
        "replaceVars": False,
        "expandEnvVars": False,
        "replaceEnvVars": True,
        "required": False,
    }
    # Narrow subsequent searches by site/cloud when configured.
    found, value = config.search("computeSite", opt=search_opt)
    if found:
        search_opt["curvals"]["curr_site"] = value
    found, value = config.search("computeCloud", opt=search_opt)
    if found:
        search_opt["curvals"]["curr_cloud"] = value

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # create job for executing --init-only
    gwjob = GenericWorkflowJob("pipetaskInit")

    job_values = _get_job_values(config, search_opt, "runQuantumCommand")
    job_values["name"] = "pipetaskInit"
    job_values["label"] = "pipetaskInit"

    # Adjust job attributes values if necessary.
    _handle_job_values(job_values, gwjob)

    # Pick a node id for each task (not quantum!) to avoid reading the entire
    # quantum graph during the initialization stage.
    node_ids = []
    for task in qgraph.iterTaskGraph():
        task_def = qgraph.findTaskDefByLabel(task.label)
        node = next(iter(qgraph.getNodesForTask(task_def)))
        node_ids.append(node.nodeId)
    gwjob.cmdvals["qgraphId"] = qgraph.graphID
    gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id}" for node_id in node_ids]))

    init_workflow.add_job(gwjob)

    # Lookup butler values
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)
    prefix = config["submitPath"]
    butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

    init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
    # Empty cache dict: the init job's label is unique to this one job.
    _enhance_command(config, init_workflow, gwjob, {})

    return init_workflow
def _enhance_command(config, generic_workflow, gwjob, cached_job_values):
    """Enhance command line with env and file placeholders
    and gather command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    cached_job_values : `dict` [`str`, `dict` [`str`, `Any`]]
        Cached values common across jobs with same label. Updated if values
        aren't already saved for given gwjob's label.
    """
    _LOG.debug("gwjob given to _enhance_command: %s", gwjob)

    # Defer variable/env-var expansion; env vars may differ in compute job.
    search_opt = {
        "curvals": {"curr_pipetask": gwjob.label},
        "replaceVars": False,
        "expandEnvVars": False,
        "replaceEnvVars": True,
        "required": False,
    }

    if gwjob.label not in cached_job_values:
        cached_job_values[gwjob.label] = {}
        # Allowing whenSaveJobQgraph and useLazyCommands per pipetask label.
        key = "whenSaveJobQgraph"
        _, when_save = config.search(key, opt=search_opt)
        cached_job_values[gwjob.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

        key = "useLazyCommands"
        search_opt["default"] = True
        _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
        del search_opt["default"]

    # Change qgraph variable to match whether using run or per-job qgraph
    # Note: these are lookup keys, not actual physical filenames.
    if cached_job_values[gwjob.label]["whenSaveJobQgraph"] == WhenToSaveQuantumGraphs.NEVER:
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    elif gwjob.name == "pipetaskInit":
        # The init job always reads the full run QuantumGraph.
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    else:  # Needed unique file keys for per-job QuantumGraphs
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")

    # Replace files with special placeholders
    for gwfile in generic_workflow.get_job_inputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
    for gwfile in generic_workflow.get_job_outputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save dict of other values needed to complete command line.
    # (Be careful to not replace env variables as they may
    # be different in compute job.)
    search_opt["replaceVars"] = True

    # Any remaining {key} placeholders are config lookups; cache them per
    # label so repeated jobs don't re-search the config.
    for key in re.findall(r"{([^}]+)}", gwjob.arguments):
        if key not in gwjob.cmdvals:
            if key not in cached_job_values[gwjob.label]:
                _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            gwjob.cmdvals[key] = cached_job_values[gwjob.label][key]

    # backwards compatibility
    if not cached_job_values[gwjob.label]["useLazyCommands"]:
        if "bpsUseShared" not in cached_job_values[gwjob.label]:
            key = "bpsUseShared"
            search_opt["default"] = True
            _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

        # Eagerly resolve all placeholders into a concrete command line.
        gwjob.arguments = _fill_arguments(
            cached_job_values[gwjob.label]["bpsUseShared"], generic_workflow, gwjob.arguments, gwjob.cmdvals
        )
300def _fill_arguments(use_shared, generic_workflow, arguments, cmdvals):
301 """Replace placeholders in command line string in job.
303 Parameters
304 ----------
305 use_shared : `bool`
306 Whether using shared filesystem.
307 generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
308 Generic workflow containing the job.
309 arguments : `str`
310 String containing placeholders.
311 cmdvals : `dict` [`str`, `Any`]
312 Any command line values that can be used to replace placeholders.
314 Returns
315 -------
316 arguments : `str`
317 Command line with FILE and ENV placeholders replaced.
318 """
319 # Replace file placeholders
320 for file_key in re.findall(r"<FILE:([^>]+)>", arguments):
321 gwfile = generic_workflow.get_file(file_key)
322 if not gwfile.wms_transfer:
323 # Must assume full URI if in command line and told WMS is not
324 # responsible for transferring file.
325 uri = gwfile.src_uri
326 elif use_shared:
327 if gwfile.job_shared:
328 # Have shared filesystems and jobs can share file.
329 uri = gwfile.src_uri
330 else:
331 # Taking advantage of inside knowledge. Not future-proof.
332 # Temporary fix until have job wrapper that pulls files
333 # within job.
334 if gwfile.name == "butlerConfig" and os.path.splitext(gwfile.src_uri)[1] != ".yaml":
335 uri = "butler.yaml"
336 else:
337 uri = os.path.basename(gwfile.src_uri)
338 else: # Using push transfer
339 uri = os.path.basename(gwfile.src_uri)
341 arguments = arguments.replace(f"<FILE:{file_key}>", uri)
343 # Replace env placeholder with submit-side values
344 arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments)
345 arguments = os.path.expandvars(arguments)
347 # Replace remaining vars
348 arguments = arguments.format(**cmdvals)
350 return arguments
def _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir):
    """Get butler location to be used by job.

    Parameters
    ----------
    prefix : `str`
        Root path for any output files.
    when_create : `str`
        When to create the execution butler used to determine whether job is
        using execution butler or not.
    butler_config : `str`
        Location of central butler repositories config file.
    execution_butler_dir : `str`
        Location of execution butler repository.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location.
    """
    if when_create.upper() == "NEVER":
        # No execution butler: jobs talk to the central repo directly.
        src_uri = butler_config
        transfer = False
        access_remote = True
        shared = True
    else:
        # Jobs use the execution butler; the WMS transfers it to each job.
        src_uri = execution_butler_dir
        if not src_uri.startswith("/"):
            src_uri = f"{prefix}/{src_uri}"
        transfer = True
        access_remote = False
        shared = False

    return GenericWorkflowFile(
        "butlerConfig",
        src_uri=src_uri,
        wms_transfer=transfer,
        job_access_remote=access_remote,
        job_shared=shared,
    )
def _get_qgraph_gwfile(config, save_qgraph_per_job, gwjob, run_qgraph_file, prefix):
    """Get qgraph location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    save_qgraph_per_job : `lsst.ctrl.bps.bps_utils.WhenToSaveQuantumGraphs`
        What submission stage to save per-job qgraph files (or NEVER).
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which determining QuantumGraph file.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location (may not include filename).
    """
    # No per-job graphs: every job reads the full run QuantumGraph.
    if save_qgraph_per_job == WhenToSaveQuantumGraphs.NEVER:
        return run_qgraph_file

    # Otherwise each job gets its own uniquely keyed QuantumGraph file.
    return GenericWorkflowFile(
        f"qgraphFile_{gwjob.name}",
        src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
        wms_transfer=True,
        job_access_remote=True,
        job_shared=True,
    )
def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the bps config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching config.
    cmd_line_key : `str` or None
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [`str`, `Any`]
        A mapping between job attributes and their values (`None` for
        attributes not found in the config).
    """
    _LOG.debug("cmd_line_key=%s, search_opt=%s", cmd_line_key, search_opt)
    job_values = {}
    for attr in _ATTRS_ALL:
        # Variable names in yaml are camel case instead of snake case.
        yaml_name = re.sub(r"_(\S)", lambda match: match.group(1).upper(), attr)
        found, value = config.search(yaml_name, opt=search_opt)
        job_values[attr] = value if found else None

    # If the automatic memory scaling is enabled (i.e. the memory multiplier
    # is set and it is a positive number greater than 1.0), adjust number
    # of retries when necessary. If the memory multiplier is invalid, disable
    # automatic memory scaling.
    if job_values["memory_multiplier"] is not None:
        if math.ceil(float(job_values["memory_multiplier"])) > 1:
            if job_values["number_of_retries"] is None:
                job_values["number_of_retries"] = DEFAULT_MEM_RETRIES
        else:
            job_values["memory_multiplier"] = None

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            # Use partition instead of split so a command line consisting of
            # a bare executable (no space/arguments) doesn't raise
            # ValueError; args is then "" and "arguments" stays unset.
            cmd, _, args = cmdline.partition(" ")
            job_values["executable"] = GenericWorkflowExec(os.path.basename(cmd), cmd, False)
            if args:
                job_values["arguments"] = args

    return job_values
def _handle_job_values(quantum_job_values, gwjob, attributes=_ATTRS_ALL):
    """Set the job attributes in the cluster to their correct values.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_ALL.
    """
    _LOG.debug("Call to _handle_job_values")
    # Each handler applies its own aggregation rule (same-value, max, sum)
    # to the subset of attributes it owns.
    for handler in (
        _handle_job_values_universal,
        _handle_job_values_max,
        _handle_job_values_sum,
    ):
        handler(quantum_job_values, gwjob, attributes)
def _handle_job_values_universal(quantum_job_values, gwjob, attributes=_ATTRS_UNIVERSAL):
    """Handle job attributes that must have the same value for every quantum
    in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_UNIVERSAL.

    Raises
    ------
    RuntimeError
        Raised if two quanta in the cluster disagree on an attribute value.
    """
    for attr in _ATTRS_UNIVERSAL & set(attributes):
        _LOG.debug(
            "Handling job %s (job=%s, quantum=%s)",
            attr,
            getattr(gwjob, attr),
            quantum_job_values.get(attr, "MISSING"),
        )
        # A quantum that does not provide the attribute contributes nothing.
        if attr not in quantum_job_values:
            continue
        quantum_value = quantum_job_values[attr]
        current_value = getattr(gwjob, attr)
        if not current_value:
            # First value seen wins; later quanta must agree with it.
            setattr(gwjob, attr, quantum_value)
        elif current_value != quantum_value:
            _LOG.error(
                "Inconsistent value for %s in Cluster %s Quantum Number %s\n"
                "Current cluster value: %s\n"
                "Quantum value: %s",
                attr,
                gwjob.name,
                quantum_job_values.get("qgraphNodeId", "MISSING"),
                current_value,
                quantum_value,
            )
            raise RuntimeError(f"Inconsistent value for {attr} in cluster {gwjob.name}.")
def _handle_job_values_max(quantum_job_values, gwjob, attributes=_ATTRS_MAX):
    """Handle job attributes that should be set to their maximum value in
    the in cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTR_MAX.
    """
    for attr in _ATTRS_MAX & set(attributes):
        current_value = getattr(gwjob, attr)
        # EAFP: quanta that do not provide the attribute don't contribute.
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        else:
            # Update when the job has no value yet or the quantum's value is
            # larger; a None quantum value never replaces an existing one.
            needs_update = False
            if current_value is None:
                if quantum_value is not None:
                    needs_update = True
            else:
                if quantum_value is not None and current_value < quantum_value:
                    needs_update = True
            if needs_update:
                setattr(gwjob, attr, quantum_value)

                # When updating memory requirements for a job, check if memory
                # autoscaling is enabled. If it is, always use the memory
                # multiplier and the number of retries which comes with the
                # quantum.
                #
                # Note that as a result, the quantum with the biggest memory
                # requirements will determine whether the memory autoscaling
                # will be enabled (or disabled) depending on the value of its
                # memory multiplier.
                #
                # NOTE(review): assumes "memory_multiplier" and
                # "number_of_retries" keys exist whenever "request_memory"
                # does (true for dicts built by _get_job_values) -- confirm
                # for any other caller.
                if attr == "request_memory":
                    gwjob.memory_multiplier = quantum_job_values["memory_multiplier"]
                    if gwjob.memory_multiplier is not None:
                        gwjob.number_of_retries = quantum_job_values["number_of_retries"]
def _handle_job_values_sum(quantum_job_values, gwjob, attributes=_ATTRS_SUM):
    """Handle job attributes that are the sum of their values in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_SUM.
    """
    for attr in _ATTRS_SUM & set(attributes):
        # Consistent with the universal/max handlers: a quantum that does
        # not provide the attribute simply does not contribute (previously
        # a missing key raised KeyError here).
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        # _get_job_values stores None for attributes absent from the config;
        # adding None to a running total would raise TypeError.
        if quantum_value is None:
            continue
        current_value = getattr(gwjob, attr)
        if not current_value:
            setattr(gwjob, attr, quantum_value)
        else:
            setattr(gwjob, attr, current_value + quantum_value)
def create_generic_workflow(config, cqgraph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether saving per-job QuantumGraph files in the loop.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    save_qgraph_per_job = WhenToSaveQuantumGraphs[when_save.upper()]

    # Defer variable/env-var expansion; values may differ on compute side.
    search_opt = {"replaceVars": False, "expandEnvVars": False, "replaceEnvVars": True, "required": False}

    # Lookup butler values once
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)

    generic_workflow = GenericWorkflow(name)

    # Save full run QuantumGraph for use by jobs
    generic_workflow.add_file(
        GenericWorkflowFile(
            "runQgraphFile",
            src_uri=config["runQgraphFile"],
            wms_transfer=True,
            job_access_remote=True,
            job_shared=True,
        )
    )

    # Cache pipetask specific or more generic job values to minimize number
    # on config searches.
    cached_job_values = {}
    cached_pipetask_values = {}

    # One generic workflow job per cluster.
    for cluster in cqgraph.clusters():
        _LOG.debug("Loop over clusters: %s, %s", cluster, type(cluster))
        _LOG.debug(
            "cqgraph: name=%s, len=%s, label=%s, ids=%s",
            cluster.name,
            len(cluster.qgraph_node_ids),
            cluster.label,
            cluster.qgraph_node_ids,
        )

        gwjob = GenericWorkflowJob(cluster.name)

        # First get job values from cluster or cluster config
        search_opt["curvals"] = {"curr_cluster": cluster.label}
        found, value = config.search("computeSite", opt=search_opt)
        if found:
            search_opt["curvals"]["curr_site"] = value
        found, value = config.search("computeCloud", opt=search_opt)
        if found:
            search_opt["curvals"]["curr_cloud"] = value

        # If some config values are set for this cluster
        if cluster.label not in cached_job_values:
            _LOG.debug("config['cluster'][%s] = %s", cluster.label, config["cluster"][cluster.label])
            cached_job_values[cluster.label] = {}

            # Allowing whenSaveJobQgraph and useLazyCommands per cluster label.
            key = "whenSaveJobQgraph"
            _, when_save = config.search(key, opt=search_opt)
            cached_job_values[cluster.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

            key = "useLazyCommands"
            search_opt["default"] = True
            _, cached_job_values[cluster.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

            if cluster.label in config["cluster"]:
                # Don't want to get global defaults here so only look in
                # cluster section.
                cached_job_values[cluster.label].update(
                    _get_job_values(config["cluster"][cluster.label], search_opt, "runQuantumCommand")
                )
        # Shallow copy so per-cluster additions below don't pollute cache.
        cluster_job_values = copy.copy(cached_job_values[cluster.label])

        cluster_job_values["name"] = cluster.name
        cluster_job_values["label"] = cluster.label
        cluster_job_values["quanta_counts"] = cluster.quanta_counts
        cluster_job_values["tags"] = cluster.tags
        _LOG.debug("cluster_job_values = %s", cluster_job_values)
        _handle_job_values(cluster_job_values, gwjob, cluster_job_values.keys())

        # For purposes of whether to continue searching for a value is whether
        # the value evaluates to False.
        unset_attributes = {attr for attr in _ATTRS_ALL if not getattr(gwjob, attr)}

        _LOG.debug("unset_attributes=%s", unset_attributes)
        _LOG.debug("set=%s", _ATTRS_ALL - unset_attributes)

        # For job info not defined at cluster level, attempt to get job info
        # either common or aggregate for all Quanta in cluster.
        for node_id in iter(cluster.qgraph_node_ids):
            _LOG.debug("node_id=%s", node_id)
            qnode = cqgraph.get_quantum_node(node_id)

            if qnode.taskDef.label not in cached_pipetask_values:
                search_opt["curvals"]["curr_pipetask"] = qnode.taskDef.label
                cached_pipetask_values[qnode.taskDef.label] = _get_job_values(
                    config, search_opt, "runQuantumCommand"
                )

            _handle_job_values(cached_pipetask_values[qnode.taskDef.label], gwjob, unset_attributes)

        # Update job with workflow attribute and profile values.
        qgraph_gwfile = _get_qgraph_gwfile(
            config, save_qgraph_per_job, gwjob, generic_workflow.get_file("runQgraphFile"), prefix
        )
        butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        gwjob.cmdvals["qgraphId"] = cqgraph.qgraph.graphID
        gwjob.cmdvals["qgraphNodeId"] = ",".join(
            sorted([f"{node_id}" for node_id in cluster.qgraph_node_ids])
        )
        _enhance_command(config, generic_workflow, gwjob, cached_job_values)

        # If writing per-job QuantumGraph files during TRANSFORM stage,
        # write it now while in memory.
        if save_qgraph_per_job == WhenToSaveQuantumGraphs.TRANSFORM:
            save_qg_subgraph(cqgraph.qgraph, qgraph_gwfile.src_uri, cluster.qgraph_node_ids)

    # Create job dependencies.
    for parent in cqgraph.clusters():
        for child in cqgraph.successors(parent):
            generic_workflow.add_job_relationships(parent.name, child.name)

    # Add initial workflow.
    # NOTE(review): the default here is the *string* "{default: False}",
    # which is truthy if BpsConfig.get returns it verbatim -- confirm
    # BpsConfig.get parses this into a False default.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, cqgraph.qgraph, generic_workflow)

    generic_workflow.run_attrs.update(
        {
            "bps_isjob": "True",
            "bps_project": config["project"],
            "bps_campaign": config["campaign"],
            "bps_run": generic_workflow.name,
            "bps_operator": config["operator"],
            "bps_payload": config["payloadName"],
            "bps_runsite": config["computeSite"],
        }
    )

    # Add final job
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow
def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    # Start from a copy of the submit config, then record where the
    # workflow lives and what it is called.
    workflow_config = BpsConfig(config)
    workflow_config["workflowName"] = config["uniqProcName"]
    workflow_config["workflowPath"] = prefix
    return workflow_config
def add_final_job(config, generic_workflow, prefix):
    """Add final workflow job depending upon configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    prefix : `str`
        Directory in which to output final script.

    Raises
    ------
    ValueError
        Raised if executionButler.whenMerge has an unsupported value.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")

    search_opt = {"searchobj": config[".executionButler"], "curvals": {}, "default": None}
    # Narrow subsequent searches by site/cloud when configured.
    found, value = config.search("computeSite", opt=search_opt)
    if found:
        search_opt["curvals"]["curr_site"] = value
    found, value = config.search("computeCloud", opt=search_opt)
    if found:
        search_opt["curvals"]["curr_cloud"] = value

    if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
        # create gwjob
        gwjob = GenericWorkflowJob("mergeExecutionButler")
        gwjob.label = "mergeExecutionButler"

        # Fill in any job attributes not already set from the config.
        job_values = _get_job_values(config, search_opt, None)
        for attr in _ATTRS_ALL:
            if not getattr(gwjob, attr) and job_values.get(attr, None):
                setattr(gwjob, attr, job_values[attr])

        # Create script and add command line to job.
        gwjob.executable, gwjob.arguments = _create_final_command(config, prefix)

        # Determine inputs from command line.
        for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
            gwfile = generic_workflow.get_file(file_key)
            generic_workflow.add_job_inputs(gwjob.name, gwfile)

        _enhance_command(config, generic_workflow, gwjob, {})

        # Put transfer repo job in appropriate location in workflow.
        if when_merge.upper() == "ALWAYS":
            # add as special final job
            generic_workflow.add_final(gwjob)
        elif when_merge.upper() == "SUCCESS":
            # add as regular sink node
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            # Name the actual config key (whenMerge, not when_merge) so the
            # user can find and fix the bad setting.
            raise ValueError(f"Invalid value for executionButler.whenMerge {when_merge}")
def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Directory in which to output final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    # Defer all expansion; the script itself resolves env vars at run time.
    search_opt = {
        "replaceVars": False,
        "replaceEnvVars": False,
        "expandEnvVars": False,
        "searchobj": config["executionButler"],
    }

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w") as fh:
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)
        print("set -x", file=fh)

        # The two positional script args become env vars used below.
        print("butlerConfig=$1", file=fh)
        print("executionButlerDir=$2", file=fh)

        # Commands are numbered command1, command2, ... in the config;
        # stop at the first missing index.
        i = 1
        found, command = config.search(f".executionButler.command{i}", opt=search_opt)
        while found:
            # Temporarily replace any env vars so formatter doesn't try to
            # replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be args to script and
            # set to env vars
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in command string
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env place holders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)

            print(command, file=fh)
            i += 1
            found, command = config.search(f".executionButler.command{i}", opt=search_opt)
    # Make the script executable by everyone.
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"
def add_final_job_as_sink(generic_workflow, final_job):
    """Add final job as the single sink for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as new sink node depending upon all previous sink nodes.
    """
    # Every current sink node becomes a parent of the new final job.
    current_sinks = [node for node in generic_workflow if generic_workflow.out_degree(node) == 0]
    _LOG.debug("gw_sinks = %s", current_sinks)

    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(current_sinks, final_job.name)