# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
28"""Driver for the transformation of a QuantumGraph into a generic workflow.
29"""
31import copy
32import dataclasses
33import logging
34import math
35import os
36import re
38from lsst.utils.logging import VERBOSE
39from lsst.utils.timer import time_this, timeMethod
41from . import (
42 DEFAULT_MEM_RETRIES,
43 BpsConfig,
44 GenericWorkflow,
45 GenericWorkflowExec,
46 GenericWorkflowFile,
47 GenericWorkflowJob,
48)
49from .bps_utils import (
50 WhenToSaveQuantumGraphs,
51 _create_execution_butler,
52 create_job_quantum_graph_filename,
53 save_qg_subgraph,
54)

# All available job attributes.
_ATTRS_ALL = frozenset([field.name for field in dataclasses.fields(GenericWorkflowJob)])

# Job attributes that need to be set to their maximal value in the cluster.
_ATTRS_MAX = frozenset(
    {
        "memory_multiplier",
        "number_of_retries",
        "request_cpus",
        "request_memory",
        "request_memory_max",
    }
)

# Job attributes that need to be set to the sum of their values in the cluster.
_ATTRS_SUM = frozenset(
    {
        "request_disk",
        "request_walltime",
    }
)

# Job attributes that do not fall into a specific category.
_ATTRS_MISC = frozenset(
    {
        "label",  # taskDef labels aren't the same in a job and may not match the job label
        "cmdvals",
        "profile",
        "attrs",
    }
)

# Attributes that need to be the same for each quantum in the cluster.
_ATTRS_UNIVERSAL = frozenset(_ATTRS_ALL - (_ATTRS_MAX | _ATTRS_MISC | _ATTRS_SUM))
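
# For example (illustrative numbers, not from any real config): when two
# quanta requesting 2048 and 4096 MB are clustered into one job, an
# _ATTRS_MAX attribute like request_memory becomes 4096, an _ATTRS_SUM
# attribute like request_walltime becomes the total of the two values, and
# an _ATTRS_UNIVERSAL attribute must be identical for both quanta.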

_LOG = logging.getLogger(__name__)


@timeMethod(logger=_LOG, logLevel=VERBOSE)
def transform(config, cqgraph, prefix):
    """Transform a ClusteredQuantumGraph to a GenericWorkflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        A clustered quantum graph to transform into a generic workflow.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        The generic workflow transformed from the clustered quantum graph.
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration to accompany the GenericWorkflow.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    if when_create.upper() == "TRANSFORM":
        _, execution_butler_dir = config.search(".bps_defined.executionButlerDir")
        _LOG.info("Creating execution butler in '%s'", execution_butler_dir)
        with time_this(log=_LOG, level=logging.INFO, prefix=None, msg="Creating execution butler completed"):
            _create_execution_butler(config, config["runQgraphFile"], execution_butler_dir, prefix)

    if cqgraph.name is not None:
        name = cqgraph.name
    else:
        _, name = config.search("uniqProcName", opt={"required": True})

    generic_workflow = create_generic_workflow(config, cqgraph, name, prefix)
    generic_workflow_config = create_generic_workflow_config(config, prefix)

    return generic_workflow, generic_workflow_config
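
# A hedged usage sketch (paths and names are hypothetical): given a BpsConfig
# built from a submit YAML and a ClusteredQuantumGraph produced by the
# clustering stage, the driver is typically invoked as
#
#     workflow, wf_config = transform(config, cqgraph, "/path/to/submit/run")
#
# after which `workflow` can be handed to a WMS plugin's prepare step.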


def add_workflow_init_nodes(config, qgraph, generic_workflow):
    """Add nodes to workflow graph that perform initialization steps.

    Assumes that all of the initialization should be executed prior to any
    of the jobs in the current workflow.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which the initialization steps should be added.
    """
    # Create a workflow graph that will have task and file nodes necessary for
    # initializing the pipeline execution.
    init_workflow = create_init_workflow(config, qgraph, generic_workflow.get_file("runQgraphFile"))
    _LOG.debug("init_workflow nodes = %s", init_workflow.nodes())
    generic_workflow.add_workflow_source(init_workflow)


def create_init_workflow(config, qgraph, qgraph_gwfile):
    """Create workflow for running initialization job(s).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        The quantum graph the generic workflow represents.
    qgraph_gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        File object for the full run QuantumGraph file.

    Returns
    -------
    init_workflow : `lsst.ctrl.bps.GenericWorkflow`
        GenericWorkflow consisting of job(s) to initialize workflow.
    """
    _LOG.debug("creating init subgraph")
    _LOG.debug("creating init task input(s)")
    search_opt = {
        "curvals": {"curr_pipetask": "pipetaskInit"},
        "replaceVars": False,
        "expandEnvVars": False,
        "replaceEnvVars": True,
        "required": False,
    }
    found, value = config.search("computeSite", opt=search_opt)
    if found:
        search_opt["curvals"]["curr_site"] = value
    found, value = config.search("computeCloud", opt=search_opt)
    if found:
        search_opt["curvals"]["curr_cloud"] = value

    init_workflow = GenericWorkflow("init")
    init_workflow.add_file(qgraph_gwfile)

    # Create job for executing --init-only.
    gwjob = GenericWorkflowJob("pipetaskInit", label="pipetaskInit")

    job_values = _get_job_values(config, search_opt, "runQuantumCommand")
    job_values["name"] = "pipetaskInit"
    job_values["label"] = "pipetaskInit"

    # Adjust job attribute values if necessary.
    _handle_job_values(job_values, gwjob)

    # Pick a node id for each task (not quantum!) to avoid reading the entire
    # quantum graph during the initialization stage.
    node_ids = []
    for task in qgraph.iterTaskGraph():
        task_def = qgraph.findTaskDefByLabel(task.label)
        node = next(iter(qgraph.getNodesForTask(task_def)))
        node_ids.append(node.nodeId)
    gwjob.cmdvals["qgraphId"] = qgraph.graphID
    gwjob.cmdvals["qgraphNodeId"] = ",".join(sorted([f"{node_id}" for node_id in node_ids]))

    init_workflow.add_job(gwjob)

    # Lookup butler values.
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)
    prefix = config["submitPath"]
    butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

    init_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])
    _enhance_command(config, init_workflow, gwjob, {})

    return init_workflow


def _enhance_command(config, generic_workflow, gwjob, cached_job_values):
    """Enhance command line with env and file placeholders
    and gather command line values.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow that contains the job.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job to which the updated executable, arguments,
        and values should be saved.
    cached_job_values : `dict` [`str`, `dict` [`str`, `Any`]]
        Cached values common across jobs with the same label. Updated if
        values aren't already saved for the given gwjob's label.
    """
    _LOG.debug("gwjob given to _enhance_command: %s", gwjob)

    search_opt = {
        "curvals": {"curr_pipetask": gwjob.label},
        "replaceVars": False,
        "expandEnvVars": False,
        "replaceEnvVars": True,
        "required": False,
    }

    if gwjob.label not in cached_job_values:
        cached_job_values[gwjob.label] = {}
        # Allowing whenSaveJobQgraph and useLazyCommands per pipetask label.
        key = "whenSaveJobQgraph"
        _, when_save = config.search(key, opt=search_opt)
        cached_job_values[gwjob.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

        key = "useLazyCommands"
        search_opt["default"] = True
        _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
        del search_opt["default"]

    # Change qgraph variable to match whether using run or per-job qgraph.
    # Note: these are lookup keys, not actual physical filenames.
    if cached_job_values[gwjob.label]["whenSaveJobQgraph"] == WhenToSaveQuantumGraphs.NEVER:
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    elif gwjob.name == "pipetaskInit":
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", "{runQgraphFile}")
    else:  # Need unique file keys for per-job QuantumGraphs.
        gwjob.arguments = gwjob.arguments.replace("{qgraphFile}", f"{{qgraphFile_{gwjob.name}}}")

    # Replace files with special placeholders.
    for gwfile in generic_workflow.get_job_inputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")
    for gwfile in generic_workflow.get_job_outputs(gwjob.name):
        gwjob.arguments = gwjob.arguments.replace(f"{{{gwfile.name}}}", f"<FILE:{gwfile.name}>")

    # Save dict of other values needed to complete command line.
    # (Be careful not to replace env variables as they may
    # be different in the compute job.)
    search_opt["replaceVars"] = True

    for key in re.findall(r"{([^}]+)}", gwjob.arguments):
        if key not in gwjob.cmdvals:
            if key not in cached_job_values[gwjob.label]:
                _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            gwjob.cmdvals[key] = cached_job_values[gwjob.label][key]

    # Backwards compatibility.
    if not cached_job_values[gwjob.label]["useLazyCommands"]:
        if "bpsUseShared" not in cached_job_values[gwjob.label]:
            key = "bpsUseShared"
            search_opt["default"] = True
            _, cached_job_values[gwjob.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

        gwjob.arguments = _fill_arguments(
            cached_job_values[gwjob.label]["bpsUseShared"], generic_workflow, gwjob.arguments, gwjob.cmdvals
        )
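
# Illustrative effect of the qgraph-key rewriting above (job name is
# hypothetical): with whenSaveJobQgraph != NEVER, a job named "cluster_0042"
# has "{qgraphFile}" in its arguments rewritten to
# "{qgraphFile_cluster_0042}"; with NEVER (or for pipetaskInit) every job
# shares the single "{runQgraphFile}" lookup key instead.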


def _fill_arguments(use_shared, generic_workflow, arguments, cmdvals):
    """Replace placeholders in command line string in job.

    Parameters
    ----------
    use_shared : `bool`
        Whether using shared filesystem.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow containing the job.
    arguments : `str`
        String containing placeholders.
    cmdvals : `dict` [`str`, `Any`]
        Any command line values that can be used to replace placeholders.

    Returns
    -------
    arguments : `str`
        Command line with FILE and ENV placeholders replaced.
    """
    # Replace file placeholders.
    for file_key in re.findall(r"<FILE:([^>]+)>", arguments):
        gwfile = generic_workflow.get_file(file_key)
        if not gwfile.wms_transfer:
            # Must assume full URI if in command line and told WMS is not
            # responsible for transferring file.
            uri = gwfile.src_uri
        elif use_shared:
            if gwfile.job_shared:
                # Have shared filesystems and jobs can share file.
                uri = gwfile.src_uri
            else:
                # Taking advantage of inside knowledge. Not future-proof.
                # Temporary fix until have job wrapper that pulls files
                # within job.
                if gwfile.name == "butlerConfig" and os.path.splitext(gwfile.src_uri)[1] != ".yaml":
                    uri = "butler.yaml"
                else:
                    uri = os.path.basename(gwfile.src_uri)
        else:  # Using push transfer
            uri = os.path.basename(gwfile.src_uri)

        arguments = arguments.replace(f"<FILE:{file_key}>", uri)

    # Replace env placeholders with submit-side values.
    arguments = re.sub(r"<ENV:([^>]+)>", r"$\1", arguments)
    arguments = os.path.expandvars(arguments)

    # Replace remaining vars.
    arguments = arguments.format(**cmdvals)

    return arguments
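
# A minimal sketch of the expansion performed by _fill_arguments (all values
# are assumed): with use_shared=True and a job_shared file whose src_uri is
# "/submit/run/job1.qgraph", the fragment
#
#     "-g <FILE:qgraphFile_job1> --qgraph-id {qgraphId}"
#
# becomes
#
#     "-g /submit/run/job1.qgraph --qgraph-id 1234"
#
# after the <FILE:...> substitution and the final .format(**cmdvals) call.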


def _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir):
    """Get butler location to be used by job.

    Parameters
    ----------
    prefix : `str`
        Root path for any output files.
    when_create : `str`
        When the execution butler is created; used to determine whether the
        job is using the execution butler or not.
    butler_config : `str`
        Location of the central butler repository's config file.
    execution_butler_dir : `str`
        Location of execution butler repository.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of butler location.
    """
    if when_create.upper() == "NEVER":
        wms_transfer = False
        job_access_remote = True
        job_shared = True
    else:
        butler_config = execution_butler_dir
        if not butler_config.startswith("/"):
            butler_config = f"{prefix}/{butler_config}"
        wms_transfer = True
        job_access_remote = False
        job_shared = False

    gwfile = GenericWorkflowFile(
        "butlerConfig",
        src_uri=butler_config,
        wms_transfer=wms_transfer,
        job_access_remote=job_access_remote,
        job_shared=job_shared,
    )

    return gwfile
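
# For example (assumed values): with when_create="NEVER" the returned file
# simply points at the central butler_config and is treated as shared; with
# any other value it points at execution_butler_dir (prefixed with `prefix`
# when relative) and is marked for WMS transfer.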


def _get_qgraph_gwfile(config, save_qgraph_per_job, gwjob, run_qgraph_file, prefix):
    """Get qgraph location to be used by job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    save_qgraph_per_job : `lsst.ctrl.bps.bps_utils.WhenToSaveQuantumGraphs`
        In which submission stage to save per-job qgraph files (or NEVER).
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Job for which the QuantumGraph file is being determined.
    run_qgraph_file : `lsst.ctrl.bps.GenericWorkflowFile`
        File representation of the full run QuantumGraph.
    prefix : `str`
        Path prefix for any files written.

    Returns
    -------
    gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
        Representation of the QuantumGraph location (may not include
        filename).
    """
    if save_qgraph_per_job != WhenToSaveQuantumGraphs.NEVER:
        qgraph_gwfile = GenericWorkflowFile(
            f"qgraphFile_{gwjob.name}",
            src_uri=create_job_quantum_graph_filename(config, gwjob, prefix),
            wms_transfer=True,
            job_access_remote=True,
            job_shared=True,
        )
    else:
        qgraph_gwfile = run_qgraph_file

    return qgraph_gwfile


def _get_job_values(config, search_opt, cmd_line_key):
    """Gather generic workflow job values from the bps config.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    search_opt : `dict` [`str`, `Any`]
        Search options to be used when searching config.
    cmd_line_key : `str` or `None`
        Which command line key to search for (e.g., "runQuantumCommand").

    Returns
    -------
    job_values : `dict` [`str`, `Any`]
        A mapping between job attributes and their values.
    """
    _LOG.debug("cmd_line_key=%s, search_opt=%s", cmd_line_key, search_opt)

    # Create a dummy job to easily access the default values.
    default_gwjob = GenericWorkflowJob("default_job")

    job_values = {}
    for attr in _ATTRS_ALL:
        # Variable names in yaml are camel case instead of snake case.
        yaml_name = re.sub(r"_(\S)", lambda match: match.group(1).upper(), attr)
        found, value = config.search(yaml_name, opt=search_opt)
        if found:
            job_values[attr] = value
        else:
            job_values[attr] = getattr(default_gwjob, attr)

    # If the automatic memory scaling is enabled (i.e. the memory multiplier
    # is set and it is a positive number greater than 1.0), adjust the number
    # of retries when necessary. If the memory multiplier is invalid, disable
    # automatic memory scaling.
    if job_values["memory_multiplier"] is not None:
        if math.ceil(float(job_values["memory_multiplier"])) > 1:
            if job_values["number_of_retries"] is None:
                job_values["number_of_retries"] = DEFAULT_MEM_RETRIES
        else:
            job_values["memory_multiplier"] = None

    if cmd_line_key:
        found, cmdline = config.search(cmd_line_key, opt=search_opt)
        # Make sure cmdline isn't None as that could be sent in as a
        # default value in search_opt.
        if found and cmdline:
            cmd, args = cmdline.split(" ", 1)
            job_values["executable"] = GenericWorkflowExec(os.path.basename(cmd), cmd, False)
            if args:
                job_values["arguments"] = args

    return job_values
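
# Illustration of the snake_case-to-camelCase lookup above: the dataclass
# attribute "request_memory" is searched in the config as "requestMemory",
# and "memory_multiplier" as "memoryMultiplier".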


def _handle_job_values(quantum_job_values, gwjob, attributes=_ATTRS_ALL):
    """Set the job attributes in the cluster to their correct values.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_ALL.
    """
    _LOG.debug("Call to _handle_job_values")
    _handle_job_values_universal(quantum_job_values, gwjob, attributes)
    _handle_job_values_max(quantum_job_values, gwjob, attributes)
    _handle_job_values_sum(quantum_job_values, gwjob, attributes)


def _handle_job_values_universal(quantum_job_values, gwjob, attributes=_ATTRS_UNIVERSAL):
    """Handle job attributes that must have the same value for every quantum
    in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the universal values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_UNIVERSAL.
    """
    for attr in _ATTRS_UNIVERSAL & set(attributes):
        _LOG.debug(
            "Handling job %s (job=%s, quantum=%s)",
            attr,
            getattr(gwjob, attr),
            quantum_job_values.get(attr, "MISSING"),
        )
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        else:
            if not current_value:
                setattr(gwjob, attr, quantum_value)
            elif current_value != quantum_value:
                _LOG.error(
                    "Inconsistent value for %s in Cluster %s Quantum Number %s\n"
                    "Current cluster value: %s\n"
                    "Quantum value: %s",
                    attr,
                    gwjob.name,
                    quantum_job_values.get("qgraphNodeId", "MISSING"),
                    current_value,
                    quantum_value,
                )
                raise RuntimeError(f"Inconsistent value for {attr} in cluster {gwjob.name}.")


def _handle_job_values_max(quantum_job_values, gwjob, attributes=_ATTRS_MAX):
    """Handle job attributes that should be set to their maximum value in
    the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_MAX.
    """
    for attr in _ATTRS_MAX & set(attributes):
        current_value = getattr(gwjob, attr)
        try:
            quantum_value = quantum_job_values[attr]
        except KeyError:
            continue
        else:
            needs_update = False
            if current_value is None:
                if quantum_value is not None:
                    needs_update = True
            else:
                if quantum_value is not None and current_value < quantum_value:
                    needs_update = True
            if needs_update:
                setattr(gwjob, attr, quantum_value)

                # When updating memory requirements for a job, check if memory
                # autoscaling is enabled. If it is, always use the memory
                # multiplier and the number of retries which come with the
                # quantum.
                #
                # Note that as a result, the quantum with the biggest memory
                # requirements will determine whether the memory autoscaling
                # will be enabled (or disabled) depending on the value of its
                # memory multiplier.
                if attr == "request_memory":
                    gwjob.memory_multiplier = quantum_job_values["memory_multiplier"]
                    if gwjob.memory_multiplier is not None:
                        gwjob.number_of_retries = quantum_job_values["number_of_retries"]


def _handle_job_values_sum(quantum_job_values, gwjob, attributes=_ATTRS_SUM):
    """Handle job attributes that are the sum of their values in the cluster.

    Parameters
    ----------
    quantum_job_values : `dict` [`str`, `Any`]
        Job values for running a single Quantum.
    gwjob : `lsst.ctrl.bps.GenericWorkflowJob`
        Generic workflow job in which to store the aggregate values.
    attributes : `Iterable` [`str`], optional
        Job attributes to be set in the job following different rules.
        The default value is _ATTRS_SUM.
    """
    for attr in _ATTRS_SUM & set(attributes):
        current_value = getattr(gwjob, attr)
        if not current_value:
            setattr(gwjob, attr, quantum_job_values[attr])
        else:
            setattr(gwjob, attr, current_value + quantum_job_values[attr])
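
# A minimal sketch of the aggregation rules above (all numbers are
# illustrative):
#
#     gwjob = GenericWorkflowJob("clusterA", label="task1")
#     for qjob in ({"request_memory": 2048, "request_walltime": 300,
#                   "memory_multiplier": None, "number_of_retries": None},
#                  {"request_memory": 4096, "request_walltime": 600,
#                   "memory_multiplier": None, "number_of_retries": None}):
#         _handle_job_values_max(qjob, gwjob, attributes={"request_memory"})
#         _handle_job_values_sum(qjob, gwjob, attributes={"request_walltime"})
#     # gwjob.request_memory == 4096 (max); gwjob.request_walltime == 900 (sum)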


def create_generic_workflow(config, cqgraph, name, prefix):
    """Create a generic workflow from a ClusteredQuantumGraph such that it
    has information needed for WMS (e.g., command lines).

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph for running a specific pipeline on a specific
        payload.
    name : `str`
        Name for the workflow (typically unique).
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow for the given ClusteredQuantumGraph + config.
    """
    # Determine whether to save per-job QuantumGraph files in the loop.
    _, when_save = config.search("whenSaveJobQgraph", {"default": WhenToSaveQuantumGraphs.TRANSFORM.name})
    save_qgraph_per_job = WhenToSaveQuantumGraphs[when_save.upper()]

    search_opt = {"replaceVars": False, "expandEnvVars": False, "replaceEnvVars": True, "required": False}

    # Lookup butler values once.
    _, when_create = config.search(".executionButler.whenCreate", opt=search_opt)
    _, butler_config = config.search("butlerConfig", opt=search_opt)
    _, execution_butler_dir = config.search(".bps_defined.executionButlerDir", opt=search_opt)

    generic_workflow = GenericWorkflow(name)

    # Save full run QuantumGraph for use by jobs.
    generic_workflow.add_file(
        GenericWorkflowFile(
            "runQgraphFile",
            src_uri=config["runQgraphFile"],
            wms_transfer=True,
            job_access_remote=True,
            job_shared=True,
        )
    )

    # Cache pipetask-specific or more generic job values to minimize the
    # number of config searches.
    cached_job_values = {}
    cached_pipetask_values = {}

    for cluster in cqgraph.clusters():
        _LOG.debug("Loop over clusters: %s, %s", cluster, type(cluster))
        _LOG.debug(
            "cqgraph: name=%s, len=%s, label=%s, ids=%s",
            cluster.name,
            len(cluster.qgraph_node_ids),
            cluster.label,
            cluster.qgraph_node_ids,
        )

        gwjob = GenericWorkflowJob(cluster.name, label=cluster.label)

        # First get job values from cluster or cluster config.
        search_opt["curvals"] = {"curr_cluster": cluster.label}
        found, value = config.search("computeSite", opt=search_opt)
        if found:
            search_opt["curvals"]["curr_site"] = value
        found, value = config.search("computeCloud", opt=search_opt)
        if found:
            search_opt["curvals"]["curr_cloud"] = value

        # Cache any config values set for this cluster label.
        if cluster.label not in cached_job_values:
            _LOG.debug("config['cluster'][%s] = %s", cluster.label, config["cluster"][cluster.label])
            cached_job_values[cluster.label] = {}

            # Allowing whenSaveJobQgraph and useLazyCommands per cluster label.
            key = "whenSaveJobQgraph"
            _, when_save = config.search(key, opt=search_opt)
            cached_job_values[cluster.label][key] = WhenToSaveQuantumGraphs[when_save.upper()]

            key = "useLazyCommands"
            search_opt["default"] = True
            _, cached_job_values[cluster.label][key] = config.search(key, opt=search_opt)
            del search_opt["default"]

            if cluster.label in config["cluster"]:
                # Don't want to get global defaults here so only look in
                # cluster section.
                cached_job_values[cluster.label].update(
                    _get_job_values(config["cluster"][cluster.label], search_opt, "runQuantumCommand")
                )
        cluster_job_values = copy.copy(cached_job_values[cluster.label])

        cluster_job_values["name"] = cluster.name
        cluster_job_values["label"] = cluster.label
        cluster_job_values["quanta_counts"] = cluster.quanta_counts
        cluster_job_values["tags"] = cluster.tags
        _LOG.debug("cluster_job_values = %s", cluster_job_values)
        _handle_job_values(cluster_job_values, gwjob, cluster_job_values.keys())

        # For purposes of deciding whether to continue searching for a value,
        # an attribute counts as unset if its value evaluates to False.
        unset_attributes = {attr for attr in _ATTRS_ALL if not getattr(gwjob, attr)}

        _LOG.debug("unset_attributes=%s", unset_attributes)
        _LOG.debug("set=%s", _ATTRS_ALL - unset_attributes)

        # For job info not defined at cluster level, attempt to get job info
        # either common or aggregate for all Quanta in cluster.
        for node_id in iter(cluster.qgraph_node_ids):
            _LOG.debug("node_id=%s", node_id)
            qnode = cqgraph.get_quantum_node(node_id)

            if qnode.taskDef.label not in cached_pipetask_values:
                search_opt["curvals"]["curr_pipetask"] = qnode.taskDef.label
                cached_pipetask_values[qnode.taskDef.label] = _get_job_values(
                    config, search_opt, "runQuantumCommand"
                )

            _handle_job_values(cached_pipetask_values[qnode.taskDef.label], gwjob, unset_attributes)

        # Update job with workflow attribute and profile values.
        qgraph_gwfile = _get_qgraph_gwfile(
            config, save_qgraph_per_job, gwjob, generic_workflow.get_file("runQgraphFile"), prefix
        )
        butler_gwfile = _get_butler_gwfile(prefix, when_create, butler_config, execution_butler_dir)

        generic_workflow.add_job(gwjob)
        generic_workflow.add_job_inputs(gwjob.name, [qgraph_gwfile, butler_gwfile])

        gwjob.cmdvals["qgraphId"] = cqgraph.qgraph.graphID
        gwjob.cmdvals["qgraphNodeId"] = ",".join(
            sorted([f"{node_id}" for node_id in cluster.qgraph_node_ids])
        )
        _enhance_command(config, generic_workflow, gwjob, cached_job_values)

        # If writing per-job QuantumGraph files during TRANSFORM stage,
        # write them now while in memory.
        if save_qgraph_per_job == WhenToSaveQuantumGraphs.TRANSFORM:
            save_qg_subgraph(cqgraph.qgraph, qgraph_gwfile.src_uri, cluster.qgraph_node_ids)

    # Create job dependencies.
    for parent in cqgraph.clusters():
        for child in cqgraph.successors(parent):
            generic_workflow.add_job_relationships(parent.name, child.name)

    # Add initial workflow.
    if config.get("runInit", "{default: False}"):
        add_workflow_init_nodes(config, cqgraph.qgraph, generic_workflow)

    generic_workflow.run_attrs.update(
        {
            "bps_isjob": "True",
            "bps_project": config["project"],
            "bps_campaign": config["campaign"],
            "bps_run": generic_workflow.name,
            "bps_operator": config["operator"],
            "bps_payload": config["payloadName"],
            "bps_runsite": config["computeSite"],
        }
    )

    # Add final job.
    add_final_job(config, generic_workflow, prefix)

    return generic_workflow


def create_generic_workflow_config(config, prefix):
    """Create generic workflow configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Root path for any output files.

    Returns
    -------
    generic_workflow_config : `lsst.ctrl.bps.BpsConfig`
        Configuration accompanying the GenericWorkflow.
    """
    generic_workflow_config = BpsConfig(config)
    generic_workflow_config["workflowName"] = config["uniqProcName"]
    generic_workflow_config["workflowPath"] = prefix
    return generic_workflow_config


def add_final_job(config, generic_workflow, prefix):
    """Add final workflow job depending upon configuration.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    prefix : `str`
        Directory in which to output final script.

    Notes
    -----
    This dispatch function was introduced to preserve the existing code
    responsible for dealing with the execution Butler (EB). Once there is
    no need to support the EB any longer it can be replaced by the function
    responsible for handling the final job.
    """
    # The order of the entries determines the priorities regarding which
    # method will be used when adding the final job if the configuration
    # provides conflicting specifications.
    dispatcher = {
        ".finalJob.whenRun": _add_final_job,
        ".executionButler.whenCreate": _add_merge_job,
    }
    for name, func in dispatcher.items():
        if name in config and config[name] != "NEVER":
            break
    else:
        raise RuntimeError("Final job specification not found")
    func(config, generic_workflow, prefix)
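
# For example, if a submit YAML sets both finalJob.whenRun and
# executionButler.whenCreate to values other than "NEVER", the final job is
# added via _add_final_job because ".finalJob.whenRun" comes first in the
# dispatcher above.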


def _add_final_job(config, generic_workflow, prefix):
    """Add the final job.

    Depending on configuration, the final job will be added as a special job
    which will always run regardless of the exit status of the workflow or as
    a regular sink node which will only run if the workflow execution finished
    with no errors.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    prefix : `str`
        Directory in which to output final script.
    """
    _, when_run = config.search(".finalJob.whenRun")
    if when_run.upper() != "NEVER":
        create_final_job = _make_final_job_creator("finalJob", _create_final_command)
        gwjob = create_final_job(config, generic_workflow, prefix)
        if when_run.upper() == "ALWAYS":
            generic_workflow.add_final(gwjob)
        elif when_run.upper() == "SUCCESS":
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            raise ValueError(f"Invalid value for finalJob.whenRun: {when_run}")


def _add_merge_job(config, generic_workflow, prefix):
    """Add job responsible for merging back the execution Butler.

    Depending on configuration, the merge job will be added as a special job
    which will always run regardless of the exit status of the workflow or as
    a regular sink node which will only run if the workflow execution finished
    with no errors.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    prefix : `str`
        Directory in which to output final script.
    """
    _, when_create = config.search(".executionButler.whenCreate")
    _, when_merge = config.search(".executionButler.whenMerge")
    if when_create.upper() != "NEVER" and when_merge.upper() != "NEVER":
        create_final_job = _make_final_job_creator("executionButler", _create_merge_command)
        gwjob = create_final_job(config, generic_workflow, prefix)
        if when_merge.upper() == "ALWAYS":
            generic_workflow.add_final(gwjob)
        elif when_merge.upper() == "SUCCESS":
            add_final_job_as_sink(generic_workflow, gwjob)
        else:
            raise ValueError(f"Invalid value for executionButler.whenMerge: {when_merge}")


def _make_final_job_creator(job_name, create_cmd):
    """Construct a function that creates the final job.

    Parameters
    ----------
    job_name : `str`
        Name of the job. It will also be used as the job label.
    create_cmd : callable
        Function to use when creating the script for the final job. It takes
        two positional arguments:

        - `config`: run configuration (`BpsConfig`).
        - `prefix`: directory in which to output final script (`str`).

    Returns
    -------
    create_gwjob : callable
        Function to use to create a generic workflow job. The function takes
        three positional arguments:

        - `config`: run configuration (`BpsConfig`).
        - `generic_workflow`: generic workflow to which the final job should
          be added.
        - `prefix`: directory in which to output final script (`str`).

    Notes
    -----
    Implemented as a closure in order to reduce code duplication and provide
    the extra flexibility needed to support the creation of the final node
    for both the execution and quantum-backed Butler with minimal impact on
    the existing code base. Once all supported plugins are able to use
    the quantum-backed Butler, the inner function can be merged with
    the remaining function responsible for adding the final node and the
    closure can be removed.
    """

    def create_final_job(config, generic_workflow, prefix):
        gwjob = GenericWorkflowJob(job_name, label=job_name)

        search_opt = {"searchobj": config[job_name], "curvals": {}, "default": None}
        found, value = config.search("computeSite", opt=search_opt)
        if found:
            search_opt["curvals"]["curr_site"] = value
        found, value = config.search("computeCloud", opt=search_opt)
        if found:
            search_opt["curvals"]["curr_cloud"] = value

        # Set job attributes based on the values found in the config,
        # excluding the ones in the _ATTRS_MISC group. The attributes in
        # this group are somewhat "special":
        # * the HTCondor plugin, which uses 'attrs' and 'profile', has its
        #   own mechanism for setting them,
        # * 'cmdvals' is being set internally, not via config.
        job_values = _get_job_values(config, search_opt, None)
        for attr in _ATTRS_ALL - _ATTRS_MISC:
            if not getattr(gwjob, attr) and job_values.get(attr, None):
                setattr(gwjob, attr, job_values[attr])

        # Create script and add command line to job.
        gwjob.executable, gwjob.arguments = create_cmd(config, prefix)

        # Determine inputs from command line.
        for file_key in re.findall(r"<FILE:([^>]+)>", gwjob.arguments):
            gwfile = generic_workflow.get_file(file_key)
            generic_workflow.add_job_inputs(gwjob.name, gwfile)

        _enhance_command(config, generic_workflow, gwjob, {})
        return gwjob

    return create_final_job


def _create_final_command(config, prefix):
    """Create the command and shell script for the final job.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Directory in which to output final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    search_opt = {
        "replaceVars": False,
        "replaceEnvVars": False,
        "expandEnvVars": False,
        "searchobj": config["finalJob"],
    }

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w", encoding="utf8") as fh:
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)
        print("set -x", file=fh)

        print("qgraphFile=$1", file=fh)
        print("butlerConfig=$2", file=fh)

        i = 1
        found, command = config.search(f"command{i}", opt=search_opt)
        while found:
            # Temporarily replace any env vars so formatter doesn't try to
            # replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # qgraphFile and butlerConfig will be args to script and set to
            # env vars.
            command = command.replace("{qgraphFile}", "<BPSTMP:qgraphFile>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in command string.
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env placeholders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)

            print(command, file=fh)
            i += 1
            found, command = config.search(f"command{i}", opt=search_opt)
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    return executable, f"<FILE:runQgraphFile> {orig_butler}"
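
# Illustrative result (the command is hypothetical): a config entry such as
#
#     finalJob:
#       command1: "my-final-step {qgraphFile} {butlerConfig}"
#
# yields a final_job.bash whose body contains
#
#     my-final-step ${qgraphFile} ${butlerConfig}
#
# with the two values supplied as $1 and $2 when the script is invoked.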


def _create_merge_command(config, prefix):
    """Create the command and shell script for merging the execution Butler.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Bps configuration.
    prefix : `str`
        Directory in which to output final script.

    Returns
    -------
    executable : `lsst.ctrl.bps.GenericWorkflowExec`
        Executable object for the final script.
    arguments : `str`
        Command line needed to call the final script.
    """
    search_opt = {
        "replaceVars": False,
        "replaceEnvVars": False,
        "expandEnvVars": False,
        "searchobj": config["executionButler"],
    }

    script_file = os.path.join(prefix, "final_job.bash")
    with open(script_file, "w", encoding="utf8") as fh:
        print("#!/bin/bash\n", file=fh)
        print("set -e", file=fh)
        print("set -x", file=fh)

        print("butlerConfig=$1", file=fh)
        print("executionButlerDir=$2", file=fh)

        i = 1
        found, command = config.search(f"command{i}", opt=search_opt)
        while found:
            # Temporarily replace any env vars so formatter doesn't try to
            # replace them.
            command = re.sub(r"\${([^}]+)}", r"<BPSTMP:\1>", command)

            # executionButlerDir and butlerConfig will be args to script and
            # set to env vars.
            command = command.replace("{executionButlerDir}", "<BPSTMP:executionButlerDir>")
            command = command.replace("{butlerConfig}", "<BPSTMP:butlerConfig>")

            # Replace all other vars in command string.
            search_opt["replaceVars"] = True
            command = config.formatter.format(command, config, search_opt)
            search_opt["replaceVars"] = False

            # Replace any temporary env placeholders.
            command = re.sub(r"<BPSTMP:([^>]+)>", r"${\1}", command)

            print(command, file=fh)
            i += 1
            found, command = config.search(f"command{i}", opt=search_opt)
    os.chmod(script_file, 0o755)
    executable = GenericWorkflowExec(os.path.basename(script_file), script_file, True)

    _, orig_butler = config.search("butlerConfig")
    # The execution butler was saved as butlerConfig in the workflow.
    return executable, f"{orig_butler} <FILE:butlerConfig>"


def add_final_job_as_sink(generic_workflow, final_job):
    """Add final job as the single sink for the workflow.

    Parameters
    ----------
    generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
        Generic workflow to which attributes should be added.
    final_job : `lsst.ctrl.bps.GenericWorkflowJob`
        Job to add as new sink node depending upon all previous sink nodes.
    """
    # Find sink nodes of generic workflow graph.
    gw_sinks = [n for n in generic_workflow if generic_workflow.out_degree(n) == 0]
    _LOG.debug("gw_sinks = %s", gw_sinks)

    generic_workflow.add_job(final_job)
    generic_workflow.add_job_relationships(gw_sinks, final_job.name)
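
# For example, in a workflow whose current sinks are jobs "job3" and "job4",
# add_final_job_as_sink adds edges job3 -> final_job and job4 -> final_job,
# making the final job the graph's single sink.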