Coverage for python/lsst/ctrl/bps/panda/cmd_line_embedder.py: 16%
48 statements
« prev ^ index » next coverage.py v7.4.2, created at 2024-02-23 12:14 +0000
« prev ^ index » next coverage.py v7.4.2, created at 2024-02-23 12:14 +0000
1# This file is part of ctrl_bps_panda.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <https://www.gnu.org/licenses/>.
28import logging
29import os
30import re
32_LOG = logging.getLogger(__name__)
class CommandLineEmbedder:
    """Embed static (constant across a task) values into the pipeline
    execution command line and resolve submission side environment
    variables.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration that includes the list of dynamic
        (unique per job) and submission side resolved variables.
    """

    def __init__(self, config):
        # Parameters whose ``{name}`` placeholder must stay in the command
        # line; their values go into the pseudo file name suffix instead.
        self.leave_placeholder_params = config.get("placeholderParams", ["qgraphNodeId", "qgraphId"])
        # Environment variables whose ``<ENV:name>`` placeholder is resolved
        # in the process that calls this class.
        self.submit_side_resolved = config.get("submitSideResolvedParams", ["USER"])

    def replace_static_parameters(self, cmd_line, lazy_vars):
        """Substitute the lazy parameters in the command line which
        are static, the same for every job in the workflow and could be
        defined once.

        This function offloads the edge node processing
        and number of parameters transferred together with job.

        Parameters
        ----------
        cmd_line : `str`
            Command line to be processed.
        lazy_vars : `dict`
            Lazy variables and their values.

        Returns
        -------
        cmd : `str`
            Processed command line. Placeholders named in
            ``leave_placeholder_params`` are left unchanged.
        """
        for param_name, param_val in lazy_vars.items():
            if param_name not in self.leave_placeholder_params:
                cmd_line = cmd_line.replace("{" + param_name + "}", param_val)
        return cmd_line

    def replace_static_files(self, cmd_line, files):
        """Substitute the FILE keys with values in the command line
        which are static, the same for every job in the workflow and
        could be defined once.

        Parameters
        ----------
        cmd_line : `str`
            Command line to be processed.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input and output files for the job.

        Returns
        -------
        cmd : `str`
            Processed command line.

        Raises
        ------
        RuntimeError
            Raised if the command line contains a ``<FILE:name>`` key for
            which no file with that name was given.
        """
        # Keep the unmodified command line for the error message, since
        # cmd_line is rewritten while looping.
        orig_cmd_line = cmd_line

        # Lookup of the given files by name.
        files_by_name = {gwfile.name: gwfile for gwfile in files}

        for file_key in re.findall(r"<FILE:([^>]+)>", cmd_line):
            try:
                gwfile = files_by_name[file_key]
            except KeyError as e:
                # Exceptions do not apply %-style formatting to their
                # arguments, so the message has to be built explicitly.
                raise RuntimeError(
                    f"{file_key} in command line, but corresponding file not given to function "
                    f"({orig_cmd_line})"
                ) from e

            # Only files that are not transferred by the WMS and that are
            # flagged as remotely accessible by the job are replaced by
            # their source URI; other FILE keys are left in place.
            if not gwfile.wms_transfer and gwfile.job_access_remote:
                cmd_line = cmd_line.replace(f"<FILE:{gwfile.name}>", gwfile.src_uri)
        return cmd_line

    def resolve_submission_side_env_vars(self, cmd_line):
        """Substitute the lazy parameters in the command line
        which are defined and resolved on the submission side.

        Parameters
        ----------
        cmd_line : `str`
            Command line to be processed.

        Returns
        -------
        cmd : `str`
            Processed command line.
        """
        for param in self.submit_side_resolved:
            value = os.getenv(param)
            # An unset or empty variable leaves the placeholder untouched.
            if value:
                cmd_line = cmd_line.replace("<ENV:" + param + ">", value)
            else:
                _LOG.info("Expected parameter %s is not found in the environment variables", param)
        return cmd_line

    def attach_pseudo_file_params(self, lazy_vars):
        """Add the parameters needed to finalize creation of a pseudo file.

        Parameters
        ----------
        lazy_vars : `dict`
            Values to be substituted.

        Returns
        -------
        suffix : `str`
            Pseudo input file name suffix, one ``+name:value`` entry per
            parameter in ``leave_placeholder_params``. The value is empty
            when the parameter is missing from ``lazy_vars``.
        """
        return "".join(
            "+" + item + ":" + lazy_vars.get(item, "") for item in self.leave_placeholder_params
        )

    def substitute_command_line(self, cmd_line, lazy_vars, job_name, gwfiles):
        """Preprocess the command line leaving for the edge node evaluation
        only parameters which are job / environment dependent.

        Parameters
        ----------
        cmd_line : `str`
            Command line containing all lazy placeholders.
        lazy_vars : `dict` [ `str`, `str` ]
            Lazy parameter name/values.
        job_name : `str`
            Job name proposed by BPS.
        gwfiles : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Job files.

        Returns
        -------
        cmd_line : `str`
            Processed command line.
        file_name : `str`
            Job pseudo input file name.

        Raises
        ------
        KeyError
            Raised if the command line contains a ``{name}`` placeholder
            that has no entry in ``lazy_vars``.
        RuntimeError
            Raised if the command line contains a ``<FILE:name>`` key with
            no matching entry in ``gwfiles``.
        """
        # Collect every ``{name}`` placeholder except the ``${name}`` form.
        # A lookbehind is used instead of consuming the preceding character,
        # so placeholders at the very start of the command line or directly
        # after another placeholder are found as well.
        cmd_vals = {m.group(1) for m in re.finditer(r"(?<!\$)\{([^}]+)\}", cmd_line)}
        actual_lazy_vars = {key: lazy_vars[key] for key in cmd_vals}

        cmd_line = self.replace_static_parameters(cmd_line, actual_lazy_vars)
        cmd_line = self.resolve_submission_side_env_vars(cmd_line)
        if gwfiles:
            cmd_line = self.replace_static_files(cmd_line, gwfiles)
        file_name = job_name + self.attach_pseudo_file_params(actual_lazy_vars)

        return cmd_line, file_name