Coverage for python/lsst/ctrl/bps/panda/cmd_line_embedder.py: 16%
52 statements
« prev ^ index » next · coverage.py v7.3.1, created at 2023-09-23 09:40 +0000
1# This file is part of ctrl_bps_panda.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <https://www.gnu.org/licenses/>.
28import logging
29import os
30import re
32from lsst.ctrl.bps.panda.constants import PANDA_MAX_LEN_INPUT_FILE
34_LOG = logging.getLogger(__name__)
class CommandLineEmbedder:
    """Embed static values into the pipeline execution command line.

    Static values are those that are constant across a task.  The class
    also resolves submission-side environment variables.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration that includes the list of dynamic
        (unique per job) and submission-side resolved variables.  Only
        ``config.get(key, default)`` is called on it.
    """

    # Matches "{name}" unless the brace is directly preceded by "$", so that
    # shell-style "${VAR}" is left alone.  A lookbehind is used rather than
    # consuming the preceding character; otherwise a placeholder at the very
    # start of the command line, or one directly following another
    # placeholder ("{a}{b}"), would be missed.
    _PLACEHOLDER_RE = re.compile(r"(?<!\$)\{([^}]+)\}")

    def __init__(self, config):
        # Names whose "{name}" placeholders must survive in the command line.
        self.leave_placeholder_params = config.get("placeholderParams", ["qgraphNodeId", "qgraphId"])
        # Environment variable names substituted for "<ENV:name>" markers.
        self.submit_side_resolved = config.get("submitSideResolvedParams", ["USER"])

    @staticmethod
    def _placeholder_names(cmd_line):
        """Return the names of all ``{name}`` placeholders in a command line.

        Parameters
        ----------
        cmd_line : `str`
            Command line to scan.

        Returns
        -------
        names : `set` [`str`]
            Text found between braces, excluding braces directly preceded
            by ``$``.
        """
        return {match.group(1) for match in CommandLineEmbedder._PLACEHOLDER_RE.finditer(cmd_line)}

    def replace_static_parameters(self, cmd_line, lazy_vars):
        """Substitute the lazy parameters that are static.

        Static parameters are the same for every job in the workflow and
        can be defined once.  Resolving them here reduces the edge node
        processing and the number of parameters transferred with a job.

        Parameters
        ----------
        cmd_line : `str`
            Command line to be processed.
        lazy_vars : `dict`
            Lazy variables and their values.

        Returns
        -------
        cmd : `str`
            Processed command line.  Placeholders whose names are listed
            in ``leave_placeholder_params`` are left untouched.
        """
        for param_name, param_val in lazy_vars.items():
            if param_name not in self.leave_placeholder_params:
                # str() keeps string values unchanged while avoiding a
                # TypeError from str.replace for non-string values.
                cmd_line = cmd_line.replace("{" + param_name + "}", str(param_val))
        return cmd_line

    def replace_static_files(self, cmd_line, files):
        """Substitute the static ``<FILE:name>`` keys with their values.

        Static files are the same for every job in the workflow and can
        be defined once.

        Parameters
        ----------
        cmd_line : `str`
            Command line to be processed.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input and output files for the job.

        Returns
        -------
        cmd : `str`
            Processed command line.  A key is replaced by the file's
            ``src_uri`` only when the file has ``wms_transfer`` false and
            ``job_access_remote`` true; other keys are left as they are.

        Raises
        ------
        RuntimeError
            Raised if the command line references a file key for which no
            file was given.
        """
        # Keep the untouched command line for the error message, since
        # cmd_line is rewritten as keys are substituted.
        orig_cmd_line = cmd_line

        files_by_name = {gwfile.name: gwfile for gwfile in files}

        for file_key in re.findall(r"<FILE:([^>]+)>", cmd_line):
            try:
                gwfile = files_by_name[file_key]
            except KeyError as e:
                # Exceptions do not interpolate logging-style arguments, so
                # the message is formatted explicitly.
                raise RuntimeError(
                    f"{file_key} in command line, but corresponding file not given to function "
                    f"({orig_cmd_line})"
                ) from e

            if not gwfile.wms_transfer and gwfile.job_access_remote:
                cmd_line = cmd_line.replace(f"<FILE:{gwfile.name}>", gwfile.src_uri)
        return cmd_line

    def resolve_submission_side_env_vars(self, cmd_line):
        """Substitute the parameters resolved on the submission side.

        Each name in ``submit_side_resolved`` is looked up in the
        environment and, if set to a non-empty value, replaces the
        matching ``<ENV:name>`` marker.

        Parameters
        ----------
        cmd_line : `str`
            Command line to be processed.

        Returns
        -------
        cmd : `str`
            Processed command line.
        """
        for param in self.submit_side_resolved:
            value = os.getenv(param)
            # An unset or empty variable leaves the marker in place.
            if value:
                cmd_line = cmd_line.replace("<ENV:" + param + ">", value)
            else:
                _LOG.info("Expected parameter %s is not found in the environment variables", param)
        return cmd_line

    def attach_pseudo_file_params(self, lazy_vars):
        """Build the suffix needed to finalize creation of a pseudo file.

        Parameters
        ----------
        lazy_vars : `dict`
            Values to be substituted.

        Returns
        -------
        suffix : `str`
            Pseudo input file name suffix: one ``+name:value`` piece per
            entry of ``leave_placeholder_params``, with an empty value for
            names missing from ``lazy_vars``.
        """
        return "".join(f"+{item}:{lazy_vars.get(item, '')}" for item in self.leave_placeholder_params)

    def substitute_command_line(self, cmd_line, lazy_vars, job_name, gwfiles):
        """Preprocess the command line.

        Only parameters which are job / environment dependent are left
        for evaluation on the edge node.

        Parameters
        ----------
        cmd_line : `str`
            Command line containing all lazy placeholders.
        lazy_vars : `dict` [`str`, `str`]
            Lazy parameter name/values.
        job_name : `str`
            Job name proposed by BPS.
        gwfiles : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Job files.

        Returns
        -------
        cmd_line : `str`
            Processed command line.
        file_name : `str`
            Job pseudo input file name.

        Raises
        ------
        KeyError
            Raised if the command line contains a ``{name}`` placeholder
            that has no entry in ``lazy_vars``.
        RuntimeError
            Raised if pseudo input filename is too long.
        """
        # Restrict the lazy variables to those actually used by this command.
        actual_lazy_vars = {key: lazy_vars[key] for key in self._placeholder_names(cmd_line)}

        cmd_line = self.replace_static_parameters(cmd_line, actual_lazy_vars)
        cmd_line = self.resolve_submission_side_env_vars(cmd_line)
        if gwfiles:
            cmd_line = self.replace_static_files(cmd_line, gwfiles)
        file_name = job_name + self.attach_pseudo_file_params(actual_lazy_vars)

        if len(file_name) > PANDA_MAX_LEN_INPUT_FILE:
            _LOG.error("Too long pseudo input filename: %s", file_name)
            raise RuntimeError(
                f"job pseudo input file name contains more than {PANDA_MAX_LEN_INPUT_FILE} symbols. Aborting."
            )

        return cmd_line, file_name