Coverage for python/lsst/ctrl/bps/panda/cmd_line_embedder.py: 16%

52 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-18 09:55 +0000

1# This file is part of ctrl_bps_panda. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <https://www.gnu.org/licenses/>. 

27 

28import logging 

29import os 

30import re 

31 

32from lsst.ctrl.bps.panda.constants import PANDA_MAX_LEN_INPUT_FILE 

33 

34_LOG = logging.getLogger(__name__) 

35 

36 

class CommandLineEmbedder:
    """Embed static (constant across a task) values into the pipeline
    execution command line and resolve submission side environment
    variables.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration that includes the list of dynamic
        (unique per job) and submission side resolved variables.
        Only its ``get(key, default)`` method is used here; the keys read
        are ``placeholderParams`` (default ``["qgraphNodeId", "qgraphId"]``)
        and ``submitSideResolvedParams`` (default ``["USER"]``).
    """

    # A ``{name}`` placeholder that is not part of a shell-style ``${name}``
    # expression.  A lookbehind is used (rather than consuming the preceding
    # character) so that a placeholder at the very start of the command line
    # and back-to-back placeholders such as ``{a}{b}`` are all found.
    _PLACEHOLDER_RE = re.compile(r"(?<!\$)\{([^}]+)\}")

    def __init__(self, config):
        self.leave_placeholder_params = config.get("placeholderParams", ["qgraphNodeId", "qgraphId"])
        self.submit_side_resolved = config.get("submitSideResolvedParams", ["USER"])

    @classmethod
    def _find_placeholders(cls, cmd_line):
        """Return the set of ``{name}`` placeholder names in a command line,
        skipping ``${name}`` expressions.
        """
        return {match.group(1) for match in cls._PLACEHOLDER_RE.finditer(cmd_line)}

    def replace_static_parameters(self, cmd_line, lazy_vars):
        """Substitute the lazy parameters in the command line which
        are static, the same for every job in the workflow and could be
        defined once.

        This function offloads the edge node processing
        and number of parameters transferred together with job.

        Parameters
        ----------
        cmd_line : `str`
            Command line to be processed.
        lazy_vars : `dict`
            Lazy variables and their values.

        Returns
        -------
        cmd : `str`
            Processed command line. Parameters listed in
            ``leave_placeholder_params`` are left as ``{name}`` placeholders.
        """
        for param_name, param_val in lazy_vars.items():
            if param_name not in self.leave_placeholder_params:
                cmd_line = cmd_line.replace("{" + param_name + "}", param_val)
        return cmd_line

    def replace_static_files(self, cmd_line, files):
        """Substitute the FILE keys with values in the command line
        which are static, the same for every job in the workflow and
        could be defined once.

        Parameters
        ----------
        cmd_line : `str`
            Command line to be processed.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input and output files for the job.

        Returns
        -------
        cmd : `str`
            Processed command line.

        Raises
        ------
        RuntimeError
            Raised if the command line references a ``<FILE:name>`` key for
            which no file of that name was given.
        """
        # Keep the untouched command line for the error message.
        orig_cmd_line = cmd_line

        # Look files up by name.
        files_by_name = {gwfile.name: gwfile for gwfile in files}

        for file_key in re.findall(r"<FILE:([^>]+)>", cmd_line):
            try:
                gwfile = files_by_name[file_key]
            except KeyError as e:
                # Exceptions do not interpolate %-style arguments, so the
                # message has to be formatted before it is raised.
                raise RuntimeError(
                    f"{file_key} in command line, but corresponding file not given to function "
                    f"({orig_cmd_line})"
                ) from e

            # Only files that are not transferred by the WMS and are read
            # remotely by the job are embedded; others keep their key.
            if not gwfile.wms_transfer and gwfile.job_access_remote:
                cmd_line = cmd_line.replace(f"<FILE:{gwfile.name}>", gwfile.src_uri)
        return cmd_line

    def resolve_submission_side_env_vars(self, cmd_line):
        """Substitute the lazy parameters in the command line
        which are defined and resolved on the submission side.

        Parameters
        ----------
        cmd_line : `str`
            Command line to be processed.

        Returns
        -------
        cmd : `str`
            Processed command line. ``<ENV:name>`` keys whose variable is
            unset or empty are left in place and reported at INFO level.
        """
        for param in self.submit_side_resolved:
            value = os.getenv(param)
            if value:
                cmd_line = cmd_line.replace("<ENV:" + param + ">", value)
            else:
                _LOG.info("Expected parameter %s is not found in the environment variables", param)
        return cmd_line

    def attach_pseudo_file_params(self, lazy_vars):
        """Add the parameters needed to finalize creation of a pseudo file.

        Parameters
        ----------
        lazy_vars : `dict`
            Values to be substituted.

        Returns
        -------
        suffix : `str`
            Pseudo input file name suffix: one ``+name:value`` entry for
            each parameter in ``leave_placeholder_params``, with an empty
            value for parameters missing from ``lazy_vars``.
        """
        return "".join(
            "+" + item + ":" + lazy_vars.get(item, "") for item in self.leave_placeholder_params
        )

    def substitute_command_line(self, cmd_line, lazy_vars, job_name, gwfiles):
        """Preprocess the command line leaving for the edge node evaluation
        only parameters which are job / environment dependent.

        Parameters
        ----------
        cmd_line : `str`
            Command line containing all lazy placeholders.
        lazy_vars : `dict` [ `str`, `str` ]
            Lazy parameter name/values.
        job_name : `str`
            Job name proposed by BPS.
        gwfiles : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Job files.

        Returns
        -------
        cmd_line : `str`
            Processed command line.
        file_name : `str`
            Job pseudo input file name.

        Raises
        ------
        KeyError
            Raised if the command line contains a ``{name}`` placeholder
            that is not present in ``lazy_vars``.
        RuntimeError
            Raised if pseudo input filename is too long.
        """
        # Restrict the lazy variables to those actually used by this command.
        actual_lazy_vars = {key: lazy_vars[key] for key in self._find_placeholders(cmd_line)}

        cmd_line = self.replace_static_parameters(cmd_line, actual_lazy_vars)
        cmd_line = self.resolve_submission_side_env_vars(cmd_line)
        if gwfiles:
            cmd_line = self.replace_static_files(cmd_line, gwfiles)
        file_name = job_name + self.attach_pseudo_file_params(actual_lazy_vars)

        if len(file_name) > PANDA_MAX_LEN_INPUT_FILE:
            _LOG.error("Too long pseudo input filename: %s", file_name)
            raise RuntimeError(
                f"job pseudo input file name contains more than {PANDA_MAX_LEN_INPUT_FILE} symbols. Aborting."
            )

        return cmd_line, file_name