Coverage for python/lsst/ctrl/bps/panda/cmd_line_embedder.py: 16%

48 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-29 03:09 -0700

1# This file is part of ctrl_bps_panda. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <https://www.gnu.org/licenses/>. 

27 

28import logging 

29import os 

30import re 

31 

_LOG = logging.getLogger(__name__)


class CommandLineEmbedder:
    """Embed static (constant across a task) values into the pipeline
    execution command line and resolve submission side environment
    variables.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration that includes the list of dynamic
        (unique per job) and submission side resolved variables.
        Only its ``get(key, default)`` method is used here.
    """

    # ``{name}`` placeholders.  The negative lookbehind skips shell style
    # ``${NAME}`` references without consuming the preceding character, so
    # placeholders at the very start of the command line or directly after
    # another placeholder are still found.
    _PLACEHOLDER_RE = re.compile(r"(?<!\$)\{([^}]+)\}")

    # ``<FILE:name>`` references to generic workflow files.
    _FILE_RE = re.compile(r"<FILE:([^>]+)>")

    def __init__(self, config):
        # Names whose ``{name}`` placeholders are left in the command line.
        self.leave_placeholder_params = config.get("placeholderParams", ["qgraphNodeId", "qgraphId"])
        # Environment variable names substituted from the current process
        # environment.
        self.submit_side_resolved = config.get("submitSideResolvedParams", ["USER"])

    def replace_static_parameters(self, cmd_line, lazy_vars):
        """Substitute the lazy parameters in the command line which
        are static, the same for every job in the workflow and could be
        defined once.

        This function offloads the edge node processing
        and number of parameters transferred together with job.

        Parameters
        ----------
        cmd_line : `str`
            Command line to be processed.
        lazy_vars : `dict` [`str`, `str`]
            Lazy variables and their values.

        Returns
        -------
        cmd : `str`
            Processed command line. Placeholders whose names are listed in
            ``leave_placeholder_params`` are left untouched.
        """
        for param_name, param_val in lazy_vars.items():
            if param_name in self.leave_placeholder_params:
                continue
            cmd_line = cmd_line.replace("{" + param_name + "}", param_val)
        return cmd_line

    def replace_static_files(self, cmd_line, files):
        """Substitute the FILE keys with values in the command line
        which are static, the same for every job in the workflow and
        could be defined once.

        Parameters
        ----------
        cmd_line : `str`
            Command line to be processed.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input and output files for the job.

        Returns
        -------
        cmd : `str`
            Processed command line. Only ``<FILE:name>`` keys whose file
            has ``wms_transfer`` false and ``job_access_remote`` true are
            replaced (by the file's ``src_uri``); other keys are kept.

        Raises
        ------
        RuntimeError
            Raised if a ``<FILE:name>`` key in the command line has no
            file of that name in ``files``.
        """
        # Keep the unmodified command line for the error message.
        orig_cmd_line = cmd_line

        files_by_name = {gwfile.name: gwfile for gwfile in files}

        # The keys are collected once, before any replacement is made.
        for file_key in self._FILE_RE.findall(cmd_line):
            try:
                gwfile = files_by_name[file_key]
            except KeyError as e:
                # Exceptions do not interpolate %-style arguments, so the
                # message has to be formatted explicitly.
                raise RuntimeError(
                    f"{file_key} in command line, but corresponding file not given to function "
                    f"({orig_cmd_line})"
                ) from e

            if not gwfile.wms_transfer and gwfile.job_access_remote:
                cmd_line = cmd_line.replace(f"<FILE:{gwfile.name}>", gwfile.src_uri)
        return cmd_line

    def resolve_submission_side_env_vars(self, cmd_line):
        """Substitute the lazy parameters in the command line
        which are defined and resolved on the submission side.

        Parameters
        ----------
        cmd_line : `str`
            Command line to be processed.

        Returns
        -------
        cmd : `str`
            Processed command line. ``<ENV:NAME>`` keys are replaced for
            every name in ``submit_side_resolved`` that has a non-empty
            value in the environment; unset or empty variables are logged
            and their keys left in place.
        """
        for param in self.submit_side_resolved:
            value = os.getenv(param)
            if value:
                cmd_line = cmd_line.replace("<ENV:" + param + ">", value)
            else:
                _LOG.info("Expected parameter %s is not found in the environment variables", param)
        return cmd_line

    def attach_pseudo_file_params(self, lazy_vars):
        """Add the parameters needed to finalize creation of a pseudo file.

        Parameters
        ----------
        lazy_vars : `dict` [`str`, `str`]
            Values to be substituted.

        Returns
        -------
        suffix : `str`
            Pseudo input file name suffix: one ``+name:value`` piece per
            entry of ``leave_placeholder_params``, with an empty value
            for names missing from ``lazy_vars``.
        """
        return "".join(
            "+" + item + ":" + lazy_vars.get(item, "") for item in self.leave_placeholder_params
        )

    def substitute_command_line(self, cmd_line, lazy_vars, job_name, gwfiles):
        """Preprocess the command line leaving for the edge node evaluation
        only parameters which are job / environment dependent.

        Parameters
        ----------
        cmd_line : `str`
            Command line containing all lazy placeholders.
        lazy_vars : `dict` [ `str`, `str` ]
            Lazy parameter name/values.
        job_name : `str`
            Job name proposed by BPS.
        gwfiles : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Job files.

        Returns
        -------
        cmd_line : `str`
            Processed command line.
        file_name : `str`
            Job pseudo input file name.

        Raises
        ------
        KeyError
            Raised if the command line contains a ``{name}`` placeholder
            that has no entry in ``lazy_vars``.
        RuntimeError
            Raised if the command line contains a ``<FILE:name>`` key with
            no matching file in ``gwfiles``.
        """
        # Restrict the lazy variables to those actually referenced by this
        # command line; ``${NAME}`` shell references are not placeholders.
        cmd_vals = {match.group(1) for match in self._PLACEHOLDER_RE.finditer(cmd_line)}
        actual_lazy_vars = {key: lazy_vars[key] for key in cmd_vals}

        cmd_line = self.replace_static_parameters(cmd_line, actual_lazy_vars)
        cmd_line = self.resolve_submission_side_env_vars(cmd_line)
        if gwfiles:
            cmd_line = self.replace_static_files(cmd_line, gwfiles)
        file_name = job_name + self.attach_pseudo_file_params(actual_lazy_vars)

        return cmd_line, file_name