Coverage for python/lsst/ctrl/bps/panda/cmd_line_embedder.py: 16%

48 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-08-10 07:57 +0000

1# This file is part of ctrl_bps_panda. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import logging 

23import os 

24import re 

25 

26_LOG = logging.getLogger(__name__) 

27 

28 

class CommandLineEmbedder:
    """Embed static values into the pipeline execution command line.

    Values that are constant across a task are substituted directly into
    the command line, and submission side environment variables are
    resolved, so that only job-dependent placeholders remain.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration that includes the list of dynamic
        (unique per job) and submission side resolved variables.
        Only its ``get(key, default)`` method is used here.
    """

    def __init__(self, config):
        # Placeholders that must survive substitution untouched.
        self.leave_placeholder_params = config.get("placeholderParams", ["qgraphNodeId", "qgraphId"])
        # Environment variable names resolved on the submission side.
        self.submit_side_resolved = config.get("submitSideResolvedParams", ["USER"])

    def replace_static_parameters(self, cmd_line, lazy_vars):
        """Substitute the lazy parameters in the command line which
        are static, the same for every job in the workflow and could be
        defined once.

        This function offloads the edge node processing
        and number of parameters transferred together with job.

        Parameters
        ----------
        cmd_line : `str`
            Command line to be processed.
        lazy_vars : `dict`
            Lazy variables and their values.

        Returns
        -------
        cmd : `str`
            Processed command line. ``{name}`` placeholders whose name is
            listed in ``leave_placeholder_params`` are left as they are.
        """
        for param_name, param_val in lazy_vars.items():
            if param_name not in self.leave_placeholder_params:
                cmd_line = cmd_line.replace("{" + param_name + "}", param_val)
        return cmd_line

    def replace_static_files(self, cmd_line, files):
        """Substitute the FILE keys with values in the command line
        which are static, the same for every job in the workflow and
        could be defined once.

        Parameters
        ----------
        cmd_line : `str`
            Command line to be processed.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input and output files for the job.

        Returns
        -------
        cmd : `str`
            Processed command line.

        Raises
        ------
        RuntimeError
            Raised if the command line references a ``<FILE:name>`` key
            for which no file with that name was given.
        """
        # Keep the untouched command line for the error message.
        orig_cmd_line = cmd_line

        # Lookup of the given files by name.
        files_by_name = {gwfile.name: gwfile for gwfile in files}

        for file_key in re.findall(r"<FILE:([^>]+)>", cmd_line):
            try:
                gwfile = files_by_name[file_key]
            except KeyError as e:
                # Exceptions do not interpolate logging-style arguments,
                # so the message has to be formatted explicitly.
                raise RuntimeError(
                    f"{file_key} in command line, but corresponding file not given to function "
                    f"({orig_cmd_line})"
                ) from e

            # Only files that are not transferred by the WMS and are
            # accessed remotely by the job get their URI embedded; other
            # FILE placeholders are left in place.
            if not gwfile.wms_transfer and gwfile.job_access_remote:
                cmd_line = cmd_line.replace(f"<FILE:{gwfile.name}>", gwfile.src_uri)
        return cmd_line

    def resolve_submission_side_env_vars(self, cmd_line):
        """Substitute the lazy parameters in the command line
        which are defined and resolved on the submission side.

        Parameters
        ----------
        cmd_line : `str`
            Command line to be processed.

        Returns
        -------
        cmd : `str`
            Processed command line. ``<ENV:name>`` placeholders for
            variables that are unset or empty are left unchanged.
        """
        for param in self.submit_side_resolved:
            value = os.getenv(param)
            if value:
                cmd_line = cmd_line.replace("<ENV:" + param + ">", value)
            else:
                _LOG.info("Expected parameter %s is not found in the environment variables", param)
        return cmd_line

    def attach_pseudo_file_params(self, lazy_vars):
        """Add the parameters needed to finalize creation of a pseudo file.

        Parameters
        ----------
        lazy_vars : `dict`
            Values to be substituted.

        Returns
        -------
        suffix : `str`
            Pseudo input file name suffix: one ``+name:value`` segment per
            entry of ``leave_placeholder_params``, with an empty value for
            names missing from ``lazy_vars``.
        """
        return "".join(f"+{item}:{lazy_vars.get(item, '')}" for item in self.leave_placeholder_params)

    def substitute_command_line(self, cmd_line, lazy_vars, job_name, gwfiles):
        """Preprocess the command line leaving for the edge node evaluation
        only parameters which are job / environment dependent.

        Parameters
        ----------
        cmd_line : `str`
            Command line containing all lazy placeholders.
        lazy_vars : `dict` [ `str`, `str` ]
            Lazy parameter name/values.
        job_name : `str`
            Job name proposed by BPS.
        gwfiles : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Job files.

        Returns
        -------
        cmd_line : `str`
            Processed command line.
        file_name : `str`
            Job pseudo input file name.

        Raises
        ------
        KeyError
            Raised if the command line uses a ``{name}`` placeholder that
            is missing from ``lazy_vars``.
        """
        # Names of the ``{name}`` placeholders actually used in the command
        # line; ``${name}`` forms are skipped.
        # NOTE(review): the pattern requires one character before ``{``, so a
        # placeholder at the very start of the command line (or directly
        # following another placeholder) is not detected -- confirm whether
        # that is intended.
        cmd_vals = {m.group(1) for m in re.finditer(r"[^$]{([^}]+)}", cmd_line)}
        actual_lazy_vars = {key: lazy_vars[key] for key in cmd_vals}

        cmd_line = self.replace_static_parameters(cmd_line, actual_lazy_vars)
        cmd_line = self.resolve_submission_side_env_vars(cmd_line)
        if gwfiles:
            cmd_line = self.replace_static_files(cmd_line, gwfiles)
        file_name = job_name + self.attach_pseudo_file_params(actual_lazy_vars)
        return cmd_line, file_name