Coverage for python/lsst/ctrl/bps/parsl/site.py: 51%

54 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-05-03 10:14 +0000

1# This file is part of ctrl_bps_parsl. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org) and the LSST DESC (https://www.lsstdesc.org/). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <https://www.gnu.org/licenses/>. 

27 

28from abc import ABC, abstractmethod 

29from types import ModuleType 

30from typing import TYPE_CHECKING 

31 

32import parsl.config 

33from lsst.ctrl.bps import BpsConfig 

34from lsst.utils import doImport 

35from parsl.addresses import address_by_hostname 

36from parsl.executors.base import ParslExecutor 

37from parsl.monitoring import MonitoringHub 

38 

39from .configuration import get_bps_config_value, get_workflow_name 

40from .environment import export_environment 

41 

42if TYPE_CHECKING: 42 ↛ 43line 42 didn't jump to line 43, because the condition on line 42 was never true

43 from .job import ParslJob 

44 

45__all__ = ("SiteConfig",) 

46 

47 

class SiteConfig(ABC):
    """Base class for site configuration.

    Subclasses need to override at least the ``get_executors`` and
    ``select_executor`` methods.

    Parameters
    ----------
    config : `BpsConfig`
        BPS configuration.
    add_resources : `bool`
        Add resource specification when submitting the job? This is only
        appropriate for the ``WorkQueue`` executor; other executors will
        raise an exception.
    """

    def __init__(self, config: BpsConfig, add_resources: bool = False):
        self.config = config
        # Cache the ``site.<computeSite>`` sub-configuration; the other
        # methods read their settings from it.
        self.site = self.get_site_subconfig(config)
        self.add_resources = add_resources

    @staticmethod
    def get_site_subconfig(config: BpsConfig) -> BpsConfig:
        """Get BPS configuration for the site of interest.

        We return the BPS sub-configuration for the site indicated by the
        ``computeSite`` value, which is ``site.<computeSite>``.

        Parameters
        ----------
        config : `BpsConfig`
            BPS configuration.

        Returns
        -------
        site : `BpsConfig`
            Site sub-configuration.
        """
        computeSite = get_bps_config_value(config, "computeSite", str, required=True)
        return get_bps_config_value(config, f".site.{computeSite}", BpsConfig, required=True)

    @classmethod
    def from_config(cls, config: BpsConfig) -> "SiteConfig":
        """Get the site configuration nominated in the BPS config.

        The ``computeSite`` (`str`) value in the BPS configuration is used to
        select a site configuration. The site configuration class to use is
        specified by the BPS configuration as ``site.<computeSite>.class``
        (`str`), which should be the fully-qualified name of a python class
        that inherits from `SiteConfig`.

        Parameters
        ----------
        config : `BpsConfig`
            BPS configuration.

        Returns
        -------
        site_config : subclass of `SiteConfig`
            Site configuration.

        Raises
        ------
        RuntimeError
            Raised if the object named by ``site.<computeSite>.class`` is not
            a class that inherits from `SiteConfig`.
        """
        site = cls.get_site_subconfig(config)
        name = get_bps_config_value(site, "class", str, required=True)
        site_config = doImport(name)
        # ``issubclass`` raises ``TypeError`` when its first argument is not
        # a class, so check for that first. This way anything that is not a
        # class (a module, a function, ...) is reported with the same clear
        # error as a class of the wrong type.
        if not (isinstance(site_config, type) and issubclass(site_config, SiteConfig)):
            raise RuntimeError(f"Site class={name} is not a SiteConfig subclass")
        return site_config(config)

    @abstractmethod
    def get_executors(self) -> list[ParslExecutor]:
        """Get a list of executors to be used in processing.

        Each executor should have a unique ``label``.

        Returns
        -------
        executors : `list` [`ParslExecutor`]
            Executors to be used in processing.
        """
        raise NotImplementedError("Subclasses must define")

    @abstractmethod
    def select_executor(self, job: "ParslJob") -> str:
        """Get the ``label`` of the executor to use to execute a job.

        Parameters
        ----------
        job : `ParslJob`
            Job to be executed.

        Returns
        -------
        label : `str`
            Label of executor to use to execute ``job``.
        """
        raise NotImplementedError("Subclasses must define")

    def get_address(self) -> str:
        """Return the IP address of the machine hosting the driver/submission.

        This address should be accessible from the workers. This should
        generally be the return value of one of the functions in
        ``parsl.addresses``.

        This is used by the default implementation of ``get_monitor``, but will
        generally be used by ``get_executors`` too.

        This default implementation gets the address from the hostname, but
        that will not work if the workers don't access the driver/submission
        node by that address.

        Returns
        -------
        address : `str`
            Address of the driver/submission machine.
        """
        return address_by_hostname()

    def get_command_prefix(self) -> str:
        """Return command(s) to add before each job command.

        These may be used to configure the environment for the job.

        This default implementation respects the BPS configuration elements:

        - ``site.<computeSite>.commandPrefix`` (`str`): command(s) to use as a
          prefix to executing a job command on a worker.
        - ``site.<computeSite>.environment`` (`bool`): add bash commands that
          replicate the environment on the driver/submit machine?

        Returns
        -------
        prefix : `str`
            Command(s) to add before each job command; empty if nothing is
            configured.
        """
        prefix = get_bps_config_value(self.site, "commandPrefix", str, "")
        if get_bps_config_value(self.site, "environment", bool, False):
            # The environment commands go on their own line(s), after any
            # user-supplied prefix.
            prefix += "\n" + export_environment()
        return prefix

    def get_monitor(self) -> MonitoringHub | None:
        """Get parsl monitor.

        The parsl monitor provides a database that tracks the progress of the
        workflow and the use of resources on the workers.

        This implementation respects the BPS configuration elements:

        - ``site.<computeSite>.monitorEnable`` (`bool`): enable monitor?
        - ``site.<computeSite>.monitorInterval`` (`float`): time interval (sec)
          between logging of resource usage.
        - ``site.<computeSite>.monitorFilename`` (`str`): name of file to use
          for the monitor sqlite database.

        Returns
        -------
        monitor : `MonitoringHub` or `None`
            Parsl monitor, or `None` for no monitor.
        """
        if not get_bps_config_value(self.site, "monitorEnable", bool, False):
            return None
        return MonitoringHub(
            workflow_name=get_workflow_name(self.config),
            hub_address=self.get_address(),
            resource_monitoring_interval=get_bps_config_value(self.site, "monitorInterval", float, 30.0),
            logging_endpoint="sqlite:///"
            + get_bps_config_value(self.site, "monitorFilename", str, "monitor.sqlite"),
        )

    def get_parsl_config(self) -> parsl.config.Config:
        """Get Parsl configuration for this site.

        Subclasses can overwrite this method to build a more specific Parsl
        configuration, if required.

        This implementation respects the BPS configuration elements:

        - ``site.<computeSite>.retries`` (`int`): number of retries; defaults
          to 1.
        - ``site.<computeSite>.run_dir`` (`str`): path to the Parsl run
          directory; defaults to ``runinfo``.

        Returns
        -------
        config : `parsl.config.Config`
            The configuration to be used for Parsl.
        """
        executors = self.get_executors()
        monitor = self.get_monitor()
        retries = get_bps_config_value(self.site, "retries", int, 1)
        # Path to Parsl run directory. Parsl's own default is 'runinfo',
        # which is not very explicit for end users given that we are using
        # BPS + Parsl + Slurm to execute a workflow, so the site
        # configuration may override it.
        run_dir = get_bps_config_value(self.site, "run_dir", str, "runinfo")
        return parsl.config.Config(
            executors=executors,
            monitoring=monitor,
            retries=retries,
            run_dir=run_dir,
            checkpoint_mode="task_exit",
        )

228 checkpoint_mode="task_exit", 

229 )