Coverage for python/lsst/ctrl/bps/parsl/site.py: 51%
54 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-04 03:08 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-04 03:08 -0700
1# This file is part of ctrl_bps_parsl.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org) and the LSST DESC (https://www.lsstdesc.org/).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <https://www.gnu.org/licenses/>.
28from abc import ABC, abstractmethod
29from types import ModuleType
30from typing import TYPE_CHECKING
32import parsl.config
33from lsst.ctrl.bps import BpsConfig
34from lsst.utils import doImport
35from parsl.addresses import address_by_hostname
36from parsl.executors.base import ParslExecutor
37from parsl.monitoring import MonitoringHub
39from .configuration import get_bps_config_value, get_workflow_name
40from .environment import export_environment
42if TYPE_CHECKING: 42 ↛ 43line 42 didn't jump to line 43, because the condition on line 42 was never true
43 from .job import ParslJob
45__all__ = ("SiteConfig",)
class SiteConfig(ABC):
    """Base class for site configuration.

    Subclasses need to override at least the ``get_executors`` and
    ``select_executor`` methods.

    Parameters
    ----------
    config : `BpsConfig`
        BPS configuration.
    add_resources : `bool`
        Add resource specification when submitting the job? This is only
        appropriate for the ``WorkQueue`` executor; other executors will
        raise an exception.
    """

    def __init__(self, config: BpsConfig, add_resources: bool = False):
        self.config = config
        # Cache the ``site.<computeSite>`` sub-configuration; the other
        # methods read their settings from it.
        self.site = self.get_site_subconfig(config)
        self.add_resources = add_resources

    @staticmethod
    def get_site_subconfig(config: BpsConfig) -> BpsConfig:
        """Get BPS configuration for the site of interest.

        We return the BPS sub-configuration for the site indicated by the
        ``computeSite`` value, which is ``site.<computeSite>``.

        Parameters
        ----------
        config : `BpsConfig`
            BPS configuration.

        Returns
        -------
        site : `BpsConfig`
            Site sub-configuration.
        """
        computeSite = get_bps_config_value(config, "computeSite", str, required=True)
        return get_bps_config_value(config, f".site.{computeSite}", BpsConfig, required=True)

    @classmethod
    def from_config(cls, config: BpsConfig) -> "SiteConfig":
        """Get the site configuration nominated in the BPS config.

        The ``computeSite`` (`str`) value in the BPS configuration is used to
        select a site configuration. The site configuration class to use is
        specified by the BPS configuration as ``site.<computeSite>.class``
        (`str`), which should be the fully-qualified name of a python class
        that inherits from `SiteConfig`.

        Parameters
        ----------
        config : `BpsConfig`
            BPS configuration.

        Returns
        -------
        site_config : subclass of `SiteConfig`
            Site configuration.

        Raises
        ------
        RuntimeError
            Raised if the imported ``site.<computeSite>.class`` object is not
            a subclass of `SiteConfig` (for example a module, a function or
            an unrelated class).
        """
        site = cls.get_site_subconfig(config)
        name = get_bps_config_value(site, "class", str, required=True)
        site_config = doImport(name)
        # ``issubclass`` raises `TypeError` when its first argument is not a
        # class, so rule out modules and every other non-class object first.
        # That way any bad ``class`` setting produces the same RuntimeError.
        is_site_class = (
            not isinstance(site_config, ModuleType)
            and isinstance(site_config, type)
            and issubclass(site_config, SiteConfig)
        )
        if not is_site_class:
            raise RuntimeError(f"Site class={name} is not a SiteConfig subclass")
        return site_config(config)

    @abstractmethod
    def get_executors(self) -> list[ParslExecutor]:
        """Get a list of executors to be used in processing.

        Each executor should have a unique ``label``.

        Returns
        -------
        executors : `list` [`ParslExecutor`]
            Executors to be used in processing.
        """
        raise NotImplementedError("Subclasses must define")

    @abstractmethod
    def select_executor(self, job: "ParslJob") -> str:
        """Get the ``label`` of the executor to use to execute a job.

        Parameters
        ----------
        job : `ParslJob`
            Job to be executed.

        Returns
        -------
        label : `str`
            Label of executor to use to execute ``job``.
        """
        raise NotImplementedError("Subclasses must define")

    def get_address(self) -> str:
        """Return the IP address of the machine hosting the driver/submission.

        This address should be accessible from the workers. This should
        generally be the return value of one of the functions in
        ``parsl.addresses``.

        This is used by the default implementation of ``get_monitor``, but will
        generally be used by ``get_executors`` too.

        This default implementation gets the address from the hostname, but
        that will not work if the workers don't access the driver/submission
        node by that address.

        Returns
        -------
        address : `str`
            Address of the driver/submission machine.
        """
        return address_by_hostname()

    def get_command_prefix(self) -> str:
        """Return command(s) to add before each job command.

        These may be used to configure the environment for the job.

        This default implementation respects the BPS configuration elements:

        - ``site.<computeSite>.commandPrefix`` (`str`): command(s) to use as a
          prefix to executing a job command on a worker.
        - ``site.<computeSite>.environment`` (`bool`): add bash commands that
          replicate the environment on the driver/submit machine?

        Returns
        -------
        prefix : `str`
            Command(s) to add before each job command; empty if neither
            configuration element is set.
        """
        prefix = get_bps_config_value(self.site, "commandPrefix", str, "")
        if get_bps_config_value(self.site, "environment", bool, False):
            # The environment commands go on their own line(s) after any
            # configured prefix.
            prefix += "\n" + export_environment()
        return prefix

    def get_monitor(self) -> MonitoringHub | None:
        """Get parsl monitor.

        The parsl monitor provides a database that tracks the progress of the
        workflow and the use of resources on the workers.

        This implementation respects the BPS configuration elements:

        - ``site.<computeSite>.monitorEnable`` (`bool`): enable monitor?
        - ``site.<computeSite>.monitorInterval`` (`float`): time interval (sec)
          between logging of resource usage.
        - ``site.<computeSite>.monitorFilename`` (`str`): name of file to use
          for the monitor sqlite database.

        Returns
        -------
        monitor : `MonitoringHub` or `None`
            Parsl monitor, or `None` for no monitor.
        """
        # Monitoring is opt-in: nothing is built unless explicitly enabled.
        if not get_bps_config_value(self.site, "monitorEnable", bool, False):
            return None
        return MonitoringHub(
            workflow_name=get_workflow_name(self.config),
            hub_address=self.get_address(),
            resource_monitoring_interval=get_bps_config_value(self.site, "monitorInterval", float, 30.0),
            logging_endpoint="sqlite:///"
            + get_bps_config_value(self.site, "monitorFilename", str, "monitor.sqlite"),
        )

    def get_parsl_config(self) -> parsl.config.Config:
        """Get Parsl configuration for this site.

        Subclasses can overwrite this method to build a more specific Parsl
        configuration, if required.

        This implementation respects the BPS configuration elements:

        - ``site.<computeSite>.retries`` (`int`): number of retries, passed
          to Parsl as ``retries`` (default 1).
        - ``site.<computeSite>.run_dir`` (`str`): path to the Parsl run
          directory (default ``runinfo``).

        The Parsl ``checkpoint_mode`` is always set to ``task_exit``.

        Returns
        -------
        config : `parsl.config.Config`
            The configuration to be used for Parsl.
        """
        executors = self.get_executors()
        monitor = self.get_monitor()
        retries = get_bps_config_value(self.site, "retries", int, 1)
        # Path to Parsl run directory. The default set by Parsl is
        # 'runinfo' which is not explicit enough for end users given that
        # we are using BPS + Parsl + Slurm to execute a workflow.
        run_dir = get_bps_config_value(self.site, "run_dir", str, "runinfo")
        return parsl.config.Config(
            executors=executors,
            monitoring=monitor,
            retries=retries,
            run_dir=run_dir,
            checkpoint_mode="task_exit",
        )