Coverage for python/lsst/ctrl/bps/wms_service.py: 81%
73 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-07 04:16 -0700
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-07 04:16 -0700
# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Base classes for working with a specific WMS."""
# Names exported by a star-import of this module.
__all__ = [
    "BaseWmsService",
    "BaseWmsWorkflow",
    "WmsJobReport",
    "WmsRunReport",
    "WmsStates",
]
import dataclasses
import logging
from abc import ABCMeta
from enum import Enum
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)
class WmsStates(Enum):
    """Run and job states.

    Each member carries a distinct integer value, 0 through 9.
    """

    UNKNOWN = 0  # Can't determine state
    MISFIT = 1  # Determined state, but doesn't fit other states
    UNREADY = 2  # Still waiting for parents to finish
    READY = 3  # All of its parents have finished successfully
    PENDING = 4  # Ready to run, visible in batch queue
    RUNNING = 5  # Currently running
    DELETED = 6  # In the process of being deleted or already deleted
    HELD = 7  # In a hold state
    SUCCEEDED = 8  # Have completed with success status
    FAILED = 9  # Have completed with non-success status
@dataclasses.dataclass
class WmsJobReport:
    """WMS job information to be included in detailed report output.

    The generated ``__init__`` takes the fields in declaration order:
    ``wms_id``, ``name``, ``label``, ``state``.  None of them has a default,
    so all four must be supplied.
    """

    wms_id: str
    name: str
    label: str
    state: WmsStates

    # Explicit slots: instances have no per-instance ``__dict__`` and only
    # the four fields above can be set as attributes.
    __slots__ = ("wms_id", "name", "label", "state")
@dataclasses.dataclass
class WmsRunReport:
    """WMS run information to be included in detailed report output.

    The generated ``__init__`` takes the fields in the order they are
    declared below.  None of them has a default, so all must be supplied.
    """

    wms_id: str
    global_wms_id: str
    path: str
    label: str
    run: str
    project: str
    campaign: str
    payload: str
    operator: str
    run_summary: str
    state: WmsStates
    jobs: list
    total_number_jobs: int
    job_state_counts: dict

    # Explicit slots: instances have no per-instance ``__dict__``.  The names
    # are listed in the same order as the field declarations above.
    __slots__ = (
        "wms_id",
        "global_wms_id",
        "path",
        "label",
        "run",
        "project",
        "campaign",
        "payload",
        "operator",
        "run_summary",
        "state",
        "jobs",
        "total_number_jobs",
        "job_state_counts",
    )
class BaseWmsService:
    """Interface for interactions with a specific WMS.

    Every method other than ``__init__`` raises `NotImplementedError` in this
    base class; a WMS-specific subclass is expected to override the ones it
    supports.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Configuration needed by the WMS service.
    """

    def __init__(self, config):
        self.config = config

    def prepare(self, config, generic_workflow, out_prefix=None):
        """Create submission for a generic workflow for a specific WMS.

        Parameters
        ----------
        config : `lsst.ctrl.bps.BpsConfig`
            BPS configuration.
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic representation of a single workflow.
        out_prefix : `str`, optional
            Prefix for all WMS output files.

        Returns
        -------
        wms_workflow : `BaseWmsWorkflow`
            Prepared WMS Workflow to submit for execution.

        Raises
        ------
        NotImplementedError
            Always raised by this base class.
        """
        raise NotImplementedError

    def submit(self, workflow):
        """Submit a single WMS workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.BaseWmsWorkflow`
            Prepared WMS Workflow to submit for execution.

        Raises
        ------
        NotImplementedError
            Always raised by this base class.
        """
        raise NotImplementedError

    def restart(self, wms_workflow_id):
        """Restart a workflow from the point of failure.

        Parameters
        ----------
        wms_workflow_id : `str`
            Id that can be used by WMS service to identify the workflow that
            needs to be restarted.

        Returns
        -------
        wms_id : `str`
            Id of the restarted workflow. If restart failed, it will be set
            to None.
        run_name : `str`
            Name of the restarted workflow. If restart failed, it will be set
            to None.
        message : `str`
            A message describing any issues encountered during the restart.
            If there were no issues, an empty string is returned.

        Raises
        ------
        NotImplementedError
            Always raised by this base class.
        """
        raise NotImplementedError

    def list_submitted_jobs(self, wms_id=None, user=None, require_bps=True, pass_thru=None, is_global=False):
        """Query WMS for list of submitted WMS workflows/jobs.

        This should be a quick lookup function to create list of jobs for
        other functions.

        Parameters
        ----------
        wms_id : `int` or `str`, optional
            Id or path that can be used by WMS service to look up job.
        user : `str`, optional
            User whose submitted jobs should be listed.
        require_bps : `bool`, optional
            Whether to require jobs returned in list to be bps-submitted jobs.
        pass_thru : `str`, optional
            Information to pass through to WMS.
        is_global : `bool`, optional
            If set, all available job queues will be queried for job
            information.  Defaults to False which means that only a local job
            queue will be queried for information.

            Only applicable in the context of a WMS using distributed job
            queues (e.g., HTCondor). A WMS with a centralized job queue
            (e.g. PanDA) can safely ignore it.

        Returns
        -------
        job_ids : `list` [`Any`]
            Only job ids to be used by cancel and other functions.  Typically
            this means top-level jobs (i.e., not children jobs).

        Raises
        ------
        NotImplementedError
            Always raised by this base class.
        """
        raise NotImplementedError

    def report(self, wms_workflow_id=None, user=None, hist=0, pass_thru=None, is_global=False):
        """Query WMS for status of submitted WMS workflows.

        Parameters
        ----------
        wms_workflow_id : `int` or `str`, optional
            Id that can be used by WMS service to look up status.
        user : `str`, optional
            Limit report to submissions by this particular user.
        hist : `int`, optional
            Number of days to expand report to include finished WMS workflows.
        pass_thru : `str`, optional
            Additional arguments to pass through to the specific WMS service.
        is_global : `bool`, optional
            If set, all available job queues will be queried for job
            information.  Defaults to False which means that only a local job
            queue will be queried for information.

            Only applicable in the context of a WMS using distributed job
            queues (e.g., HTCondor). A WMS with a centralized job queue
            (e.g. PanDA) can safely ignore it.

        Returns
        -------
        run_reports : `list` [`lsst.ctrl.bps.WmsRunReport`]
            Status information for submitted WMS workflows.
        message : `str`
            Message to user on how to find more status information specific to
            this particular WMS.

        Raises
        ------
        NotImplementedError
            Always raised by this base class.
        """
        raise NotImplementedError

    def cancel(self, wms_id, pass_thru=None):
        """Cancel submitted workflows/jobs.

        Parameters
        ----------
        wms_id : `str`
            ID or path of job that should be canceled.
        pass_thru : `str`, optional
            Information to pass through to WMS.

        Returns
        -------
        deleted : `bool`
            Whether successful deletion or not.  Currently, if any doubt or any
            individual jobs not deleted, return False.
        message : `str`
            Any message from WMS (e.g., error details).

        Raises
        ------
        NotImplementedError
            Always raised by this base class.
        """
        raise NotImplementedError

    def run_submission_checks(self):
        """Checks to run at start if running WMS specific submission steps.

        Any exception other than NotImplementedError will halt submission.
        Submit directory may not yet exist when this is called.

        Raises
        ------
        NotImplementedError
            Always raised by this base class.
        """
        raise NotImplementedError

    def ping(self, pass_thru=None):
        """Check whether WMS services are up, reachable, and can authenticate
        if authentication is required.

        The services to be checked are those needed for submit, report, cancel,
        restart, but ping cannot guarantee whether jobs would actually run
        successfully.

        Parameters
        ----------
        pass_thru : `str`, optional
            Information to pass through to WMS.  Defaults to None, matching
            the ``pass_thru`` parameter of the other methods of this class.

        Returns
        -------
        status : `int`
            0 for success, non-zero for failure.
        message : `str`
            Any message from WMS (e.g., error details).

        Raises
        ------
        NotImplementedError
            Always raised by this base class.
        """
        raise NotImplementedError
class BaseWmsWorkflow(metaclass=ABCMeta):
    """Interface for single workflow specific to a WMS.

    No method is marked abstract, so this class can be instantiated directly.
    ``from_generic_workflow`` and ``write`` raise `NotImplementedError` here.

    Parameters
    ----------
    name : `str`
        Unique name of workflow.
    config : `lsst.ctrl.bps.BpsConfig`
        Generic workflow config.
    """

    def __init__(self, name, config):
        self.name = name
        self.config = config
        # Not known at construction time; all three start out as None.
        self.service_class = None
        self.run_id = None
        self.submit_path = None

    @classmethod
    def from_generic_workflow(cls, config, generic_workflow, out_prefix, service_class):
        """Create a WMS-specific workflow from a GenericWorkflow.

        Parameters
        ----------
        config : `lsst.ctrl.bps.BpsConfig`
            Configuration values needed for generating a WMS specific workflow.
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow from which to create the WMS-specific one.
        out_prefix : `str`
            Root directory to be used for WMS workflow inputs and outputs
            as well as internal WMS files.
        service_class : `str`
            Full module name of WMS service class that created this workflow.

        Returns
        -------
        wms_workflow : `lsst.ctrl.bps.BaseWmsWorkflow`
            A WMS specific workflow.

        Raises
        ------
        NotImplementedError
            Always raised by this base class.
        """
        raise NotImplementedError

    def write(self, out_prefix):
        """Write WMS files for this particular workflow.

        Parameters
        ----------
        out_prefix : `str`
            Root directory to be used for WMS workflow inputs and outputs
            as well as internal WMS files.

        Raises
        ------
        NotImplementedError
            Always raised by this base class.
        """
        raise NotImplementedError