Coverage for python/lsst/ctrl/bps/wms_service.py: 85%
103 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-09 02:20 -0700
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-09 02:20 -0700
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Base classes for working with a specific WMS"""
# Names that make up the public API of this module; these are the only
# names exported by a wildcard import of the module.
__all__ = [
    "BaseWmsService",
    "BaseWmsWorkflow",
    "WmsJobReport",
    "WmsRunReport",
    "WmsStates",
]
34import dataclasses
35import logging
36from abc import ABCMeta
37from enum import Enum
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)
class WmsStates(Enum):
    """Execution states that a run or a job can be reported in."""

    UNKNOWN = 0
    """The state cannot be determined."""

    MISFIT = 1
    """A state was determined, but it matches none of the other states."""

    UNREADY = 2
    """Parents have not finished yet; still waiting on them."""

    READY = 3
    """Every parent has finished successfully."""

    PENDING = 4
    """Ready to run and visible in the batch queue."""

    RUNNING = 5
    """Executing right now."""

    DELETED = 6
    """Already deleted, or in the process of being deleted."""

    HELD = 7
    """Placed in a hold state."""

    SUCCEEDED = 8
    """Completed with a success status."""

    FAILED = 9
    """Completed with a non-success status."""

    PRUNED = 10
    """At least one parent failed or cannot be run."""
@dataclasses.dataclass(slots=True)
class WmsJobReport:
    """Per-job WMS details that go into the detailed report output."""

    wms_id: str
    """Identifier the workflow management system assigned to the job."""

    name: str
    """Name that BPS assigned to the job automatically."""

    label: str
    """User-facing job label; several jobs may share the same label."""

    state: WmsStates
    """Execution state the job is currently in."""
@dataclasses.dataclass(slots=True)
class WmsRunReport:
    """WMS run information to be included in detailed report output.

    Every field is optional and defaults to `None`, so a report can be
    created empty and filled in incrementally.
    """

    wms_id: str | None = None
    """Id assigned to the run by the WMS."""

    global_wms_id: str | None = None
    """Global run identification number.

    Only applicable in the context of a WMS using distributed job queues
    (e.g., HTCondor).
    """

    path: str | None = None
    """Path to the submit directory."""

    label: str | None = None
    """Run's label."""

    run: str | None = None
    """Run's name."""

    project: str | None = None
    """Name of the project the run belongs to."""

    campaign: str | None = None
    """Name of the campaign the run belongs to."""

    payload: str | None = None
    """Name of the payload."""

    operator: str | None = None
    """Username of the operator who submitted the run."""

    run_summary: str | None = None
    """Job counts per label."""

    state: WmsStates | None = None
    """Run's execution state."""

    jobs: list[WmsJobReport] | None = None
    """Information about individual jobs in the run."""

    total_number_jobs: int | None = None
    """Total number of jobs in the run."""

    job_state_counts: dict[WmsStates, int] | None = None
    """Job counts per state."""

    job_summary: dict[str, dict[WmsStates, int]] | None = None
    """Job counts per label and per state."""
class BaseWmsService:
    """Interface for interactions with a specific WMS.

    Apart from the constructor, every method of this base class raises
    `NotImplementedError`; a WMS-specific service provides the actual
    behavior by overriding them.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        Configuration needed by the WMS service.
    """

    def __init__(self, config):
        self.config = config

    def prepare(self, config, generic_workflow, out_prefix=None):
        """Create submission for a generic workflow for a specific WMS.

        Parameters
        ----------
        config : `lsst.ctrl.bps.BpsConfig`
            BPS configuration.
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic representation of a single workflow.
        out_prefix : `str`, optional
            Prefix for all WMS output files.

        Returns
        -------
        wms_workflow : `BaseWmsWorkflow`
            Prepared WMS Workflow to submit for execution.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def submit(self, workflow):
        """Submit a single WMS workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.BaseWmsWorkflow`
            Prepared WMS Workflow to submit for execution.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def restart(self, wms_workflow_id):
        """Restart a workflow from the point of failure.

        Parameters
        ----------
        wms_workflow_id : `str`
            Id that can be used by WMS service to identify the workflow that
            needs to be restarted.

        Returns
        -------
        wms_id : `str`
            Id of the restarted workflow. If restart failed, it will be set
            to None.
        run_name : `str`
            Name of the restarted workflow. If restart failed, it will be set
            to None.
        message : `str`
            A message describing any issues encountered during the restart.
            If there were no issues, an empty string is returned.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def list_submitted_jobs(self, wms_id=None, user=None, require_bps=True, pass_thru=None, is_global=False):
        """Query WMS for list of submitted WMS workflows/jobs.

        This should be a quick lookup function to create list of jobs for
        other functions.

        Parameters
        ----------
        wms_id : `int` or `str`, optional
            Id or path that can be used by WMS service to look up job.
        user : `str`, optional
            User whose submitted jobs should be listed.
        require_bps : `bool`, optional
            Whether to require jobs returned in list to be bps-submitted jobs.
        pass_thru : `str`, optional
            Information to pass through to WMS.
        is_global : `bool`, optional
            If set, all available job queues will be queried for job
            information. Defaults to False which means that only a local job
            queue will be queried for information.

            Only applicable in the context of a WMS using distributed job
            queues (e.g., HTCondor). A WMS with a centralized job queue
            (e.g. PanDA) can safely ignore it.

        Returns
        -------
        job_ids : `list` [`Any`]
            Only job ids to be used by cancel and other functions. Typically
            this means top-level jobs (i.e., not children jobs).

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def report(self, wms_workflow_id=None, user=None, hist=0, pass_thru=None, is_global=False):
        """Query WMS for status of submitted WMS workflows.

        Parameters
        ----------
        wms_workflow_id : `int` or `str`, optional
            Id that can be used by WMS service to look up status.
        user : `str`, optional
            Limit report to submissions by this particular user.
        hist : `int`, optional
            Number of days to expand report to include finished WMS workflows.
        pass_thru : `str`, optional
            Additional arguments to pass through to the specific WMS service.
        is_global : `bool`, optional
            If set, all available job queues will be queried for job
            information. Defaults to False which means that only a local job
            queue will be queried for information.

            Only applicable in the context of a WMS using distributed job
            queues (e.g., HTCondor). A WMS with a centralized job queue
            (e.g. PanDA) can safely ignore it.

        Returns
        -------
        run_reports : `list` [`lsst.ctrl.bps.WmsRunReport`]
            Status information for submitted WMS workflows.
        message : `str`
            Message to user on how to find more status information specific to
            this particular WMS.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def cancel(self, wms_id, pass_thru=None):
        """Cancel submitted workflows/jobs.

        Parameters
        ----------
        wms_id : `str`
            ID or path of job that should be canceled.
        pass_thru : `str`, optional
            Information to pass through to WMS.

        Returns
        -------
        deleted : `bool`
            Whether successful deletion or not. Currently, if any doubt or any
            individual jobs not deleted, return False.
        message : `str`
            Any message from WMS (e.g., error details).

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def run_submission_checks(self):
        """Checks to run at start if running WMS specific submission steps.

        Any exception other than NotImplementedError will halt submission.
        Submit directory may not yet exist when this is called.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def ping(self, pass_thru=None):
        """Check whether WMS services are up, reachable, and can authenticate
        if authentication is required.

        The services to be checked are those needed for submit, report, cancel,
        restart, but ping cannot guarantee whether jobs would actually run
        successfully.

        Parameters
        ----------
        pass_thru : `str`, optional
            Information to pass through to WMS.

        Returns
        -------
        status : `int`
            0 for success, non-zero for failure.
        message : `str`
            Any message from WMS (e.g., error details).

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        # ``pass_thru`` defaults to None so the signature matches both the
        # "optional" wording above and the other methods taking ``pass_thru``.
        raise NotImplementedError
class BaseWmsWorkflow(metaclass=ABCMeta):
    """Interface describing one workflow in a WMS-specific form.

    Parameters
    ----------
    name : `str`
        Unique name of the workflow.
    config : `lsst.ctrl.bps.BpsConfig`
        Generic workflow config.
    """

    def __init__(self, name, config):
        # Values supplied by the caller.
        self.name, self.config = name, config
        # Remaining attributes start out unset (targets are bound left to
        # right, so attribute creation order is unchanged).
        self.service_class = self.run_id = self.submit_path = None

    @classmethod
    def from_generic_workflow(cls, config, generic_workflow, out_prefix, service_class):
        """Build a WMS-specific workflow out of a GenericWorkflow.

        Parameters
        ----------
        config : `lsst.ctrl.bps.BpsConfig`
            Configuration values required to generate a WMS specific
            workflow.
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow that serves as the source for the WMS-specific
            one.
        out_prefix : `str`
            Root directory for WMS workflow inputs and outputs, and for
            internal WMS files.
        service_class : `str`
            Full module name of the WMS service class that created this
            workflow.

        Returns
        -------
        wms_workflow : `lsst.ctrl.bps.BaseWmsWorkflow`
            A WMS specific workflow.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError

    def write(self, out_prefix):
        """Write out the WMS files belonging to this workflow.

        Parameters
        ----------
        out_prefix : `str`
            Root directory for WMS workflow inputs and outputs, and for
            internal WMS files.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError