# Coverage for python/lsst/ctrl/bps/generic_workflow.py : 33%

# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Class definitions for a Generic Workflow Graph.
"""

__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob"]


import dataclasses
import itertools
from typing import Optional

import networkx as nx

from lsst.daf.butler.core.utils import iterable

from .bps_draw import draw_networkx_dot


@dataclasses.dataclass
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.
    """
    name: str
    """Lookup key (logical file name) of file/directory. Must be unique
    within run.
    """

    src_uri: Optional[str]  # don't know that need ButlerURI
    """Original location of file/directory.
    """

    wms_transfer: bool
    """Whether the WMS should be responsible for transferring the
    file/directory. Default is False.
    """

    job_access_remote: bool
    """Whether the job can remotely access file (using separately specified
    file access protocols). Default is False.
    """

    job_shared: bool
    """Whether the file can be shared across jobs instead of each job
    requiring its own copy. Default is False.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass when fields have
    # default values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, wms_transfer: bool = False,
                 job_access_remote: bool = False, job_shared: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.wms_transfer = wms_transfer
        self.job_access_remote = job_access_remote
        self.job_shared = job_shared

    __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")

    def __hash__(self):
        return hash(self.name)
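
# A minimal usage sketch (the file name and path here are hypothetical, not
# from the original module): only ``name`` is required, and the transfer and
# access flags default to False.
if __name__ == "__main__":
    _gwfile = GenericWorkflowFile("raw_001", src_uri="/tmp/raw_001.fits",
                                  wms_transfer=True)
    print(_gwfile.job_access_remote)         # False
    print(hash(_gwfile) == hash("raw_001"))  # True; identity is the name alone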


@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.
    """
    name: str
    """Name of job. Must be unique within workflow.
    """

    label: Optional[str]
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    tags: Optional[dict]
    """Other key/value pairs for job that user may want to use as a filter.
    """

    cmdline: Optional[str]
    """Command line for job.
    """

    cmdvals: Optional[dict]
    """Values for variables in cmdline when using lazy command line creation.
    """

    request_memory: Optional[int]  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_cpus: Optional[int]  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: Optional[int]  # MB
    """Max amount of job scratch disk (in MB) that the job is expected to
    need.
    """

    request_walltime: Optional[str]  # minutes
    """Max amount of time (in minutes) that the job is expected to need.
    """

    compute_site: Optional[str]
    """Key to look up site-specific information for running the job.
    """

    mail_to: Optional[str]
    """Comma separated list of email addresses for emailing job status.
    """

    when_to_mail: Optional[str]
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: Optional[int]
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: Optional[int]
    """Exit code for job that means to not automatically retry.
    """

    abort_on_value: Optional[int]
    """Job exit value that signals the entire workflow should be aborted.
    """

    abort_return_value: Optional[int]
    """Exit value to use when aborting the entire workflow.
    """

    priority: Optional[str]
    """Initial priority of job in WMS format.
    """

    category: Optional[str]
    """WMS-facing label of job within single workflow (e.g., can be used for
    throttling jobs within a single workflow).
    """

    concurrency_limit: Optional[list]
    """Names of concurrency limits that the WMS plugin can appropriately
    translate to limit the number of this job across all running workflows.
    """

    pre_cmdline: Optional[str]
    """Command line to be executed prior to executing job.
    """

    post_cmdline: Optional[str]
    """Command line to be executed after job executes.

    Should be executed regardless of exit status.
    """

    profile: Optional[dict]
    """Nested dictionary of WMS-specific key/value pairs with primary key
    being WMS key (e.g., pegasus, condor, panda).
    """

    attrs: Optional[dict]
    """Key/value pairs of job attributes (for WMS that have attributes in
    addition to commands).
    """

    environment: Optional[dict]
    """Environment variable names and values to be explicitly set inside job.
    """

    # As of python 3.7.8, can't use __slots__ when fields have default values,
    # so writing own __init__.
    def __init__(self, name: str):
        self.name = name
        self.label = None
        self.tags = {}
        self.cmdline = None
        self.cmdvals = {}
        self.request_memory = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = []
        self.pre_cmdline = None
        self.post_cmdline = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}

    __slots__ = ("name", "label", "tags", "mail_to", "when_to_mail",
                 "cmdline", "cmdvals", "transfer_executable",
                 "request_memory", "request_cpus", "request_disk", "request_walltime",
                 "number_of_retries", "retry_unless_exit", "abort_on_value",
                 "abort_return_value", "compute_site", "environment", "priority",
                 "category", "concurrency_limit", "pre_cmdline", "post_cmdline",
                 "profile", "attrs")

    def __hash__(self):
        return hash(self.name)
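
# A minimal usage sketch (hypothetical job values): the constructor takes only
# the job name; resource requests and WMS knobs are assigned afterwards.
if __name__ == "__main__":
    _job = GenericWorkflowJob("calibrate_0")
    _job.label = "calibrate"
    _job.request_memory = 2048           # MB
    _job.request_walltime = "90"         # minutes, stored as a string
    print(_job.name, _job.request_cpus)  # calibrate_0 None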


class GenericWorkflow(nx.DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialize graph that is passed through to nx.DiGraph
        constructor. Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to nx.DiGraph constructor.
    """
    def __init__(self, name, incoming_graph_data=None, **attr):
        super().__init__(incoming_graph_data, **attr)
        self._name = name
        self.run_attrs = {}
        self._files = {}
        self.run_id = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        This API exists in case the way files are stored changes (e.g., the
        workflow becomes a bipartite graph with job and file nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
            (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            File names or objects from generic workflow meeting specifications.
        """
        files = []
        for filename, file in self._files.items():
            if not transfer_only or file.wms_transfer:
                if not data:
                    files.append(filename)
                else:
                    files.append(file)
        return files

    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job.
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job.
        """
        if not isinstance(job, GenericWorkflowJob):
            raise RuntimeError(f"Invalid type for job to be added to GenericWorkflow ({type(job)}).")
        if self.has_node(job.name):
            raise RuntimeError(f"Job {job.name} already exists in GenericWorkflow.")
        super().add_node(job.name, job=job, inputs={}, outputs={})
        self.add_job_relationships(parent_names, job.name)
        self.add_job_relationships(job.name, child_names)

    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs. All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Child job names.
        """
        if parents is not None and children is not None:
            self.add_edges_from(itertools.product(iterable(parents), iterable(children)))

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should be
            saved.
        attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently used).
        """
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow, reconnecting the job's parents
        to its children to keep the graph connected.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
        """
        # Connect all parent jobs to all children jobs.
        parents = self.predecessors(job_name)
        children = self.successors(job_name)
        self.add_job_relationships(parents, children)

        # Delete job node (which also deletes its edges).
        self.remove_node(job_name)

    def add_job_inputs(self, job_name: str, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added.
        files : `lsst.ctrl.bps.GenericWorkflowFile` or \
                `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File object(s) to be added as inputs to the specified job.
        """
        job_inputs = self.nodes[job_name]["inputs"]
        for file in iterable(files):
            # Save the central copy.
            if file.name not in self._files:
                self._files[file.name] = file

            # Save the job reference to the file.
            job_inputs[file.name] = file

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object.

        Returns
        -------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File object to add to workflow.
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input files for the given job.
        """
        job_inputs = self.nodes[job_name]["inputs"]
        inputs = []
        for file_name in job_inputs:
            file = self._files[file_name]
            if not transfer_only or file.wms_transfer:
                if not data:
                    inputs.append(file_name)
                else:
                    inputs.append(file)
        return inputs

    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File objects to be added as outputs for specified job.
        """
        job_outputs = self.nodes[job_name]["outputs"]
        for file in files:
            # Save the central copy (add_file takes the file object itself,
            # not its name).
            self.add_file(file)

            # Save the job reference to the file.
            job_outputs[file.name] = file

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management
            system would be responsible for transferring. It defaults to
            `False` thus returning all output files.

        Returns
        -------
        outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Output files for the given job.
        """
        job_outputs = self.nodes[job_name]["outputs"]
        outputs = []
        for file_name in job_outputs:
            file = self._files[file_name]
            if not transfer_only or file.wms_transfer:
                if not data:
                    outputs.append(file_name)
                else:
                    outputs.append(file)
        return outputs

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use. It defaults to the format for
            the dot program.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            raise RuntimeError(f"Unknown draw format ({format_})")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer. Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data. It defaults to pickle format.
        """
        if format_ == "pickle":
            nx.write_gpickle(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader. Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream. It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow loaded from the given stream.
        """
        if format_ == "pickle":
            return nx.read_gpickle(stream)

        raise RuntimeError(f"Unknown format ({format_})")

    def validate(self):
        """Run checks to ensure this is still a valid generic workflow graph.
        """
        # Make sure it is a directed acyclic graph.
        assert nx.algorithms.dag.is_directed_acyclic_graph(self)
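
# A minimal end-to-end sketch (hypothetical job and file names): build a
# two-job workflow, attach a transferable input, and round-trip it through
# the default pickle format.
if __name__ == "__main__":
    import io

    gw = GenericWorkflow("demo")
    gw.add_job(GenericWorkflowJob("job1"))
    gw.add_job(GenericWorkflowJob("job2"), parent_names=["job1"])
    gw.add_job_inputs("job2", GenericWorkflowFile("in1", src_uri="/tmp/in1.txt",
                                                  wms_transfer=True))

    print(gw.get_files())               # ['in1'] (transfer_only defaults to True)
    print(list(gw.successors("job1")))  # ['job2']
    gw.validate()                       # asserts the graph is a DAG

    _buf = io.BytesIO()
    gw.save(_buf)                       # pickle format via nx.write_gpickle
    _buf.seek(0)
    gw2 = GenericWorkflow.load(_buf)
    print(gw2.get_job("job2").name)     # job2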