# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Class definitions for a Generic Workflow Graph.
"""
__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob", "GenericWorkflowExec"]


import dataclasses
import itertools
import logging
from typing import Optional

import networkx as nx

from lsst.daf.butler.core.utils import iterable

from .bps_draw import draw_networkx_dot


_LOG = logging.getLogger(__name__)


@dataclasses.dataclass
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.
    """
    name: str
    """Lookup key (logical file name) of file/directory. Must be unique
    within run.
    """

    src_uri: Optional[str]  # unclear whether this needs to be a ButlerURI
    """Original location of file/directory.
    """

    wms_transfer: bool
    """Whether the WMS is responsible for transferring the file.
    Default is False.
    """

    job_access_remote: bool
    """Whether the job can remotely access file (using separately specified
    file access protocols). Default is False.
    """

    job_shared: bool
    """Whether the file can be shared between jobs (i.e., the job does not
    require its own copy). Default is False.
    """

    # As of Python 3.7.8, can't use __slots__ with a dataclass if fields
    # have default values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, wms_transfer: bool = False,
                 job_access_remote: bool = False, job_shared: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.wms_transfer = wms_transfer
        self.job_access_remote = job_access_remote
        self.job_shared = job_shared

    __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")

    def __hash__(self):
        return hash(self.name)

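# Illustrative sketch (not part of the module; the name and path below are
# hypothetical): a file that the WMS should transfer to the job's workspace.
#
#     gwfile = GenericWorkflowFile("butlerConfig", src_uri="/repo/butler.yaml",
#                                  wms_transfer=True)
#     assert hash(gwfile) == hash("butlerConfig")  # hashed by name only
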

@dataclasses.dataclass
class GenericWorkflowExec:
    """Information about an executable that may be needed by various workflow
    management services.
    """
    name: str
    """Lookup key (logical file name) of executable. Must be unique
    within run.
    """

    src_uri: Optional[str]  # unclear whether this needs to be a ButlerURI
    """Original location of executable.
    """

    transfer_executable: bool
    """Whether the WMS/plugin is responsible for staging the executable to
    a location usable by the job.
    """

    # As of Python 3.7.8, can't use __slots__ with a dataclass if fields
    # have default values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, transfer_executable: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.transfer_executable = transfer_executable

    __slots__ = ("name", "src_uri", "transfer_executable")

    def __hash__(self):
        return hash(self.name)


@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.
    """
    name: str
    """Name of job. Must be unique within workflow.
    """

    label: Optional[str]
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    tags: Optional[dict]
    """Other key/value pairs for job that user may want to use as a filter.
    """

    executable: Optional[GenericWorkflowExec]
    """Executable for job.
    """

    arguments: Optional[str]
    """Command line arguments for job.
    """

    cmdvals: Optional[dict]
    """Values for variables in cmdline when using lazy command line creation.
    """

    request_memory: Optional[int]  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_cpus: Optional[int]  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: Optional[int]  # MB
    """Max amount of job scratch disk (in MB) that the job is expected to need.
    """

    request_walltime: Optional[str]  # minutes
    """Max amount of time (in minutes) that the job is expected to need.
    """

    compute_site: Optional[str]
    """Key to look up site-specific information for running the job.
    """

    mail_to: Optional[str]
    """Comma separated list of email addresses for emailing job status.
    """

    when_to_mail: Optional[str]
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: Optional[int]
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: Optional[int]
    """Exit code for job that means to not automatically retry.
    """

    abort_on_value: Optional[int]
    """Job exit value that signals the entire workflow should be aborted.
    """

    abort_return_value: Optional[int]
    """Exit value to use when aborting the entire workflow.
    """

    priority: Optional[str]
    """Initial priority of job in WMS format.
    """

    category: Optional[str]
    """WMS-facing label of job within single workflow (e.g., can be used for
    throttling jobs within a single workflow).
    """

    concurrency_limit: Optional[list]
    """Names of concurrency limits that the WMS plugin can translate to limit
    the number of instances of this job running across all workflows.
    """

    queue: Optional[str]
    """Name of queue to use. Different WMS can translate
    this concept differently.
    """

    pre_cmdline: Optional[str]
    """Command line to be executed prior to executing job.
    """

    post_cmdline: Optional[str]
    """Command line to be executed after job executes.

    Should be executed regardless of exit status.
    """

    profile: Optional[dict]
    """Nested dictionary of WMS-specific key/value pairs with primary key
    being WMS key (e.g., pegasus, condor, panda).
    """

    attrs: Optional[dict]
    """Key/value pairs of job attributes (for WMS that have attributes in
    addition to commands).
    """

    environment: Optional[dict]
    """Environment variable names and values to be explicitly set inside job.
    """

    # As of Python 3.7.8, can't use __slots__ if fields have default values,
    # so writing own __init__.
    def __init__(self, name: str):
        self.name = name
        self.label = None
        self.tags = {}
        self.executable = None
        self.arguments = None
        self.cmdvals = {}
        self.request_memory = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = []
        self.queue = None
        self.pre_cmdline = None
        self.post_cmdline = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}

    __slots__ = ("name", "label", "tags", "mail_to", "when_to_mail",
                 "executable", "arguments", "cmdvals",
                 "request_memory", "request_cpus", "request_disk", "request_walltime",
                 "number_of_retries", "retry_unless_exit", "abort_on_value", "abort_return_value",
                 "compute_site", "environment", "priority", "category", "concurrency_limit",
                 "queue", "pre_cmdline", "post_cmdline", "profile", "attrs")

    def __hash__(self):
        return hash(self.name)

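# Illustrative sketch (not part of the module; the label and resource numbers
# are hypothetical): a minimal job definition.
#
#     job = GenericWorkflowJob("calibrate_visit_001")
#     job.label = "calibrate"
#     job.request_memory = 2048  # MB
#     job.request_cpus = 1
#     job.executable = GenericWorkflowExec("pipetask", src_uri="/usr/bin/pipetask")
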

class GenericWorkflow(nx.DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialize the graph that is passed through to the
        nx.DiGraph constructor. Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to the nx.DiGraph constructor.
    """
    def __init__(self, name, incoming_graph_data=None, **attr):
        super().__init__(incoming_graph_data, **attr)
        self._name = name
        self.run_attrs = {}
        self._files = {}
        self._executables = {}
        self._inputs = {}   # mapping job.names to list of GenericWorkflowFile
        self._outputs = {}  # mapping job.names to list of GenericWorkflowFile
        self.run_id = None
        self._final = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        This API exists in case the way files are stored changes (e.g.,
        making the workflow a bipartite graph with job and file nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
            (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            File names or objects from generic workflow meeting specifications.
        """
        files = []
        for filename, file in self._files.items():
            if not transfer_only or file.wms_transfer:
                if not data:
                    files.append(filename)
                else:
                    files.append(file)
        return files

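    # Illustrative sketch (not part of the module; names are hypothetical):
    # files with wms_transfer=False are filtered out by default.
    #
    #     gwf = GenericWorkflow("demo")
    #     gwf.add_file(GenericWorkflowFile("butlerConfig", wms_transfer=True))
    #     gwf.add_file(GenericWorkflowFile("scratch", wms_transfer=False))
    #     gwf.get_files()           # -> ["butlerConfig"]
    #     gwf.get_files(data=True)  # -> [<GenericWorkflowFile "butlerConfig">]
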
    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job.
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job.
        """
        if not isinstance(job, GenericWorkflowJob):
            raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).")
        if self.has_node(job.name):
            raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.")
        super().add_node(job.name, job=job)
        self.add_job_relationships(parent_names, job.name)
        self.add_job_relationships(job.name, child_names)
        self.add_executable(job.executable)

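    # Illustrative sketch (not part of the module; job names are
    # hypothetical): building a two-job chain.
    #
    #     gwf = GenericWorkflow("demo")
    #     gwf.add_job(GenericWorkflowJob("isr"))
    #     gwf.add_job(GenericWorkflowJob("calibrate"), parent_names=["isr"])
    #     list(gwf.successors("isr"))  # -> ["calibrate"]
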
    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs. All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Child job names.
        """
        if parents is not None and children is not None:
            self.add_edges_from(itertools.product(iterable(parents), iterable(children)))

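    # Illustrative sketch (not part of the module; job names are
    # hypothetical): relationships are the cross product, so two parents
    # and two children yield four edges.
    #
    #     gwf.add_job_relationships(["a", "b"], ["c", "d"])
    #     # adds edges a->c, a->d, b->c, b->d
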
    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should be
            saved.
        attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently used).
        """
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow leaving connected graph.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
        """
        # Connect all parent jobs to all children jobs. Materialize the
        # predecessor/successor iterators before mutating the graph.
        parents = list(self.predecessors(job_name))
        children = list(self.successors(job_name))
        self.add_job_relationships(parents, children)

        # Delete job node (which also deletes its edges).
        self.remove_node(job_name)

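    # Illustrative sketch (not part of the module; job names are
    # hypothetical): deleting the middle job of a chain a->b->c leaves the
    # edge a->c, so the graph stays connected.
    #
    #     gwf.del_job("b")
    #     list(gwf.edges())  # contains ("a", "c")
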
    def add_job_inputs(self, job_name, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added.
        files : `lsst.ctrl.bps.GenericWorkflowFile` or \
                `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File object(s) to be added as inputs to the specified job.
        """
        self._inputs.setdefault(job_name, [])
        for file in iterable(files):
            # Save the central copy.
            if file.name not in self._files:
                self._files[file.name] = file

            # Save the job reference to the file.
            self._inputs[job_name].append(file)

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object.

        Returns
        -------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File object to add to workflow.
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile
        else:
            _LOG.debug("Skipped add_file for existing file %s", gwfile.name)

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            Input files for the given job. If the job has no input files,
            returns an empty list.
        """
        inputs = []
        if job_name in self._inputs:
            for gwfile in self._inputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        inputs.append(gwfile.name)
                    else:
                        inputs.append(gwfile)
        return inputs

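    # Illustrative sketch (not part of the module; names are hypothetical):
    # inputs are attached per job and can be filtered like get_files.
    #
    #     raw = GenericWorkflowFile("raw_001", wms_transfer=True)
    #     gwf.add_job_inputs("isr", raw)
    #     gwf.get_job_inputs("isr")              # -> [raw]
    #     gwf.get_job_inputs("isr", data=False)  # -> ["raw_001"]
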
    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File objects to be added as outputs for specified job.
        """
        self._outputs.setdefault(job_name, [])

        for file_ in iterable(files):
            # Save the central copy.
            if file_.name not in self._files:
                self._files[file_.name] = file_

            # Save the job reference to the file.
            self._outputs[job_name].append(file_)

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management
            system would be responsible for transferring. It defaults to
            `False` thus returning all output files.

        Returns
        -------
        outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            Output files for the given job. If the job has no output files,
            returns an empty list.
        """
        outputs = []

        if job_name in self._outputs:
            # _outputs stores the file objects themselves (see
            # add_job_outputs), so iterate over them directly rather than
            # treating the entries as names.
            for file_ in self._outputs[job_name]:
                if not transfer_only or file_.wms_transfer:
                    if not data:
                        outputs.append(file_.name)
                    else:
                        outputs.append(file_)
        return outputs

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use. It defaults to the format for
            the dot program.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            raise RuntimeError(f"Unknown draw format ({format_})")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer. Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data. It defaults to pickle format.
        """
        if format_ == "pickle":
            nx.write_gpickle(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader. Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream. It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow loaded from the given stream.
        """
        if format_ == "pickle":
            return nx.read_gpickle(stream)

        raise RuntimeError(f"Unknown format ({format_})")

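    # Illustrative sketch (not part of the module; the path is hypothetical):
    # a pickle round trip through save/load.
    #
    #     with open("workflow.pickle", "wb") as fh:
    #         gwf.save(fh)
    #     with open("workflow.pickle", "rb") as fh:
    #         gwf2 = GenericWorkflow.load(fh)
    #     assert gwf2.name == gwf.name
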
    def validate(self):
        """Run checks to ensure this is still a valid generic workflow graph.
        """
        # Make sure it is still a directed acyclic graph.
        assert nx.algorithms.dag.is_directed_acyclic_graph(self)

    def add_workflow_source(self, workflow):
        """Add given workflow as new source to this workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.GenericWorkflow`
            Workflow to add as a new source of this workflow.
        """
        # Find source nodes in self.
        self_sources = [n for n in self if self.in_degree(n) == 0]
        _LOG.debug("self_sources = %s", self_sources)

        # Find sink nodes of workflow.
        new_sinks = [n for n in workflow if workflow.out_degree(n) == 0]
        _LOG.debug("new sinks = %s", new_sinks)

        # Add new workflow nodes to self graph and make new edges.
        self.add_nodes_from(workflow.nodes(data=True))
        self.add_edges_from(workflow.edges())
        for source in self_sources:
            for sink in new_sinks:
                self.add_edge(sink, source)

        # Files are stored separately so copy them.
        for job_name in workflow:
            self.add_job_inputs(job_name, workflow.get_job_inputs(job_name, data=True))
            self.add_job_outputs(job_name, workflow.get_job_outputs(job_name, data=True))
            self.add_executable(workflow.get_job(job_name).executable)

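    # Illustrative sketch (not part of the module; names are hypothetical):
    # after adding init_wf as a source, every sink of init_wf precedes every
    # former source of gwf.
    #
    #     init_wf = GenericWorkflow("init")
    #     init_wf.add_job(GenericWorkflowJob("pipetaskInit"))
    #     gwf.add_workflow_source(init_wf)
    #     # edge ("pipetaskInit", <former source of gwf>) now exists
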
    def add_final(self, final):
        """Add special final job/workflow to the generic workflow.

        Parameters
        ----------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute the special final job(s), the
            job(s) to be executed after all jobs that can be executed
            have been executed regardless of exit status of any of the
            jobs.
        """
        if not isinstance(final, (GenericWorkflowJob, GenericWorkflow)):
            raise TypeError(f"Invalid type for GenericWorkflow final ({type(final)})")

        self._final = final
        if isinstance(final, GenericWorkflowJob):
            self.add_executable(final.executable)

    def get_final(self):
        """Return job/workflow to be executed after all jobs that can be
        executed have been executed regardless of exit status of any of
        the jobs.

        Returns
        -------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute final job(s).
        """
        return self._final

    def add_executable(self, executable):
        """Add executable to workflow's list of executables.

        Parameters
        ----------
        executable : `lsst.ctrl.bps.GenericWorkflowExec`
            Executable object to be added to workflow.
        """
        if executable is not None:
            self._executables[executable.name] = executable
        else:
            _LOG.warning("executable not specified (None); cannot add to the workflow's list of executables")

    def get_executables(self, data=False, transfer_only=True):
        """Retrieve executables from generic workflow.

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the executable data as well as the exec object
            name. (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return executables for which transfer_executable
            is True.

        Returns
        -------
        execs : `list` [`lsst.ctrl.bps.GenericWorkflowExec`] or `list` [`str`]
            Filtered executable names or objects from generic workflow.
        """
        execs = []
        for name, executable in self._executables.items():
            if not transfer_only or executable.transfer_executable:
                if not data:
                    execs.append(name)
                else:
                    execs.append(executable)
        return execs