Coverage for python/lsst/ctrl/bps/generic_workflow.py: 29%
# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Class definitions for a Generic Workflow Graph.
"""

__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob", "GenericWorkflowExec"]

import dataclasses
import itertools
import logging
from collections import Counter
from typing import Optional

from networkx import DiGraph, read_gpickle, topological_sort, write_gpickle
from networkx.algorithms.dag import is_directed_acyclic_graph

from lsst.utils.iteration import ensure_iterable

from .bps_draw import draw_networkx_dot

_LOG = logging.getLogger(__name__)


@dataclasses.dataclass
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.
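
    Examples
    --------
    A minimal sketch of describing a transferable input file; the name and
    path here are purely illustrative:

    >>> gwfile = GenericWorkflowFile("input1", src_uri="/tmp/input1.txt",
    ...                              wms_transfer=True)
    >>> gwfile.name
    'input1'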
47 """
48 name: str
49 """Lookup key (logical file name) of file/directory. Must be unique
50 within run.
51 """

    src_uri: Optional[str]  # unclear whether this should be a ResourcePath
    """Original location of file/directory.
    """

    wms_transfer: bool
    """Whether the WMS should handle transferring the file. Default is False.
    """

    job_access_remote: bool
    """Whether the job can remotely access file (using separately specified
    file access protocols). Default is False.
    """

    job_shared: bool
    """Whether the file can be shared between jobs rather than each job
    requiring its own copy. Default is False.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if give default
    # values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, wms_transfer: bool = False,
                 job_access_remote: bool = False, job_shared: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.wms_transfer = wms_transfer
        self.job_access_remote = job_access_remote
        self.job_shared = job_shared

    __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")

    def __hash__(self):
        return hash(self.name)


@dataclasses.dataclass
class GenericWorkflowExec:
    """Information about an executable that may be needed by various workflow
    management services.
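
    Examples
    --------
    A minimal sketch; the name and path are purely illustrative:

    >>> gwexec = GenericWorkflowExec("my_exec", src_uri="/usr/bin/echo")
    >>> gwexec.transfer_executable
    False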
90 """
91 name: str
92 """Lookup key (logical file name) of executable. Must be unique
93 within run.
94 """

    src_uri: Optional[str]  # unclear whether this should be a ResourcePath
    """Original location of executable.
    """

    transfer_executable: bool
    """Whether the WMS/plugin is responsible for staging executable to
    location usable by job.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if give default
    # values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, transfer_executable: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.transfer_executable = transfer_executable

    __slots__ = ("name", "src_uri", "transfer_executable")

    def __hash__(self):
        return hash(self.name)


@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.
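
    Examples
    --------
    A minimal sketch; the job name, label, and resource value are
    illustrative:

    >>> gwjob = GenericWorkflowJob("calibrate_visit_12345")
    >>> gwjob.label = "calibrate"
    >>> gwjob.request_memory = 2048
    >>> gwjob.quanta_counts
    Counter()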
122 """
123 name: str
124 """Name of job. Must be unique within workflow.
125 """

    label: Optional[str]
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    quanta_counts: Optional[Counter]
    """Counts of quanta per task label in job.
    """

    tags: Optional[dict]
    """Other key/value pairs for job that user may want to use as a filter.
    """

    executable: Optional[GenericWorkflowExec]
    """Executable for job.
    """

    arguments: Optional[str]
    """Command line arguments for job.
    """

    cmdvals: Optional[dict]
    """Values for variables in cmdline when using lazy command line creation.
    """

    memory_multiplier: Optional[float]
    """Memory growth rate between retries.
    """

    request_memory: Optional[int]  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_memory_max: Optional[int]  # MB
    """Max memory (in MB) that the job should ever use.
    """

    request_cpus: Optional[int]  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: Optional[int]  # MB
    """Max amount of job scratch disk (in MB) that the job is expected to need.
    """

    request_walltime: Optional[str]  # minutes
    """Max amount of time (in minutes) that the job is expected to need.
    """

    compute_site: Optional[str]
    """Key to look up site-specific information for running the job.
    """

    mail_to: Optional[str]
    """Comma separated list of email addresses for emailing job status.
    """

    when_to_mail: Optional[str]
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: Optional[int]
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: Optional[int]
    """Exit code for job that means to not automatically retry.
    """

    abort_on_value: Optional[int]
    """Job exit value that signals the entire workflow should be aborted.
    """

    abort_return_value: Optional[int]
    """Exit value to use when aborting the entire workflow.
    """

    priority: Optional[str]
    """Initial priority of job in WMS format.
    """

    category: Optional[str]
    """WMS-facing label of job within single workflow (e.g., can be used for
    throttling jobs within a single workflow).
    """

    concurrency_limit: Optional[str]
    """Names of concurrency limits that the WMS plugin can appropriately
    translate to limit the number of this job across all running workflows.
    """

    queue: Optional[str]
    """Name of queue to use. Different WMS can translate this concept
    differently.
    """

    pre_cmdline: Optional[str]
    """Command line to be executed prior to executing job.
    """

    post_cmdline: Optional[str]
    """Command line to be executed after job executes.

    Should be executed regardless of exit status.
    """

    preemptible: Optional[bool]
    """Whether the job can be preempted.
    """

    profile: Optional[dict]
    """Nested dictionary of WMS-specific key/value pairs with primary key being
    WMS key (e.g., pegasus, condor, panda).
    """

    attrs: Optional[dict]
    """Key/value pairs of job attributes (for WMS that have attributes in
    addition to commands).
    """

    environment: Optional[dict]
    """Environment variable names and values to be explicitly set inside job.
    """

    # As of python 3.7.8, can't use __slots__ if give default values, so
    # writing own __init__.
    def __init__(self, name: str):
        self.name = name
        self.label = None
        self.quanta_counts = Counter()
        self.tags = {}
        self.executable = None
        self.arguments = None
        self.cmdvals = {}
        self.memory_multiplier = None
        self.request_memory = None
        self.request_memory_max = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = None
        self.queue = None
        self.pre_cmdline = None
        self.post_cmdline = None
        self.preemptible = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}

    __slots__ = ("name", "label", "quanta_counts", "tags", "mail_to", "when_to_mail",
                 "executable", "arguments", "cmdvals",
                 "memory_multiplier", "request_memory", "request_memory_max", "request_cpus",
                 "request_disk", "request_walltime", "number_of_retries", "retry_unless_exit",
                 "abort_on_value", "abort_return_value", "compute_site", "environment", "priority",
                 "category", "concurrency_limit", "queue", "pre_cmdline", "post_cmdline",
                 "preemptible", "profile", "attrs")

    def __hash__(self):
        return hash(self.name)


class GenericWorkflow(DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialize graph that is passed through to the DiGraph
        constructor. Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to DiGraph constructor.
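
    Examples
    --------
    A minimal sketch of building a two-job workflow (all names are
    illustrative):

    >>> gwf = GenericWorkflow("my_run")
    >>> gwf.add_job(GenericWorkflowJob("jobA"))
    >>> gwf.add_job(GenericWorkflowJob("jobB"), parent_names=["jobA"])
    >>> list(gwf)
    ['jobA', 'jobB']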
310 """
311 def __init__(self, name, incoming_graph_data=None, **attr):
312 super().__init__(incoming_graph_data, **attr)
313 self._name = name
314 self.run_attrs = {}
315 self._files = {}
316 self._executables = {}
317 self._inputs = {} # mapping job.names to list of GenericWorkflowFile
318 self._outputs = {} # mapping job.names to list of GenericWorkflowFile
319 self.run_id = None
320 self._final = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    @property
    def quanta_counts(self):
        """Counts of quanta per task label in workflow (`collections.Counter`).
        """
        qcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            if gwjob.quanta_counts is not None:
                qcounts += gwjob.quanta_counts
        return qcounts

    @property
    def job_counts(self):
        """Counts of jobs per job label in workflow (`collections.Counter`).
        """
        jcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            jcounts[gwjob.label] += 1

        # Final is separate
        final = self.get_final()
        if final:
            if isinstance(final, GenericWorkflow):
                for job_name in final:
                    gwjob = final.get_job(job_name)
                    jcounts[gwjob.label] += 1
            else:
                jcounts[final.label] += 1

        return jcounts

    def __iter__(self):
        """Return iterator of job names in topologically sorted order.
        """
        return topological_sort(self)

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        This API exists in case the way files are stored changes (e.g.,
        making the workflow a bipartite graph with job and file nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
            (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            File names or objects from generic workflow meeting specifications.
        """
        files = []
        for filename, file in self._files.items():
            if not transfer_only or file.wms_transfer:
                if not data:
                    files.append(filename)
                else:
                    files.append(file)
        return files

    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job.
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job.
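
        Examples
        --------
        A small sketch chaining two illustrative jobs:

        >>> gwf = GenericWorkflow("example")
        >>> gwf.add_job(GenericWorkflowJob("jobA"))
        >>> gwf.add_job(GenericWorkflowJob("jobB"), parent_names=["jobA"])
        >>> list(gwf.edges())
        [('jobA', 'jobB')]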
410 """
411 if not isinstance(job, GenericWorkflowJob):
412 raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).")
413 if self.has_node(job.name):
414 raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.")
415 super().add_node(job.name, job=job)
416 self.add_job_relationships(parent_names, job.name)
417 self.add_job_relationships(job.name, child_names)
418 self.add_executable(job.executable)

    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs. All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Child job names.
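
        Examples
        --------
        A sketch fanning one illustrative parent out to two children:

        >>> gwf = GenericWorkflow("example")
        >>> for name in ["a", "b", "c"]:
        ...     gwf.add_job(GenericWorkflowJob(name))
        >>> gwf.add_job_relationships("a", ["b", "c"])
        >>> sorted(gwf.successors("a"))
        ['b', 'c']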
442 """
443 if parents is not None and children is not None:
444 self.add_edges_from(itertools.product(ensure_iterable(parents), ensure_iterable(children)))
446 def add_edges_from(self, ebunch_to_add, **attr):
447 """Add several edges between jobs in the generic workflow.
449 Parameters
450 ----------
451 ebunch_to_add : Iterable [`tuple`]
452 Iterable of job name pairs between which a dependency should be
453 saved.
454 attr : keyword arguments, optional
455 Data can be assigned using keyword arguments (not currently used).
456 """
457 for edge_to_add in ebunch_to_add:
458 self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow, reconnecting its parents to its
        children so the graph stays connected.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
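
        Examples
        --------
        A sketch with three illustrative chained jobs; deleting the middle
        one reconnects the ends:

        >>> gwf = GenericWorkflow("example")
        >>> gwf.add_job(GenericWorkflowJob("a"))
        >>> gwf.add_job(GenericWorkflowJob("b"), parent_names=["a"])
        >>> gwf.add_job(GenericWorkflowJob("c"), parent_names=["b"])
        >>> gwf.del_job("b")
        >>> list(gwf.edges())
        [('a', 'c')]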
500 """
501 # Connect all parent jobs to all children jobs.
502 parents = self.predecessors(job_name)
503 children = self.successors(job_name)
504 self.add_job_relationships(parents, children)
506 # Delete job node (which deleted edges).
507 self.remove_node(job_name)

    def add_job_inputs(self, job_name, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added.
        files : `lsst.ctrl.bps.GenericWorkflowFile` or \
                `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File object(s) to be added as inputs to the specified job.
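
        Examples
        --------
        A sketch attaching one illustrative input file to a job:

        >>> gwf = GenericWorkflow("example")
        >>> gwf.add_job(GenericWorkflowJob("jobA"))
        >>> gwf.add_job_inputs("jobA", GenericWorkflowFile("in1", wms_transfer=True))
        >>> gwf.get_job_inputs("jobA", data=False)
        ['in1']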
519 """
520 self._inputs.setdefault(job_name, [])
521 for file in ensure_iterable(files):
522 # Save the central copy
523 if file.name not in self._files:
524 self._files[file.name] = file
526 # Save the job reference to the file
527 self._inputs[job_name].append(file)

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object.

        Returns
        -------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File object to add to workflow.
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile
        else:
            _LOG.debug("Skipped add_file for existing file %s", gwfile.name)

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input files for the given job. If no input files for the job,
            returns an empty list.
        """
        inputs = []
        if job_name in self._inputs:
            for gwfile in self._inputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        inputs.append(gwfile.name)
                    else:
                        inputs.append(gwfile)
        return inputs

    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File objects to be added as outputs for specified job.
        """
        self._outputs.setdefault(job_name, [])

        for file_ in ensure_iterable(files):
            # Save the central copy
            if file_.name not in self._files:
                self._files[file_.name] = file_

            # Save the job reference to the file
            self._outputs[job_name].append(file_)

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management
            system would be responsible for transferring. It defaults to
            `False` thus returning all output files.

        Returns
        -------
        outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Output files for the given job. If no output files for the job,
            returns an empty list.
        """
        outputs = []

        if job_name in self._outputs:
            # _outputs stores the file objects themselves, so iterate over
            # those directly rather than looking names up in _files.
            for gwfile in self._outputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        outputs.append(gwfile.name)
                    else:
                        outputs.append(gwfile)
        return outputs

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use. It defaults to the format for
            the dot program.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            raise RuntimeError(f"Unknown draw format ({format_})")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer. Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data. It defaults to pickle format.
        """
        if format_ == "pickle":
            write_gpickle(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader. Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream. It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow loaded from the given stream.
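
        Examples
        --------
        A sketch of a pickle round trip through an in-memory stream:

        >>> import io
        >>> gwf = GenericWorkflow("example")
        >>> stream = io.BytesIO()
        >>> gwf.save(stream)
        >>> _ = stream.seek(0)
        >>> GenericWorkflow.load(stream).name
        'example'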
690 """
691 if format_ == "pickle":
692 return read_gpickle(stream)
694 raise RuntimeError(f"Unknown format ({format_})")
696 def validate(self):
697 """Run checks to ensure this is still a valid generic workflow graph.
698 """
699 # Make sure a directed acyclic graph
700 assert is_directed_acyclic_graph(self)

    def add_workflow_source(self, workflow):
        """Add given workflow as new source to this workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.GenericWorkflow`
            Workflow to add as a new source of this workflow.
        """
        # Find source nodes in self.
        self_sources = [n for n in self if self.in_degree(n) == 0]
        _LOG.debug("self_sources = %s", self_sources)

        # Find sink nodes of workflow.
        new_sinks = [n for n in workflow if workflow.out_degree(n) == 0]
        _LOG.debug("new sinks = %s", new_sinks)

        # Add new workflow nodes to self graph and make new edges.
        self.add_nodes_from(workflow.nodes(data=True))
        self.add_edges_from(workflow.edges())
        for source in self_sources:
            for sink in new_sinks:
                self.add_edge(sink, source)

        # Files are stored separately so copy them.
        for job_name in workflow:
            self.add_job_inputs(job_name, workflow.get_job_inputs(job_name, data=True))
            self.add_job_outputs(job_name, workflow.get_job_outputs(job_name, data=True))
            self.add_executable(workflow.get_job(job_name).executable)

    def add_final(self, final):
        """Add special final job/workflow to the generic workflow.

        Parameters
        ----------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute the special final job(s), the
            job(s) to be executed after all jobs that can be executed
            have been executed regardless of exit status of any of the
            jobs.
        """
        if not isinstance(final, (GenericWorkflowJob, GenericWorkflow)):
            raise TypeError(f"Invalid type for GenericWorkflow final ({type(final)})")

        self._final = final
        if isinstance(final, GenericWorkflowJob):
            self.add_executable(final.executable)

    def get_final(self):
        """Return job/workflow to be executed after all jobs that can be
        executed have been executed regardless of exit status of any of
        the jobs.

        Returns
        -------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute final job(s).
        """
        return self._final

    def add_executable(self, executable):
        """Add executable to workflow's list of executables.

        Parameters
        ----------
        executable : `lsst.ctrl.bps.GenericWorkflowExec`
            Executable object to be added to workflow.
        """
        if executable is not None:
            self._executables[executable.name] = executable
        else:
            _LOG.warning("executable not specified (None); cannot add to the workflow's list of executables")

    def get_executables(self, data=False, transfer_only=True):
        """Retrieve executables from generic workflow.

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the executable data as well as the exec object
            name. (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return executables for which transfer_executable
            is True.

        Returns
        -------
        execs : `list` [`lsst.ctrl.bps.GenericWorkflowExec`] or `list` [`str`]
            Filtered executable names or objects from generic workflow.
        """
        execs = []
        for name, executable in self._executables.items():
            if not transfer_only or executable.transfer_executable:
                if not data:
                    execs.append(name)
                else:
                    execs.append(executable)
        return execs