Coverage for python/lsst/ctrl/bps/generic_workflow.py: 29%

# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Class definitions for a Generic Workflow Graph.
23"""
25__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob", "GenericWorkflowExec"]
28import dataclasses
29import itertools
30import logging
31from collections import Counter
32from typing import Optional
34from lsst.utils.iteration import ensure_iterable
35from networkx import DiGraph, read_gpickle, topological_sort, write_gpickle
36from networkx.algorithms.dag import is_directed_acyclic_graph
38from .bps_draw import draw_networkx_dot
40_LOG = logging.getLogger(__name__)


@dataclasses.dataclass
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.
    """

    name: str
    """Lookup key (logical file name) of file/directory. Must be unique
    within run.
    """

    src_uri: Optional[str]  # unclear whether this needs to be a ResourcePath
    """Original location of file/directory.
    """

    wms_transfer: bool
    """Whether the WMS should handle transferring this file. Default is False.
    """

    job_access_remote: bool
    """Whether the job can remotely access file (using separately specified
    file access protocols). Default is False.
    """

    job_shared: bool
    """Whether this file can be shared between jobs (i.e., a job does not
    require its own copy). Default is False.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if given default
    # values, so writing own __init__.
    def __init__(
        self,
        name: str,
        src_uri: str = None,
        wms_transfer: bool = False,
        job_access_remote: bool = False,
        job_shared: bool = False,
    ):
        self.name = name
        self.src_uri = src_uri
        self.wms_transfer = wms_transfer
        self.job_access_remote = job_access_remote
        self.job_shared = job_shared

    __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")

    def __hash__(self):
        return hash(self.name)
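

# Example (illustrative sketch, not part of the original module; the logical
# name and URI below are hypothetical): describing a file that the WMS should
# transfer to the job.
def _example_file():  # pragma: no cover
    return GenericWorkflowFile(
        "butler_yaml",                # logical file name, unique within run
        src_uri="/repo/butler.yaml",  # original location
        wms_transfer=True,            # WMS responsible for transferring it
    )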


@dataclasses.dataclass
class GenericWorkflowExec:
    """Information about an executable that may be needed by various workflow
    management services.
    """

    name: str
    """Lookup key (logical file name) of executable. Must be unique
    within run.
    """

    src_uri: Optional[str]  # unclear whether this needs to be a ResourcePath
    """Original location of executable.
    """

    transfer_executable: bool
    """Whether the WMS/plugin is responsible for staging executable to
    location usable by job.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if given default
    # values, so writing own __init__.
    def __init__(self, name: str, src_uri: str = None, transfer_executable: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.transfer_executable = transfer_executable

    __slots__ = ("name", "src_uri", "transfer_executable")

    def __hash__(self):
        return hash(self.name)
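

# Example (illustrative sketch, not part of the original module; values are
# hypothetical): describing an executable that is already installed at the
# compute site, so no staging is requested.
def _example_executable():  # pragma: no cover
    return GenericWorkflowExec(
        "pipetask",
        src_uri="/software/bin/pipetask",
        transfer_executable=False,  # executable assumed preinstalled at site
    )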


@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.
    """

    name: str
    """Name of job. Must be unique within workflow.
    """

    label: Optional[str]
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    quanta_counts: Optional[Counter]
    """Counts of quanta per task label in job.
    """

    tags: Optional[dict]
    """Other key/value pairs for job that user may want to use as a filter.
    """

    executable: Optional[GenericWorkflowExec]
    """Executable for job.
    """

    arguments: Optional[str]
    """Command line arguments for job.
    """

    cmdvals: Optional[dict]
    """Values for variables in cmdline when using lazy command line creation.
    """

    memory_multiplier: Optional[float]
    """Memory growth rate between retries.
    """

    request_memory: Optional[int]  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_memory_max: Optional[int]  # MB
    """Max memory (in MB) that the job should ever use.
    """

    request_cpus: Optional[int]  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: Optional[int]  # MB
    """Max amount of job scratch disk (in MB) that the job is expected to need.
    """

    request_walltime: Optional[str]  # seconds
    """Max amount of time (in seconds) that the job is expected to need.
    """

    compute_site: Optional[str]
    """Key to look up site-specific information for running the job.
    """

    mail_to: Optional[str]
    """Comma-separated list of email addresses for emailing job status.
    """

    when_to_mail: Optional[str]
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: Optional[int]
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: Optional[int]
    """Job exit code that means the job should not be automatically retried.
    """

    abort_on_value: Optional[int]
    """Job exit value that signals the entire workflow should be aborted.
    """

    abort_return_value: Optional[int]
    """Exit value to use when aborting the entire workflow.
    """

    priority: Optional[str]
    """Initial priority of job in WMS format.
    """

    category: Optional[str]
    """WMS-facing label of job within single workflow (e.g., can be used for
    throttling jobs within a single workflow).
    """

    concurrency_limit: Optional[str]
    """Names of concurrency limits that the WMS plugin can appropriately
    translate to limit the number of this job across all running workflows.
    """

    queue: Optional[str]
    """Name of queue to use. Different WMS can translate this concept
    differently.
    """

    pre_cmdline: Optional[str]
    """Command line to be executed prior to executing job.
    """

    post_cmdline: Optional[str]
    """Command line to be executed after job executes.

    Should be executed regardless of exit status.
    """

    preemptible: Optional[bool]
    """Flag indicating whether the job can be preempted.
    """

    profile: Optional[dict]
    """Nested dictionary of WMS-specific key/value pairs with primary key being
    WMS key (e.g., pegasus, condor, panda).
    """

    attrs: Optional[dict]
    """Key/value pairs of job attributes (for WMS that have attributes in
    addition to commands).
    """

    environment: Optional[dict]
    """Environment variable names and values to be explicitly set inside job.
    """

    # As of python 3.7.8, can't use __slots__ if given default values, so
    # writing own __init__.
    def __init__(self, name: str):
        self.name = name
        self.label = None
        self.quanta_counts = Counter()
        self.tags = {}
        self.executable = None
        self.arguments = None
        self.cmdvals = {}
        self.memory_multiplier = None
        self.request_memory = None
        self.request_memory_max = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = None
        self.queue = None
        self.pre_cmdline = None
        self.post_cmdline = None
        self.preemptible = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}

    __slots__ = (
        "name",
        "label",
        "quanta_counts",
        "tags",
        "mail_to",
        "when_to_mail",
        "executable",
        "arguments",
        "cmdvals",
        "memory_multiplier",
        "request_memory",
        "request_memory_max",
        "request_cpus",
        "request_disk",
        "request_walltime",
        "number_of_retries",
        "retry_unless_exit",
        "abort_on_value",
        "abort_return_value",
        "compute_site",
        "environment",
        "priority",
        "category",
        "concurrency_limit",
        "queue",
        "pre_cmdline",
        "post_cmdline",
        "preemptible",
        "profile",
        "attrs",
    )

    def __hash__(self):
        return hash(self.name)
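

# Example (illustrative sketch, not part of the original module; values are
# hypothetical): jobs are created with just a name; everything else is filled
# in afterwards through attributes.
def _example_job():  # pragma: no cover
    gwjob = GenericWorkflowJob("calibrate_0")
    gwjob.label = "calibrate"
    gwjob.executable = GenericWorkflowExec("pipetask", src_uri="/software/bin/pipetask")
    gwjob.arguments = "run -b {butler_config}"  # may use lazy {variables}
    gwjob.cmdvals = {"butler_config": "/repo/butler.yaml"}
    gwjob.request_memory = 2048  # MB
    gwjob.request_cpus = 1
    gwjob.quanta_counts = Counter({"calibrate": 1})
    return gwjob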


class GenericWorkflow(DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialize graph that is passed through to DiGraph
        constructor. Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to DiGraph constructor.
    """

    def __init__(self, name, incoming_graph_data=None, **attr):
        super().__init__(incoming_graph_data, **attr)
        self._name = name
        self.run_attrs = {}
        self._files = {}
        self._executables = {}
        self._inputs = {}  # mapping job.names to list of GenericWorkflowFile
        self._outputs = {}  # mapping job.names to list of GenericWorkflowFile
        self.run_id = None
        self._final = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    @property
    def quanta_counts(self):
        """Count of quanta per task label (`collections.Counter`)."""
        qcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            if gwjob.quanta_counts is not None:
                qcounts += gwjob.quanta_counts
        return qcounts

    @property
    def job_counts(self):
        """Count of jobs per job label (`collections.Counter`)."""
        jcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            jcounts[gwjob.label] += 1

        # Final is separate
        final = self.get_final()
        if final:
            if isinstance(final, GenericWorkflow):
                for job_name in final:
                    gwjob = final.get_job(job_name)
                    jcounts[gwjob.label] += 1
            else:
                jcounts[final.label] += 1

        return jcounts

    def __iter__(self):
        """Return iterator of job names in topologically sorted order."""
        return topological_sort(self)
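
    # Example (illustrative sketch, not part of the original module; the
    # label "isr" is hypothetical): the two Counter properties aggregate
    # per-label counts across all jobs.
    #
    #     gw = GenericWorkflow("demo_counts")
    #     gwjob = GenericWorkflowJob("isr_0")
    #     gwjob.label = "isr"
    #     gwjob.quanta_counts = Counter({"isr": 1})
    #     gw.add_job(gwjob)
    #     assert gw.job_counts == Counter({"isr": 1})
    #     assert gw.quanta_counts == Counter({"isr": 1})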

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        This API exists in case the way files are stored changes (e.g.,
        making the workflow a bipartite graph with job and file nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
            (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            File names or objects from generic workflow meeting specifications.
        """
        files = []
        for filename, file in self._files.items():
            if not transfer_only or file.wms_transfer:
                if not data:
                    files.append(filename)
                else:
                    files.append(file)
        return files

    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job.
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job.
        """
        if not isinstance(job, GenericWorkflowJob):
            raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).")
        if self.has_node(job.name):
            raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.")
        super().add_node(job.name, job=job)
        self.add_job_relationships(parent_names, job.name)
        self.add_job_relationships(job.name, child_names)
        self.add_executable(job.executable)

    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs. All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Child job names.
        """
        if parents is not None and children is not None:
            self.add_edges_from(itertools.product(ensure_iterable(parents), ensure_iterable(children)))
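
    # Example (illustrative sketch, not part of the original module; job
    # names are hypothetical): building a small fan-out workflow.
    #
    #     gw = GenericWorkflow("demo_run")
    #     for name in ("init", "task_1", "task_2"):
    #         gw.add_job(GenericWorkflowJob(name))
    #     # Connect one parent to multiple children in a single call.
    #     gw.add_job_relationships("init", ["task_1", "task_2"])
    #     # Iteration yields names in topological order, so "init" is first.
    #     assert next(iter(gw)) == "init"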

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should be
            saved.
        attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently used).
        """
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow, leaving the graph connected.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
        """
        # Connect all parent jobs to all children jobs.
        parents = self.predecessors(job_name)
        children = self.successors(job_name)
        self.add_job_relationships(parents, children)

        # Delete job node (which also deletes its edges).
        self.remove_node(job_name)
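
    # Example (illustrative sketch, not part of the original module):
    # deleting the middle job of a chain a -> b -> c reconnects its parents
    # directly to its children.
    #
    #     gw = GenericWorkflow("demo_del")
    #     for name in ("a", "b", "c"):
    #         gw.add_job(GenericWorkflowJob(name))
    #     gw.add_job_relationships("a", "b")
    #     gw.add_job_relationships("b", "c")
    #     gw.del_job("b")
    #     assert gw.has_edge("a", "c")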

    def add_job_inputs(self, job_name, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added.
        files : `lsst.ctrl.bps.GenericWorkflowFile` or \
                `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File object(s) to be added as inputs to the specified job.
        """
        self._inputs.setdefault(job_name, [])
        for file in ensure_iterable(files):
            # Save the central copy
            if file.name not in self._files:
                self._files[file.name] = file

            # Save the job reference to the file
            self._inputs[job_name].append(file)

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object.

        Returns
        -------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File object to add to workflow.
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile
        else:
            _LOG.debug("Skipped add_file for existing file %s", gwfile.name)

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input files for the given job. If no input files for the job,
            returns an empty list.
        """
        inputs = []
        if job_name in self._inputs:
            for gwfile in self._inputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        inputs.append(gwfile.name)
                    else:
                        inputs.append(gwfile)
        return inputs
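
    # Example (illustrative sketch, not part of the original module; file
    # names and URIs are hypothetical): attaching inputs to a job and
    # querying only those the WMS must transfer.
    #
    #     gw = GenericWorkflow("demo_inputs")
    #     gw.add_job(GenericWorkflowJob("task_0"))
    #     gw.add_job_inputs(
    #         "task_0",
    #         [
    #             GenericWorkflowFile("config", src_uri="/repo/config.py", wms_transfer=True),
    #             GenericWorkflowFile("shared_repo", src_uri="/repo", job_access_remote=True),
    #         ],
    #     )
    #     assert gw.get_job_inputs("task_0", data=False, transfer_only=True) == ["config"]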

    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File objects to be added as outputs for specified job.
        """
        self._outputs.setdefault(job_name, [])

        for file_ in ensure_iterable(files):
            # Save the central copy
            if file_.name not in self._files:
                self._files[file_.name] = file_

            # Save the job reference to the file
            self._outputs[job_name].append(file_)

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management
            system would be responsible for transferring. It defaults to
            `False` thus returning all output files.

        Returns
        -------
        outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Output files for the given job. If no output files for the job,
            returns an empty list.
        """
        outputs = []

        if job_name in self._outputs:
            # _outputs stores the file objects themselves (see
            # add_job_outputs), so iterate over them directly instead of
            # looking them up by name.
            for gwfile in self._outputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        outputs.append(gwfile.name)
                    else:
                        outputs.append(gwfile)
        return outputs
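
    # Example (illustrative sketch, not part of the original module; names
    # are hypothetical): outputs mirror inputs, and the central file copy is
    # shared, so get_file returns the identical object.
    #
    #     gw = GenericWorkflow("demo_outputs")
    #     gw.add_job(GenericWorkflowJob("task_0"))
    #     gwfile = GenericWorkflowFile("task_0_log", src_uri="/logs/task_0.log")
    #     gw.add_job_outputs("task_0", gwfile)
    #     assert gw.get_job_outputs("task_0") == [gwfile]
    #     assert gw.get_file("task_0_log") is gwfile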

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use. It defaults to the format for
            the dot program.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            raise RuntimeError(f"Unknown draw format ({format_})")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer. Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data. It defaults to pickle format.
        """
        if format_ == "pickle":
            write_gpickle(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader. Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream. It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow loaded from the given stream.
        """
        if format_ == "pickle":
            return read_gpickle(stream)

        raise RuntimeError(f"Unknown format ({format_})")
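
    # Example (illustrative sketch, not part of the original module):
    # round-tripping a workflow through the pickle format. Note that
    # read_gpickle/write_gpickle were removed in networkx 3.0, so this
    # assumes the older networkx that this module imports from.
    #
    #     gw = GenericWorkflow("demo_save")
    #     gw.add_job(GenericWorkflowJob("task_0"))
    #     with open("demo.pickle", "wb") as fh:
    #         gw.save(fh, format_="pickle")
    #     with open("demo.pickle", "rb") as fh:
    #         gw2 = GenericWorkflow.load(fh, format_="pickle")
    #     assert list(gw2) == ["task_0"]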

    def validate(self):
        """Run checks to ensure that the generic workflow graph is valid."""
        # Make sure it is a directed acyclic graph.
        assert is_directed_acyclic_graph(self)

    def add_workflow_source(self, workflow):
        """Add given workflow as new source to this workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.GenericWorkflow`
            Workflow to prepend to this workflow as a new source.
        """
        # Find source nodes in self.
        self_sources = [n for n in self if self.in_degree(n) == 0]
        _LOG.debug("self_sources = %s", self_sources)

        # Find sink nodes of workflow.
        new_sinks = [n for n in workflow if workflow.out_degree(n) == 0]
        _LOG.debug("new sinks = %s", new_sinks)

        # Add new workflow nodes to self graph and make new edges.
        self.add_nodes_from(workflow.nodes(data=True))
        self.add_edges_from(workflow.edges())
        for source in self_sources:
            for sink in new_sinks:
                self.add_edge(sink, source)

        # Files are stored separately so copy them.
        for job_name in workflow:
            self.add_job_inputs(job_name, workflow.get_job_inputs(job_name, data=True))
            self.add_job_outputs(job_name, workflow.get_job_outputs(job_name, data=True))
            self.add_executable(workflow.get_job(job_name).executable)
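
    # Example (illustrative sketch, not part of the original module; names
    # are hypothetical): prepending an init workflow; its sink jobs become
    # parents of this workflow's source jobs.
    #
    #     main = GenericWorkflow("demo_main")
    #     main.add_job(GenericWorkflowJob("task_0"))
    #     init = GenericWorkflow("demo_init")
    #     init.add_job(GenericWorkflowJob("init_0"))
    #     main.add_workflow_source(init)
    #     assert list(main) == ["init_0", "task_0"]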

    def add_final(self, final):
        """Add special final job/workflow to the generic workflow.

        Parameters
        ----------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute the special final job(s), the
            job(s) to be executed after all jobs that can be executed
            have been executed regardless of exit status of any of the
            jobs.
        """
        if not isinstance(final, GenericWorkflowJob) and not isinstance(final, GenericWorkflow):
            raise TypeError(f"Invalid type for GenericWorkflow final ({type(final)})")

        self._final = final
        if isinstance(final, GenericWorkflowJob):
            self.add_executable(final.executable)

    def get_final(self):
        """Return job/workflow to be executed after all jobs that can be
        executed have been executed regardless of exit status of any of
        the jobs.

        Returns
        -------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute final job(s).
        """
        return self._final
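
    # Example (illustrative sketch, not part of the original module; names
    # are hypothetical): registering a final cleanup job that runs after
    # everything else regardless of exit status.
    #
    #     gw = GenericWorkflow("demo_final")
    #     gw.add_job(GenericWorkflowJob("task_0"))
    #     final = GenericWorkflowJob("final_job")
    #     final.executable = GenericWorkflowExec("final.sh", src_uri="/software/final.sh")
    #     gw.add_final(final)
    #     assert gw.get_final() is final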

    def add_executable(self, executable):
        """Add executable to workflow's list of executables.

        Parameters
        ----------
        executable : `lsst.ctrl.bps.GenericWorkflowExec`
            Executable object to be added to workflow.
        """
        if executable is not None:
            self._executables[executable.name] = executable
        else:
            _LOG.warning("executable not specified (None); cannot add to the workflow's list of executables")

    def get_executables(self, data=False, transfer_only=True):
        """Retrieve executables from generic workflow.

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the executable data as well as the exec object
            name. (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return executables for which transfer_executable
            is True.

        Returns
        -------
        execs : `list` [`lsst.ctrl.bps.GenericWorkflowExec`] or `list` [`str`]
            Filtered executable names or objects from generic workflow.
        """
        execs = []
        for name, executable in self._executables.items():
            if not transfer_only or executable.transfer_executable:
                if not data:
                    execs.append(name)
                else:
                    execs.append(executable)
        return execs
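

# Example (illustrative sketch, not part of the original module; names and
# paths are hypothetical): executables registered via jobs or add_executable
# can be filtered to just those the WMS/plugin must stage.
def _example_executables():  # pragma: no cover
    gw = GenericWorkflow("demo_execs")
    gw.add_executable(GenericWorkflowExec("site_tool", "/usr/bin/site_tool"))
    gw.add_executable(
        GenericWorkflowExec("my_script", "/home/user/my_script.sh", transfer_executable=True)
    )
    assert gw.get_executables(transfer_only=True) == ["my_script"]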