Coverage for python/lsst/ctrl/bps/generic_workflow.py : 30%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Class definitions for a Generic Workflow Graph.
23"""
25__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob", "GenericWorkflowExec"]
28import dataclasses
29import itertools
30import logging
31from typing import Optional
32from collections import Counter
34from networkx import DiGraph, read_gpickle, write_gpickle, topological_sort
35from networkx.algorithms.dag import is_directed_acyclic_graph
37from lsst.daf.butler.core.utils import iterable
38from .bps_draw import draw_networkx_dot
40_LOG = logging.getLogger(__name__)
@dataclasses.dataclass
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.
    """
    name: str
    """Lookup key (logical file name) of file/directory. Must be unique
    within run.
    """

    # Fixed annotation: ``str or None`` evaluates to just ``str`` at runtime,
    # so the field was mistyped; use Optional[str] (don't know that need
    # ButlerURI).
    src_uri: Optional[str]
    """Original location of file/directory.
    """

    wms_transfer: bool
    """Whether the WMS should ignore file or not. Default is False.
    """

    job_access_remote: bool
    """Whether the job can remotely access file (using separately specified
    file access protocols). Default is False.
    """

    job_shared: bool
    """Whether job requires its own copy of this file. Default is False.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if give default
    # values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, wms_transfer: bool = False,
                 job_access_remote: bool = False, job_shared: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.wms_transfer = wms_transfer
        self.job_access_remote = job_access_remote
        self.job_shared = job_shared

    __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")

    def __hash__(self):
        # Hash on name only; name is documented to be unique within a run.
        return hash(self.name)
@dataclasses.dataclass
class GenericWorkflowExec:
    """Information about an executable that may be needed by various workflow
    management services.
    """
    name: str
    """Lookup key (logical file name) of executable. Must be unique
    within run.
    """

    # Fixed annotation: ``str or None`` evaluates to just ``str`` at runtime,
    # so the field was mistyped; use Optional[str] (don't know that need
    # ButlerURI).
    src_uri: Optional[str]
    """Original location of executable.
    """

    transfer_executable: bool
    """Whether the WMS/plugin is responsible for staging executable to
    location usable by job.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if give default
    # values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, transfer_executable: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.transfer_executable = transfer_executable

    __slots__ = ("name", "src_uri", "transfer_executable")

    def __hash__(self):
        # Hash on name only; name is documented to be unique within a run.
        return hash(self.name)
@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.

    Note: combines @dataclass with __slots__ and a hand-written __init__
    (see comment above __init__); field declaration order must stay in sync
    with __slots__ and the assignments in __init__.
    """
    name: str
    """Name of job. Must be unique within workflow.
    """

    label: Optional[str]
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    quanta_counts: Optional[Counter]
    """Counts of quanta per task label in job.
    """

    tags: Optional[dict]
    """Other key/value pairs for job that user may want to use as a filter.
    """

    executable: Optional[GenericWorkflowExec]
    """Executable for job.
    """

    arguments: Optional[str]
    """Command line arguments for job.
    """

    cmdvals: Optional[dict]
    """Values for variables in cmdline when using lazy command line creation.
    """

    memory_multiplier: Optional[float]
    """Memory growth rate between retries.
    """

    request_memory: Optional[int]  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_cpus: Optional[int]  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: Optional[int]  # MB
    """Max amount of job scratch disk (in MB) that the job is expected to need.
    """

    request_walltime: Optional[str]  # minutes
    """Max amount of time (in seconds) that the job is expected to need.

    NOTE(review): inline comment says minutes but this docstring says
    seconds -- confirm the expected unit against the WMS plugins.
    """

    compute_site: Optional[str]
    """Key to look up site-specific information for running the job.
    """

    mail_to: Optional[str]
    """Comma separated list of email addresses for emailing job status.
    """

    when_to_mail: Optional[str]
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: Optional[int]
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: Optional[int]
    """Exit code for job that means to not automatically retry.
    """

    abort_on_value: Optional[int]
    """Job exit value for signals to abort the entire workflow.
    """

    abort_return_value: Optional[int]
    """Exit value to use when aborting the entire workflow.
    """

    priority: Optional[str]
    """Initial priority of job in WMS-format.
    """

    category: Optional[str]
    """WMS-facing label of job within single workflow (e.g., can be used for
    throttling jobs within a single workflow).
    """

    concurrency_limit: Optional[list]
    """Names of concurrency limits that the WMS plugin can appropriately
    translate to limit the number of this job across all running workflows.
    """

    queue: Optional[str]
    """Name of queue to use. Different WMS can translate this concept
    differently.
    """

    pre_cmdline: Optional[str]
    """Command line to be executed prior to executing job.
    """

    post_cmdline: Optional[str]
    """Command line to be executed after job executes.

    Should be executed regardless of exit status.
    """

    preemptible: Optional[bool]
    """The flag indicating whether the job can be preempted.
    """

    profile: Optional[dict]
    """Nested dictionary of WMS-specific key/value pairs with primary key being
    WMS key (e.g., pegasus, condor, panda).
    """

    attrs: Optional[dict]
    """Key/value pairs of job attributes (for WMS that have attributes in
    addition to commands).
    """

    environment: Optional[dict]
    """Environment variable names and values to be explicitly set inside job.
    """

    # As of python 3.7.8, can't use __slots__ if give default values, so
    # writing own __init__.
    def __init__(self, name: str):
        self.name = name
        self.label = None
        # Mutable defaults (Counter/dict/list) are created per-instance here,
        # which is why they cannot be dataclass field defaults with __slots__.
        self.quanta_counts = Counter()
        self.tags = {}
        self.executable = None
        self.arguments = None
        self.cmdvals = {}
        self.memory_multiplier = None
        self.request_memory = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = []
        self.queue = None
        self.pre_cmdline = None
        self.post_cmdline = None
        self.preemptible = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}

    __slots__ = ("name", "label", "quanta_counts", "tags", "mail_to", "when_to_mail",
                 "executable", "arguments", "cmdvals",
                 "memory_multiplier", "request_memory", "request_cpus", "request_disk", "request_walltime",
                 "number_of_retries", "retry_unless_exit", "abort_on_value", "abort_return_value",
                 "compute_site", "environment", "priority", "category", "concurrency_limit",
                 "queue", "pre_cmdline", "post_cmdline", "preemptible", "profile", "attrs")

    def __hash__(self):
        # Hash on name only; name is documented to be unique within a workflow.
        return hash(self.name)
class GenericWorkflow(DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialized graph that is passed through to DiGraph
        constructor.  Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to DiGraph constructor.
    """
    def __init__(self, name, incoming_graph_data=None, **attr):
        super().__init__(incoming_graph_data, **attr)
        self._name = name
        self.run_attrs = {}
        self._files = {}
        self._executables = {}
        self._inputs = {}    # mapping job.names to list of GenericWorkflowFile
        self._outputs = {}   # mapping job.names to list of GenericWorkflowFile
        self.run_id = None
        self._final = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    @property
    def quanta_counts(self):
        """Counts of quanta per task label in workflow (`collections.Counter`).
        """
        qcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            if gwjob.quanta_counts is not None:
                qcounts += gwjob.quanta_counts
        return qcounts

    @property
    def job_counts(self):
        """Counts of jobs per job label in workflow (`collections.Counter`).
        """
        jcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            jcounts[gwjob.label] += 1
        return jcounts

    def __iter__(self):
        """Return iterator of job names in topologically sorted order.
        """
        return topological_sort(self)

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        Need API in case change way files are stored (e.g., make
        workflow a bipartite graph with jobs and files nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
            (The default is `False`.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management system
            would be responsible for transferring.

        Returns
        -------
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            File names or objects from generic workflow meeting specifications.
        """
        files = []
        for filename, file in self._files.items():
            if not transfer_only or file.wms_transfer:
                if not data:
                    files.append(filename)
                else:
                    files.append(file)
        return files

    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job

        Raises
        ------
        RuntimeError
            If the given object is not a GenericWorkflowJob or a job with
            the same name already exists.
        """
        if not isinstance(job, GenericWorkflowJob):
            raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).")
        if self.has_node(job.name):
            raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.")
        super().add_node(job.name, job=job)
        self.add_job_relationships(parent_names, job.name)
        self.add_job_relationships(job.name, child_names)
        self.add_executable(job.executable)

    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs.  All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Children job names.
        """
        if parents is not None and children is not None:
            self.add_edges_from(itertools.product(iterable(parents), iterable(children)))

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should be
            saved.
        attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently used).
        """
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.

        Raises
        ------
        RuntimeError
            If either endpoint is not already a job in the workflow.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow leaving connected graph.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
        """
        # Connect all parent jobs to all children jobs.  Materialize the
        # views first so the graph is not mutated while they are iterated.
        parents = list(self.predecessors(job_name))
        children = list(self.successors(job_name))
        self.add_job_relationships(parents, children)

        # Delete job node (which deletes edges).
        self.remove_node(job_name)

    def add_job_inputs(self, job_name, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added
        files : `lsst.ctrl.bps.GenericWorkflowFile` or \
                `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File object(s) to be added as inputs to the specified job.
        """
        self._inputs.setdefault(job_name, [])
        for file in iterable(files):
            # Save the central copy
            if file.name not in self._files:
                self._files[file.name] = file

            # Save the job reference to the file
            self._inputs[job_name].append(file)

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object

        Returns
        -------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File object to add to workflow
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile
        else:
            _LOG.debug("Skipped add_file for existing file %s", gwfile.name)

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management system
            would be responsible for transferring.

        Returns
        -------
        inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input files for the given job.  If no input files for the job,
            returns an empty list.
        """
        inputs = []
        if job_name in self._inputs:
            for gwfile in self._inputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        inputs.append(gwfile.name)
                    else:
                        inputs.append(gwfile)
        return inputs

    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File objects to be added as outputs for specified job.
        """
        self._outputs.setdefault(job_name, [])

        for file_ in iterable(files):
            # Save the central copy
            if file_.name not in self._files:
                self._files[file_.name] = file_

            # Save the job reference to the file
            self._outputs[job_name].append(file_)

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management system
            would be responsible for transferring.  It defaults to `False` thus
            returning all output files.

        Returns
        -------
        outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Output files for the given job.  If no output files for the job,
            returns an empty list.
        """
        outputs = []

        if job_name in self._outputs:
            # Bug fix: _outputs stores GenericWorkflowFile objects (see
            # add_job_outputs), so iterate them directly instead of treating
            # the entries as file names to look up in self._files (which
            # raised KeyError).  Mirrors get_job_inputs.
            for gwfile in self._outputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        outputs.append(gwfile.name)
                    else:
                        outputs.append(gwfile)
        return outputs

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use.  It defaults to the format for
            the dot program.

        Raises
        ------
        RuntimeError
            If given an unsupported format.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            # Bug fix: error message was missing its closing parenthesis.
            raise RuntimeError(f"Unknown draw format ({format_})")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer.  Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data.  It defaults to pickle format.

        Raises
        ------
        RuntimeError
            If given an unsupported format.
        """
        if format_ == "pickle":
            write_gpickle(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader.  Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream.  It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow loaded from the given stream

        Raises
        ------
        RuntimeError
            If given an unsupported format.
        """
        if format_ == "pickle":
            return read_gpickle(stream)

        raise RuntimeError(f"Unknown format ({format_})")

    def validate(self):
        """Run checks to ensure this is still a valid generic workflow graph.
        """
        # Make sure a directed acyclic graph.  (Kept as assert to preserve
        # the AssertionError contract existing callers may rely on.)
        assert is_directed_acyclic_graph(self)

    def add_workflow_source(self, workflow):
        """Add given workflow as new source to this workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.GenericWorkflow`
        """
        # Find source nodes in self.
        self_sources = [n for n in self if self.in_degree(n) == 0]
        _LOG.debug("self_sources = %s", self_sources)

        # Find sink nodes of workflow.
        new_sinks = [n for n in workflow if workflow.out_degree(n) == 0]
        _LOG.debug("new sinks = %s", new_sinks)

        # Add new workflow nodes to self graph and make new edges.
        self.add_nodes_from(workflow.nodes(data=True))
        self.add_edges_from(workflow.edges())
        for source in self_sources:
            for sink in new_sinks:
                self.add_edge(sink, source)

        # Files are stored separately so copy them.
        for job_name in workflow:
            self.add_job_inputs(job_name, workflow.get_job_inputs(job_name, data=True))
            self.add_job_outputs(job_name, workflow.get_job_outputs(job_name, data=True))
            self.add_executable(workflow.get_job(job_name).executable)

    def add_final(self, final):
        """Add special final job/workflow to the generic workflow.

        Parameters
        ----------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute the special final job(s), the
            job(s) to be executed after all jobs that can be executed
            have been executed regardless of exit status of any of the
            jobs.

        Raises
        ------
        TypeError
            If given final is neither a job nor a workflow.
        """
        if not isinstance(final, GenericWorkflowJob) and not isinstance(final, GenericWorkflow):
            # Bug fix: message was missing the f prefix, so it printed the
            # literal "{type(final)}" placeholder instead of the type.
            raise TypeError(f"Invalid type for GenericWorkflow final ({type(final)})")

        self._final = final
        if isinstance(final, GenericWorkflowJob):
            self.add_executable(final.executable)

    def get_final(self):
        """Return job/workflow to be executed after all jobs that can be
        executed have been executed regardless of exit status of any of
        the jobs.

        Returns
        -------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute final job(s).
        """
        return self._final

    def add_executable(self, executable):
        """Add executable to workflow's list of executables.

        Parameters
        ----------
        executable : `lsst.ctrl.bps.GenericWorkflowExec`
            Executable object to be added to workflow.
        """
        if executable is not None:
            self._executables[executable.name] = executable
        else:
            _LOG.warning("executable not specified (None); cannot add to the workflow's list of executables")

    def get_executables(self, data=False, transfer_only=True):
        """Retrieve executables from generic workflow.

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the executable data as well as the exec object
            name.  (The default is `False`.)
        transfer_only : `bool`, optional
            Whether to only return executables for which transfer_executable
            is True.

        Returns
        -------
        execs : `list` [`lsst.ctrl.bps.GenericWorkflowExec`] or `list` [`str`]
            Filtered executable names or objects from generic workflow.
        """
        execs = []
        for name, executable in self._executables.items():
            if not transfer_only or executable.transfer_executable:
                if not data:
                    execs.append(name)
                else:
                    execs.append(executable)
        return execs