Coverage for python/lsst/ctrl/bps/generic_workflow.py : 30%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Class definitions for a Generic Workflow Graph.
23"""
25__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob", "GenericWorkflowExec"]
28import dataclasses
29import itertools
30import logging
31from typing import Optional
33import networkx as nx
35from lsst.daf.butler.core.utils import iterable
36from .bps_draw import draw_networkx_dot
38_LOG = logging.getLogger(__name__)
@dataclasses.dataclass
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.
    """
    name: str
    """Lookup key (logical file name) of file/directory. Must be unique
    within run.
    """

    src_uri: Optional[str]  # don't know that need ButlerURI
    """Original location of file/directory.
    """

    wms_transfer: bool
    """Whether the workflow management system is responsible for
    transferring this file (files with this flag set are the ones returned
    by transfer-only file queries). Default is False.
    """

    job_access_remote: bool
    """Whether the job can remotely access file (using separately specified
    file access protocols). Default is False.
    """

    job_shared: bool
    """Whether job requires its own copy of this file. Default is False.
    (NOTE(review): the attribute name suggests "file can be shared between
    jobs", the opposite of this sentence — confirm the intended semantics
    against the WMS plugins.)
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if give default
    # values, so writing own __init__ (dataclass does not overwrite a
    # user-defined __init__ present in the class body).
    def __init__(self, name: str, src_uri: str = None, wms_transfer: bool = False,
                 job_access_remote: bool = False, job_shared: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.wms_transfer = wms_transfer
        self.job_access_remote = job_access_remote
        self.job_shared = job_shared

    __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")

    def __hash__(self):
        # Files are identified solely by name within a run.
        return hash(self.name)
@dataclasses.dataclass
class GenericWorkflowExec:
    """Information about an executable that may be needed by various workflow
    management services.
    """
    name: str
    """Lookup key (logical file name) of executable. Must be unique
    within run.
    """

    src_uri: Optional[str]  # don't know that need ButlerURI
    """Original location of executable.
    """

    transfer_executable: bool
    """Whether the WMS/plugin is responsible for staging executable to
    location usable by job.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if give default
    # values, so writing own __init__ (dataclass does not overwrite a
    # user-defined __init__ present in the class body).
    def __init__(self, name: str, src_uri: str = None, transfer_executable: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.transfer_executable = transfer_executable

    __slots__ = ("name", "src_uri", "transfer_executable")

    def __hash__(self):
        # Executables are identified solely by name within a run.
        return hash(self.name)
@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.
    """
    name: str
    """Name of job. Must be unique within workflow.
    """

    label: Optional[str]
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    tags: Optional[dict]
    """Other key/value pairs for job that user may want to use as a filter.
    """

    executable: Optional[GenericWorkflowExec]
    """Executable for job.
    """

    arguments: Optional[str]
    """Command line arguments for job.
    """

    cmdvals: Optional[dict]
    """Values for variables in cmdline when using lazy command line creation.
    """

    memory_multiplier: Optional[float]
    """Memory growth rate between retries.
    """

    request_memory: Optional[int]  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_cpus: Optional[int]  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: Optional[int]  # MB
    """Max amount of job scratch disk (in MB) that the job is expected to need.
    """

    request_walltime: Optional[str]  # minutes
    """Max amount of time that the job is expected to need.
    (NOTE(review): the inline comment says minutes while the original
    docstring said seconds — the units are inconsistent; confirm against the
    WMS plugins before relying on either.)
    """

    compute_site: Optional[str]
    """Key to look up site-specific information for running the job.
    """

    mail_to: Optional[str]
    """Comma separated list of email addresses for emailing job status.
    """

    when_to_mail: Optional[str]
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: Optional[int]
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: Optional[int]
    """Exit code for job that means to not automatically retry.
    """

    abort_on_value: Optional[int]
    """Job exit value for signals to abort the entire workflow.
    """

    abort_return_value: Optional[int]
    """Exit value to use when aborting the entire workflow.
    """

    priority: Optional[str]
    """Initial priority of job in WMS-format.
    """

    category: Optional[str]
    """WMS-facing label of job within single workflow (e.g., can be used for
    throttling jobs within a single workflow).
    """

    concurrency_limit: Optional[list]
    """Names of concurrency limits that the WMS plugin can appropriately
    translate to limit the number of this job across all running workflows.
    """

    queue: Optional[str]
    """Name of queue to use. Different WMS can translate this concept
    differently.
    """

    pre_cmdline: Optional[str]
    """Command line to be executed prior to executing job.
    """

    post_cmdline: Optional[str]
    """Command line to be executed after job executes.

    Should be executed regardless of exit status.
    """

    preemptible: Optional[bool]
    """The flag indicating whether the job can be preempted.
    """

    profile: Optional[dict]
    """Nested dictionary of WMS-specific key/value pairs with primary key being
    WMS key (e.g., pegasus, condor, panda).
    """

    attrs: Optional[dict]
    """Key/value pairs of job attributes (for WMS that have attributes in
    addition to commands).
    """

    environment: Optional[dict]
    """Environment variable names and values to be explicitly set inside job.
    """

    # As of python 3.7.8, can't use __slots__ if give default values, so
    # writing own __init__ (dataclass does not overwrite a user-defined
    # __init__ present in the class body).  Everything except the name
    # defaults to None / an empty container and is filled in later.
    def __init__(self, name: str):
        self.name = name
        self.label = None
        self.tags = {}
        self.executable = None
        self.arguments = None
        self.cmdvals = {}
        self.memory_multiplier = None
        self.request_memory = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = []
        self.queue = None
        self.pre_cmdline = None
        self.post_cmdline = None
        self.preemptible = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}

    __slots__ = ("name", "label", "tags", "mail_to", "when_to_mail",
                 "executable", "arguments", "cmdvals",
                 "memory_multiplier", "request_memory", "request_cpus", "request_disk", "request_walltime",
                 "number_of_retries", "retry_unless_exit", "abort_on_value", "abort_return_value",
                 "compute_site", "environment", "priority", "category", "concurrency_limit",
                 "queue", "pre_cmdline", "post_cmdline", "preemptible", "profile", "attrs")

    def __hash__(self):
        # Jobs are identified solely by name within a workflow.
        return hash(self.name)
class GenericWorkflow(nx.DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialized graph that is passed through to nx.DiGraph
        constructor. Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to nx.DiGraph constructor.
    """
    def __init__(self, name, incoming_graph_data=None, **attr):
        super().__init__(incoming_graph_data, **attr)
        self._name = name
        self.run_attrs = {}
        self._files = {}
        self._executables = {}
        self._inputs = {}  # mapping job.names to list of GenericWorkflowFile
        self._outputs = {}  # mapping job.names to list of GenericWorkflowFile
        self.run_id = None
        self._final = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        Need API in case change way files are stored (e.g., make
        workflow a bipartite graph with jobs and files nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
            (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            File names or objects from generic workflow meeting
            specifications.
        """
        files = []
        for filename, file in self._files.items():
            if not transfer_only or file.wms_transfer:
                if not data:
                    files.append(filename)
                else:
                    files.append(file)
        return files

    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job.
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job.

        Raises
        ------
        RuntimeError
            If the given object is not a GenericWorkflowJob or a job with
            the same name already exists.
        """
        if not isinstance(job, GenericWorkflowJob):
            raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).")
        if self.has_node(job.name):
            raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.")
        super().add_node(job.name, job=job)
        self.add_job_relationships(parent_names, job.name)
        self.add_job_relationships(job.name, child_names)
        self.add_executable(job.executable)

    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs. All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Children job names.
        """
        # Either side being None means "no relationships to add".
        if parents is not None and children is not None:
            self.add_edges_from(itertools.product(iterable(parents), iterable(children)))

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should be
            saved.
        attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently used).
        """
        # Delegate each pair to add_edge so its existence checks run.
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.

        Raises
        ------
        RuntimeError
            If either job name is not already a node in the workflow
            (prevents networkx from silently creating empty nodes).
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow leaving connected graph.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
        """
        # Connect all parent jobs to all children jobs so the graph stays
        # connected across the removed node.
        parents = self.predecessors(job_name)
        children = self.successors(job_name)
        self.add_job_relationships(parents, children)

        # Delete job node (which deletes edges).
        self.remove_node(job_name)

    def add_job_inputs(self, job_name, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added.
        files : `lsst.ctrl.bps.GenericWorkflowFile` or \
                `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File object(s) to be added as inputs to the specified job.
        """
        self._inputs.setdefault(job_name, [])
        for file in iterable(files):
            # Save the central copy (first one registered wins).
            if file.name not in self._files:
                self._files[file.name] = file

            # Save the job reference to the file.
            self._inputs[job_name].append(file)

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object.

        Returns
        -------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File object to add to workflow.
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile
        else:
            _LOG.debug("Skipped add_file for existing file %s", gwfile.name)

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input files for the given job. If no input files for the job,
            returns an empty list.
        """
        inputs = []
        if job_name in self._inputs:
            for gwfile in self._inputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        inputs.append(gwfile.name)
                    else:
                        inputs.append(gwfile)
        return inputs

    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File objects to be added as outputs for specified job.
        """
        self._outputs.setdefault(job_name, [])

        for file_ in iterable(files):
            # Save the central copy (first one registered wins).
            if file_.name not in self._files:
                self._files[file_.name] = file_

            # Save the job reference to the file.
            self._outputs[job_name].append(file_)

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management
            system would be responsible for transferring. It defaults to
            `False` thus returning all output files.

        Returns
        -------
        outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Output files for the given job. If no output files for the job,
            returns an empty list.
        """
        outputs = []

        # BUGFIX: _outputs stores GenericWorkflowFile objects (see
        # add_job_outputs), not names; the original looked each entry up in
        # self._files as if it were a name, raising KeyError, and appended
        # the object (not the name) when data=False.  Mirror get_job_inputs.
        if job_name in self._outputs:
            for gwfile in self._outputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        outputs.append(gwfile.name)
                    else:
                        outputs.append(gwfile)
        return outputs

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use. It defaults to the format for
            the dot program.

        Raises
        ------
        RuntimeError
            If given visualization format is not supported.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            # BUGFIX: original message was missing the closing parenthesis.
            raise RuntimeError(f"Unknown draw format ({format_})")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer. Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data. It defaults to pickle format.

        Raises
        ------
        RuntimeError
            If given format is not supported.
        """
        if format_ == "pickle":
            nx.write_gpickle(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader. Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream. It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow loaded from the given stream.

        Raises
        ------
        RuntimeError
            If given format is not supported.
        """
        if format_ == "pickle":
            return nx.read_gpickle(stream)

        raise RuntimeError(f"Unknown format ({format_})")

    def validate(self):
        """Run checks to ensure this is still a valid generic workflow graph.
        """
        # Make sure a directed acyclic graph.
        # NOTE(review): assert is stripped under `python -O`; kept as-is so
        # callers relying on AssertionError are unaffected.
        assert nx.algorithms.dag.is_directed_acyclic_graph(self)

    def add_workflow_source(self, workflow):
        """Add given workflow as new source to this workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.GenericWorkflow`
            Workflow whose sink jobs become parents of this workflow's
            current source jobs.
        """
        # Find source nodes in self.
        self_sources = [n for n in self if self.in_degree(n) == 0]
        _LOG.debug("self_sources = %s", self_sources)

        # Find sink nodes of workflow.
        new_sinks = [n for n in workflow if workflow.out_degree(n) == 0]
        _LOG.debug("new sinks = %s", new_sinks)

        # Add new workflow nodes to self graph and make new edges.
        self.add_nodes_from(workflow.nodes(data=True))
        self.add_edges_from(workflow.edges())
        for source in self_sources:
            for sink in new_sinks:
                self.add_edge(sink, source)

        # Files are stored separately so copy them.
        for job_name in workflow:
            self.add_job_inputs(job_name, workflow.get_job_inputs(job_name, data=True))
            self.add_job_outputs(job_name, workflow.get_job_outputs(job_name, data=True))
            self.add_executable(workflow.get_job(job_name).executable)

    def add_final(self, final):
        """Add special final job/workflow to the generic workflow.

        Parameters
        ----------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute the special final job(s), the
            job(s) to be executed after all jobs that can be executed
            have been executed regardless of exit status of any of the
            jobs.

        Raises
        ------
        TypeError
            If given final is neither a job nor a workflow.
        """
        if not isinstance(final, (GenericWorkflowJob, GenericWorkflow)):
            # BUGFIX: original string was missing the f prefix, so the
            # message printed the literal "{type(final)}" placeholder.
            raise TypeError(f"Invalid type for GenericWorkflow final ({type(final)})")

        self._final = final
        if isinstance(final, GenericWorkflowJob):
            self.add_executable(final.executable)

    def get_final(self):
        """Return job/workflow to be executed after all jobs that can be
        executed have been executed regardless of exit status of any of
        the jobs.

        Returns
        -------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute final job(s).
        """
        return self._final

    def add_executable(self, executable):
        """Add executable to workflow's list of executables.

        Parameters
        ----------
        executable : `lsst.ctrl.bps.GenericWorkflowExec`
            Executable object to be added to workflow.
        """
        if executable is not None:
            self._executables[executable.name] = executable
        else:
            _LOG.warning("executable not specified (None); cannot add to the workflow's list of executables")

    def get_executables(self, data=False, transfer_only=True):
        """Retrieve executables from generic workflow.

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the executable data as well as the exec object
            name. (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return executables for which transfer_executable
            is True.

        Returns
        -------
        execs : `list` [`lsst.ctrl.bps.GenericWorkflowExec`] or `list` [`str`]
            Filtered executable names or objects from generic workflow.
        """
        execs = []
        for name, executable in self._executables.items():
            if not transfer_only or executable.transfer_executable:
                if not data:
                    execs.append(name)
                else:
                    execs.append(executable)
        return execs