Coverage for python/lsst/ctrl/bps/generic_workflow.py : 30%

# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Class definitions for a Generic Workflow Graph.
"""

__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob", "GenericWorkflowExec"]


import dataclasses
import itertools
import logging
from typing import Optional

import networkx as nx

from lsst.daf.butler.core.utils import iterable

from .bps_draw import draw_networkx_dot

_LOG = logging.getLogger(__name__)

@dataclasses.dataclass
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.
    """
    name: str
    """Lookup key (logical file name) of file/directory. Must be unique
    within run.
    """

    src_uri: Optional[str]  # not sure this needs to be a ButlerURI
    """Original location of file/directory.
    """

    wms_transfer: bool
    """Whether the WMS is responsible for transferring the file. Default
    is False.
    """

    job_access_remote: bool
    """Whether the job can remotely access file (using separately specified
    file access protocols). Default is False.
    """

    job_shared: bool
    """Whether the file can be shared between jobs (rather than each job
    requiring its own copy). Default is False.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if give default
    # values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, wms_transfer: bool = False,
                 job_access_remote: bool = False, job_shared: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.wms_transfer = wms_transfer
        self.job_access_remote = job_access_remote
        self.job_shared = job_shared

    __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")

    def __hash__(self):
        return hash(self.name)
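
# A minimal usage sketch for GenericWorkflowFile; the logical name and URI
# below are purely illustrative:
#
#     butler_file = GenericWorkflowFile("butlerConfig", src_uri="/repo/butler.yaml",
#                                       wms_transfer=True)
#     # Files hash by logical name, so lookups can be keyed on the name:
#     assert hash(butler_file) == hash("butlerConfig")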

@dataclasses.dataclass
class GenericWorkflowExec:
    """Information about an executable that may be needed by various workflow
    management services.
    """
    name: str
    """Lookup key (logical file name) of executable. Must be unique
    within run.
    """

    src_uri: Optional[str]  # not sure this needs to be a ButlerURI
    """Original location of executable.
    """

    transfer_executable: bool
    """Whether the WMS/plugin is responsible for staging the executable to
    a location usable by the job.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if give default
    # values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, transfer_executable: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.transfer_executable = transfer_executable

    __slots__ = ("name", "src_uri", "transfer_executable")

    def __hash__(self):
        return hash(self.name)
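
# A matching sketch for GenericWorkflowExec; the executable path is
# illustrative:
#
#     pipetask_exec = GenericWorkflowExec("pipetask", src_uri="/sw/bin/pipetask",
#                                         transfer_executable=False)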

@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.
    """
    name: str
    """Name of job. Must be unique within workflow.
    """

    label: Optional[str]
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    tags: Optional[dict]
    """Other key/value pairs for job that user may want to use as a filter.
    """

    executable: Optional[GenericWorkflowExec]
    """Executable for job.
    """

    arguments: Optional[str]
    """Command line arguments for job.
    """

    cmdvals: Optional[dict]
    """Values for variables in cmdline when using lazy command line creation.
    """

    memory_multiplier: Optional[float]
    """Memory growth rate between retries.
    """

    request_memory: Optional[int]  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_cpus: Optional[int]  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: Optional[int]  # MB
    """Max amount of job scratch disk (in MB) that the job is expected to need.
    """

    request_walltime: Optional[str]  # minutes
    """Max amount of time (in minutes) that the job is expected to need.
    """

    compute_site: Optional[str]
    """Key to look up site-specific information for running the job.
    """

    mail_to: Optional[str]
    """Comma separated list of email addresses for emailing job status.
    """

    when_to_mail: Optional[str]
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: Optional[int]
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: Optional[int]
    """Exit code which, when returned by the job, means it should not be
    automatically retried.
    """

    abort_on_value: Optional[int]
    """Job exit value that signals the entire workflow should be aborted.
    """

    abort_return_value: Optional[int]
    """Exit value to use when aborting the entire workflow.
    """

    priority: Optional[str]
    """Initial priority of job in WMS format.
    """

    category: Optional[str]
    """WMS-facing label of job within single workflow (e.g., can be used for
    throttling jobs within a single workflow).
    """

    concurrency_limit: Optional[list]
    """Names of concurrency limits that the WMS plugin can appropriately
    translate to limit the number of this job across all running workflows.
    """

    queue: Optional[str]
    """Name of queue to use. Different WMS can translate
    this concept differently.
    """

    pre_cmdline: Optional[str]
    """Command line to be executed prior to executing job.
    """

    post_cmdline: Optional[str]
    """Command line to be executed after job executes.

    Should be executed regardless of exit status.
    """

    profile: Optional[dict]
    """Nested dictionary of WMS-specific key/value pairs with primary key
    being WMS key (e.g., pegasus, condor, panda).
    """

    attrs: Optional[dict]
    """Key/value pairs of job attributes (for WMS that have attributes in
    addition to commands).
    """

    environment: Optional[dict]
    """Environment variable names and values to be explicitly set inside job.
    """
    # As of python 3.7.8, can't use __slots__ if give default values, so
    # writing own __init__.
    def __init__(self, name: str):
        self.name = name
        self.label = None
        self.tags = {}
        self.executable = None
        self.arguments = None
        self.cmdvals = {}
        self.memory_multiplier = None
        self.request_memory = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = []
        self.queue = None
        self.pre_cmdline = None
        self.post_cmdline = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}

    __slots__ = ("name", "label", "tags", "mail_to", "when_to_mail",
                 "executable", "arguments", "cmdvals",
                 "memory_multiplier", "request_memory", "request_cpus", "request_disk",
                 "request_walltime", "number_of_retries", "retry_unless_exit", "abort_on_value",
                 "abort_return_value", "compute_site", "environment", "priority", "category",
                 "concurrency_limit", "queue", "pre_cmdline", "post_cmdline", "profile", "attrs")

    def __hash__(self):
        return hash(self.name)
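
# A minimal sketch of filling in a job; the name, label, and resource values
# are illustrative:
#
#     job = GenericWorkflowJob("calibrate_visit_123")
#     job.label = "calibrate"
#     job.executable = pipetask_exec   # a GenericWorkflowExec
#     job.request_memory = 2048        # MB
#     job.request_cpus = 1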

class GenericWorkflow(nx.DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialize the graph that is passed through to the
        nx.DiGraph constructor. Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to the nx.DiGraph constructor.
    """
    def __init__(self, name, incoming_graph_data=None, **attr):
        super().__init__(incoming_graph_data, **attr)
        self._name = name
        self.run_attrs = {}
        self._files = {}
        self._executables = {}
        self._inputs = {}  # mapping job.names to list of GenericWorkflowFile
        self._outputs = {}  # mapping job.names to list of GenericWorkflowFile
        self.run_id = None
        self._final = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name
    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        Need API in case change way files are stored (e.g., make
        workflow a bipartite graph with jobs and files nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
            (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            File names or objects from generic workflow meeting specifications.
        """
        files = []
        for filename, file in self._files.items():
            if not transfer_only or file.wms_transfer:
                if not data:
                    files.append(filename)
                else:
                    files.append(file)
        return files
    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job.
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job.
        """
        if not isinstance(job, GenericWorkflowJob):
            raise RuntimeError(f"Invalid type for job to be added to GenericWorkflow ({type(job)}).")
        if self.has_node(job.name):
            raise RuntimeError(f"Job {job.name} already exists in GenericWorkflow.")
        super().add_node(job.name, job=job)
        self.add_job_relationships(parent_names, job.name)
        self.add_job_relationships(job.name, child_names)
        self.add_executable(job.executable)
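
    # Building a small two-job chain with the API above (names illustrative):
    #
    #     gw = GenericWorkflow("example_run")
    #     gw.add_job(job1)                            # no dependencies yet
    #     gw.add_job(job2, parent_names=[job1.name])  # adds edge job1 -> job2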
    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs. All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Children job names.
        """
        if parents is not None and children is not None:
            self.add_edges_from(itertools.product(iterable(parents), iterable(children)))
    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should be
            saved.
        attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently used).
        """
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)
    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow leaving connected graph.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
        """
        # Connect all parent jobs to all children jobs.
        parents = self.predecessors(job_name)
        children = self.successors(job_name)
        self.add_job_relationships(parents, children)

        # Delete job node (which also deletes its edges).
        self.remove_node(job_name)
    def add_job_inputs(self, job_name, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added.
        files : `lsst.ctrl.bps.GenericWorkflowFile` or \
                `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File object(s) to be added as inputs to the specified job.
        """
        self._inputs.setdefault(job_name, [])
        for file in iterable(files):
            # Save the central copy.
            if file.name not in self._files:
                self._files[file.name] = file

            # Save the job reference to the file.
            self._inputs[job_name].append(file)
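
    # Attaching inputs and reading them back (job and file names are
    # illustrative):
    #
    #     gw.add_job_inputs("calibrate_visit_123", [butler_file])
    #     gw.get_job_inputs("calibrate_visit_123", data=False)
    #     # -> ["butlerConfig"]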
    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object.

        Returns
        -------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File object to add to workflow.
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile
        else:
            _LOG.debug("Skipped add_file for existing file %s", gwfile.name)
    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input files for the given job. If no input files for the job,
            returns an empty list.
        """
        inputs = []
        if job_name in self._inputs:
            for gwfile in self._inputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        inputs.append(gwfile.name)
                    else:
                        inputs.append(gwfile)
        return inputs
    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File objects to be added as outputs for specified job.
        """
        self._outputs.setdefault(job_name, [])

        for file_ in iterable(files):
            # Save the central copy.
            if file_.name not in self._files:
                self._files[file_.name] = file_

            # Save the job reference to the file.
            self._outputs[job_name].append(file_)
    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management
            system would be responsible for transferring. It defaults to
            `False` thus returning all output files.

        Returns
        -------
        outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Output files for the given job. If no output files for the job,
            returns an empty list.
        """
        outputs = []

        if job_name in self._outputs:
            # _outputs stores the file objects themselves (see
            # add_job_outputs), so filter on the objects directly.
            for gwfile in self._outputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        outputs.append(gwfile.name)
                    else:
                        outputs.append(gwfile)
        return outputs
    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use. It defaults to the format for
            the dot program.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            raise RuntimeError(f"Unknown draw format ({format_})")
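
    # Writing a dot rendering of the graph (output filename illustrative):
    #
    #     with open("workflow.dot", "w") as fh:
    #         gw.draw(fh, format_="dot")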
    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer. Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data. It defaults to pickle format.
        """
        if format_ == "pickle":
            nx.write_gpickle(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader. Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream. It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow loaded from the given stream.
        """
        if format_ == "pickle":
            return nx.read_gpickle(stream)

        raise RuntimeError(f"Unknown format ({format_})")
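
    # A save/load round trip in the default pickle format (filename
    # illustrative):
    #
    #     with open("generic_workflow.pickle", "wb") as fh:
    #         gw.save(fh)
    #     with open("generic_workflow.pickle", "rb") as fh:
    #         gw2 = GenericWorkflow.load(fh)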
    def validate(self):
        """Run checks to ensure this is still a valid generic workflow graph.
        """
        # Make sure it is a directed acyclic graph.
        assert nx.algorithms.dag.is_directed_acyclic_graph(self)
    def add_workflow_source(self, workflow):
        """Add given workflow as new source to this workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.GenericWorkflow`
            Workflow to add as a new source of this workflow.
        """
        # Find source nodes in self.
        self_sources = [n for n in self if self.in_degree(n) == 0]
        _LOG.debug("self_sources = %s", self_sources)

        # Find sink nodes of workflow.
        new_sinks = [n for n in workflow if workflow.out_degree(n) == 0]
        _LOG.debug("new sinks = %s", new_sinks)

        # Add new workflow nodes to self graph and make new edges.
        self.add_nodes_from(workflow.nodes(data=True))
        self.add_edges_from(workflow.edges())
        for source in self_sources:
            for sink in new_sinks:
                self.add_edge(sink, source)

        # Files are stored separately so copy them.
        for job_name in workflow:
            self.add_job_inputs(job_name, workflow.get_job_inputs(job_name, data=True))
            self.add_job_outputs(job_name, workflow.get_job_outputs(job_name, data=True))
            self.add_executable(workflow.get_job(job_name).executable)
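
    # Prepending an initialization workflow so that its sink jobs feed this
    # workflow's source jobs (variable name illustrative):
    #
    #     gw.add_workflow_source(init_workflow)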
    def add_final(self, final):
        """Add special final job/workflow to the generic workflow.

        Parameters
        ----------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute the special final job(s), the
            job(s) to be executed after all jobs that can be executed
            have been executed regardless of exit status of any of the
            jobs.
        """
        if not isinstance(final, (GenericWorkflowJob, GenericWorkflow)):
            raise TypeError(f"Invalid type for GenericWorkflow final ({type(final)})")

        self._final = final
        if isinstance(final, GenericWorkflowJob):
            self.add_executable(final.executable)
    def get_final(self):
        """Return job/workflow to be executed after all jobs that can be
        executed have been executed regardless of exit status of any of
        the jobs.

        Returns
        -------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute final job(s).
        """
        return self._final

    def add_executable(self, executable):
        """Add executable to workflow's list of executables.

        Parameters
        ----------
        executable : `lsst.ctrl.bps.GenericWorkflowExec`
            Executable object to be added to workflow.
        """
        if executable is not None:
            self._executables[executable.name] = executable
        else:
            _LOG.warning("executable not specified (None); cannot add to the workflow's list "
                         "of executables")
    def get_executables(self, data=False, transfer_only=True):
        """Retrieve executables from generic workflow.

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the executable data as well as the exec object
            name. (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return executables for which transfer_executable
            is True.

        Returns
        -------
        execs : `list` [`lsst.ctrl.bps.GenericWorkflowExec`] or `list` [`str`]
            Filtered executable names or objects from generic workflow.
        """
        execs = []
        for name, executable in self._executables.items():
            if not transfer_only or executable.transfer_executable:
                if not data:
                    execs.append(name)
                else:
                    execs.append(executable)
        return execs