Coverage for python/lsst/ctrl/bps/generic_workflow.py: 36% (366 statements)
coverage.py v7.3.2, created at 2023-11-01 09:55 +0000
# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Class definitions for a Generic Workflow Graph."""

__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob", "GenericWorkflowExec"]

import dataclasses
import itertools
import logging
import pickle
from collections import Counter, defaultdict

from lsst.utils.iteration import ensure_iterable
from networkx import DiGraph, topological_sort
from networkx.algorithms.dag import is_directed_acyclic_graph

from .bps_draw import draw_networkx_dot

_LOG = logging.getLogger(__name__)


@dataclasses.dataclass
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.
    """

    name: str
    """Lookup key (logical file name) of file/directory. Must be unique
    within run.
    """

    src_uri: str | None  # don't know that need ResourcePath
    """Original location of file/directory.
    """

    wms_transfer: bool
    """Whether the WMS should handle transferring the file. Default is False.
    """

    job_access_remote: bool
    """Whether the job can remotely access file (using separately specified
    file access protocols). Default is False.
    """

    job_shared: bool
    """Whether the file can be shared among jobs rather than each job
    requiring its own copy. Default is False.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if give default
    # values, so writing own __init__.
    def __init__(
        self,
        name: str,
        src_uri: str | None = None,
        wms_transfer: bool = False,
        job_access_remote: bool = False,
        job_shared: bool = False,
    ):
        self.name = name
        self.src_uri = src_uri
        self.wms_transfer = wms_transfer
        self.job_access_remote = job_access_remote
        self.job_shared = job_shared

    __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")

    def __hash__(self):
        return hash(self.name)
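

# A minimal usage sketch (names and paths below are illustrative only, not
# taken from this module): constructing a file object that the WMS should
# transfer for a job.
#
#     gwfile = GenericWorkflowFile("calexp_lfn", src_uri="/data/calexp.fits",
#                                  wms_transfer=True)
#     gwfile.job_access_remote   # False (default)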


@dataclasses.dataclass
class GenericWorkflowExec:
    """Information about an executable that may be needed by various workflow
    management services.
    """

    name: str
    """Lookup key (logical file name) of executable. Must be unique
    within run.
    """

    src_uri: str | None  # don't know that need ResourcePath
    """Original location of executable.
    """

    transfer_executable: bool
    """Whether the WMS/plugin is responsible for staging executable to
    location usable by job.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if give default
    # values, so writing own __init__.
    def __init__(self, name: str, src_uri: str | None = None, transfer_executable: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.transfer_executable = transfer_executable

    __slots__ = ("name", "src_uri", "transfer_executable")

    def __hash__(self):
        return hash(self.name)
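

# Likewise, a minimal sketch for an executable entry (name and path are
# illustrative only):
#
#     gwexec = GenericWorkflowExec("pipetask", src_uri="/sw/bin/pipetask",
#                                  transfer_executable=False)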


@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.
    """

    name: str
    """Name of job. Must be unique within workflow.
    """

    label: str | None
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    quanta_counts: Counter | None
    """Counts of quanta per task label in job.
    """

    tags: dict | None
    """Other key/value pairs for job that user may want to use as a filter.
    """

    executable: GenericWorkflowExec | None
    """Executable for job.
    """

    arguments: str | None
    """Command line arguments for job.
    """

    cmdvals: dict | None
    """Values for variables in cmdline when using lazy command line creation.
    """

    memory_multiplier: float | None
    """Memory growth rate between retries.
    """

    request_memory: int | None  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_memory_max: int | None  # MB
    """Max memory (in MB) that the job should ever use.
    """

    request_cpus: int | None  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: int | None  # MB
    """Max amount of job scratch disk (in MB) that the job is expected to need.
    """

    request_walltime: str | None  # seconds
    """Max amount of time (in seconds) that the job is expected to need.
    """

    compute_site: str | None
    """Key to look up site-specific information for running the job.
    """

    accounting_group: str | None
    """Name of the accounting group to use.
    """

    accounting_user: str | None
    """Name of the user to use for accounting purposes.
    """

    mail_to: str | None
    """Comma separated list of email addresses for emailing job status.
    """

    when_to_mail: str | None
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: int | None
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: int | None
    """Exit code for job that means to not automatically retry.
    """

    abort_on_value: int | None
    """Job exit value for signals to abort the entire workflow.
    """

    abort_return_value: int | None
    """Exit value to use when aborting the entire workflow.
    """

    priority: str | None
    """Initial priority of job in WMS-format.
    """

    category: str | None
    """WMS-facing label of job within single workflow (e.g., can be used for
    throttling jobs within a single workflow).
    """

    concurrency_limit: str | None
    """Names of concurrency limits that the WMS plugin can appropriately
    translate to limit the number of this job across all running workflows.
    """

    queue: str | None
    """Name of queue to use. Different WMS can translate this concept
    differently.
    """

    pre_cmdline: str | None
    """Command line to be executed prior to executing job.
    """

    post_cmdline: str | None
    """Command line to be executed after job executes.

    Should be executed regardless of exit status.
    """

    preemptible: bool | None
    """The flag indicating whether the job can be preempted.
    """

    profile: dict | None
    """Nested dictionary of WMS-specific key/value pairs with primary key being
    WMS key (e.g., pegasus, condor, panda).
    """

    attrs: dict | None
    """Key/value pairs of job attributes (for WMS that have attributes in
    addition to commands).
    """

    environment: dict | None
    """Environment variable names and values to be explicitly set inside job.
    """

    compute_cloud: str | None
    """Key to look up cloud-specific information for running the job.
    """

    # As of python 3.7.8, can't use __slots__ if give default values, so
    # writing own __init__.
    def __init__(self, name, label="UNK"):
        self.name = name
        self.label = label
        self.quanta_counts = Counter()
        self.tags = {}
        self.executable = None
        self.arguments = None
        self.cmdvals = {}
        self.memory_multiplier = None
        self.request_memory = None
        self.request_memory_max = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.accounting_group = None
        self.accounting_user = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = None
        self.queue = None
        self.pre_cmdline = None
        self.post_cmdline = None
        self.preemptible = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}
        self.compute_cloud = None

    __slots__ = (
        "name",
        "label",
        "quanta_counts",
        "tags",
        "mail_to",
        "when_to_mail",
        "executable",
        "arguments",
        "cmdvals",
        "memory_multiplier",
        "request_memory",
        "request_memory_max",
        "request_cpus",
        "request_disk",
        "request_walltime",
        "number_of_retries",
        "retry_unless_exit",
        "abort_on_value",
        "abort_return_value",
        "compute_site",
        "accounting_group",
        "accounting_user",
        "environment",
        "priority",
        "category",
        "concurrency_limit",
        "queue",
        "pre_cmdline",
        "post_cmdline",
        "preemptible",
        "profile",
        "attrs",
        "compute_cloud",
    )

    def __hash__(self):
        return hash(self.name)
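

# A minimal sketch of typical job setup (names, labels, and resource values
# are illustrative only):
#
#     gwjob = GenericWorkflowJob("job_isr_903342", label="isr")
#     gwjob.request_memory = 2048  # MB
#     gwjob.request_cpus = 1
#     gwjob.quanta_counts = Counter({"isr": 1})
#     gwjob.executable = GenericWorkflowExec("pipetask", "/sw/bin/pipetask")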


class GenericWorkflow(DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialize graph that is passed through to DiGraph
        constructor. Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to DiGraph constructor.
    """

    def __init__(self, name, incoming_graph_data=None, **attr):
        super().__init__(incoming_graph_data, **attr)
        self._name = name
        self.run_attrs = {}
        self._job_labels = GenericWorkflowLabels()
        self._files = {}
        self._executables = {}
        self._inputs = {}  # mapping job.names to list of GenericWorkflowFile
        self._outputs = {}  # mapping job.names to list of GenericWorkflowFile
        self.run_id = None
        self._final = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    @property
    def quanta_counts(self):
        """Count of quanta per task label (`collections.Counter`)."""
        qcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            if gwjob.quanta_counts is not None:
                qcounts += gwjob.quanta_counts
        return qcounts

    @property
    def labels(self):
        """Job labels (`list` [`str`], read-only)."""
        return self._job_labels.labels

    def regenerate_labels(self):
        """Regenerate the list of job labels."""
        self._job_labels = GenericWorkflowLabels()
        for job_name in self:
            job = self.get_job(job_name)
            self._job_labels.add_job(
                job,
                [self.get_job(p).label for p in self.predecessors(job.name)],
                [self.get_job(s).label for s in self.successors(job.name)],
            )

    @property
    def job_counts(self):
        """Count of jobs per job label (`collections.Counter`)."""
        jcounts = self._job_labels.job_counts

        # Final is separate.
        final = self.get_final()
        if final:
            if isinstance(final, GenericWorkflow):
                # job_counts is a property, not a method.
                jcounts.update(final.job_counts)
            else:
                jcounts[final.label] += 1

        return jcounts

    def __iter__(self):
        """Return iterator of job names in topologically sorted order."""
        return topological_sort(self)

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        Need API in case change way files are stored (e.g., make
        workflow a bipartite graph with jobs and files nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
            (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            File names or objects from generic workflow meeting
            specifications.
        """
        files = []
        for filename, file in self._files.items():
            if not transfer_only or file.wms_transfer:
                if not data:
                    files.append(filename)
                else:
                    files.append(file)
        return files

    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job.
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job.
        """
        _LOG.debug("job: %s (%s)", job.name, job.label)
        _LOG.debug("parent_names: %s", parent_names)
        _LOG.debug("child_names: %s", child_names)
        if not isinstance(job, GenericWorkflowJob):
            raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).")
        if self.has_node(job.name):
            raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.")
        super().add_node(job.name, job=job)
        self.add_job_relationships(parent_names, job.name)
        self.add_job_relationships(job.name, child_names)
        self.add_executable(job.executable)
        self._job_labels.add_job(
            job,
            [self.get_job(p).label for p in self.predecessors(job.name)],
            [self.get_job(s).label for s in self.successors(job.name)],
        )
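
    # A minimal sketch of building a two-job workflow with add_job (names and
    # labels are illustrative only):
    #
    #     gw = GenericWorkflow("demo")
    #     gw.add_job(GenericWorkflowJob("jobA", label="init"))
    #     gw.add_job(GenericWorkflowJob("jobB", label="science"),
    #                parent_names=["jobA"])
    #     list(gw)       # ['jobA', 'jobB'] (topological order)
    #     gw.job_counts  # Counter({'init': 1, 'science': 1})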

    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs. All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Children job names.
        """
        if parents is not None and children is not None:
            self.add_edges_from(itertools.product(ensure_iterable(parents), ensure_iterable(children)))
            self._job_labels.add_job_relationships(
                [self.get_job(n).label for n in ensure_iterable(parents)],
                [self.get_job(n).label for n in ensure_iterable(children)],
            )

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should be
            saved.
        attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently used).
        """
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow leaving connected graph.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
        """
        job = self.get_job(job_name)

        # Remove from job labels.
        self._job_labels.del_job(job)

        # Connect all parent jobs to all children jobs. Materialize the
        # views as lists because add_job_relationships iterates them more
        # than once and the graph is modified below.
        parents = list(self.predecessors(job_name))
        children = list(self.successors(job_name))
        self.add_job_relationships(parents, children)

        # Delete job node (which deletes edges).
        self.remove_node(job_name)
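
    # A minimal sketch showing that del_job reconnects parents to children
    # (names and labels are illustrative only):
    #
    #     gw = GenericWorkflow("demo")
    #     for name, label in [("a", "one"), ("b", "two"), ("c", "three")]:
    #         gw.add_job(GenericWorkflowJob(name, label=label))
    #     gw.add_job_relationships("a", "b")
    #     gw.add_job_relationships("b", "c")
    #     gw.del_job("b")
    #     list(gw.edges())   # [('a', 'c')]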

    def add_job_inputs(self, job_name, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added.
        files : `lsst.ctrl.bps.GenericWorkflowFile` or \
                `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File object(s) to be added as inputs to the specified job.
        """
        self._inputs.setdefault(job_name, [])
        for file in ensure_iterable(files):
            # Save the central copy.
            if file.name not in self._files:
                self._files[file.name] = file

            # Save the job reference to the file.
            self._inputs[job_name].append(file)

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object.

        Returns
        -------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File object to add to workflow.
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile
        else:
            _LOG.debug("Skipped add_file for existing file %s", gwfile.name)

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input files for the given job. If no input files for the job,
            returns an empty list.
        """
        inputs = []
        if job_name in self._inputs:
            for gwfile in self._inputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        inputs.append(gwfile.name)
                    else:
                        inputs.append(gwfile)
        return inputs
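
    # A minimal sketch of attaching inputs and querying them (names are
    # illustrative only):
    #
    #     gw = GenericWorkflow("demo")
    #     gw.add_job(GenericWorkflowJob("jobA", label="init"))
    #     gwfile = GenericWorkflowFile("butlerConfig", src_uri="butler.yaml",
    #                                  wms_transfer=True)
    #     gw.add_job_inputs("jobA", gwfile)
    #     gw.get_job_inputs("jobA", data=False)         # ['butlerConfig']
    #     gw.get_files(data=False, transfer_only=True)  # ['butlerConfig']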

    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File objects to be added as outputs for specified job.
        """
        self._outputs.setdefault(job_name, [])

        for file_ in ensure_iterable(files):
            # Save the central copy.
            if file_.name not in self._files:
                self._files[file_.name] = file_

            # Save the job reference to the file.
            self._outputs[job_name].append(file_)

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management
            system would be responsible for transferring. It defaults to
            `False` thus returning all output files.

        Returns
        -------
        outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Output files for the given job. If no output files for the job,
            returns an empty list.
        """
        outputs = []

        if job_name in self._outputs:
            # self._outputs stores the file objects themselves, so look up
            # the central copy by the file's name (mirrors get_job_inputs).
            for gwfile in self._outputs[job_name]:
                file = self._files[gwfile.name]
                if not transfer_only or file.wms_transfer:
                    if not data:
                        outputs.append(file.name)
                    else:
                        outputs.append(file)
        return outputs

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use. It defaults to the format for
            the dot program.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            raise RuntimeError(f"Unknown draw format ({format_})")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer. Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data. It defaults to pickle format.
        """
        if format_ == "pickle":
            pickle.dump(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader. Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream. It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow loaded from the given stream.
        """
        if format_ == "pickle":
            return pickle.load(stream)

        raise RuntimeError(f"Unknown format ({format_})")
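
    # A minimal sketch of a save/load round trip in the pickle format (an
    # in-memory buffer is used here for illustration; any binary stream
    # works):
    #
    #     import io
    #     gw = GenericWorkflow("demo")
    #     buf = io.BytesIO()
    #     gw.save(buf, format_="pickle")
    #     buf.seek(0)
    #     gw2 = GenericWorkflow.load(buf)
    #     gw2.name == gw.name   # True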

    def validate(self):
        """Run checks to ensure that the generic workflow graph is valid."""
        # Make sure a directed acyclic graph.
        assert is_directed_acyclic_graph(self)

    def add_workflow_source(self, workflow):
        """Add given workflow as new source to this workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.GenericWorkflow`
            Workflow whose jobs should run before all existing jobs in this
            workflow.
        """
        # Find source nodes in self.
        self_sources = [n for n in self if self.in_degree(n) == 0]
        _LOG.debug("self_sources = %s", self_sources)

        # Find sink nodes of workflow.
        new_sinks = [n for n in workflow if workflow.out_degree(n) == 0]
        _LOG.debug("new sinks = %s", new_sinks)

        # Add new workflow nodes to self graph and make new edges.
        self.add_nodes_from(workflow.nodes(data=True))
        self.add_edges_from(workflow.edges())
        for source in self_sources:
            for sink in new_sinks:
                self.add_edge(sink, source)

        # Add separately stored info.
        for job_name in workflow:
            job = self.get_job(job_name)
            # Add job labels.
            self._job_labels.add_job(
                job,
                [self.get_job(p).label for p in self.predecessors(job.name)],
                [self.get_job(s).label for s in self.successors(job.name)],
            )
            # Files are stored separately so copy them.
            self.add_job_inputs(job_name, workflow.get_job_inputs(job_name, data=True))
            self.add_job_outputs(job_name, workflow.get_job_outputs(job_name, data=True))
            # Executables are stored separately so copy them.
            self.add_executable(workflow.get_job(job_name).executable)

    def add_final(self, final):
        """Add special final job/workflow to the generic workflow.

        Parameters
        ----------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute the special final job(s), the
            job(s) to be executed after all jobs that can be executed
            have been executed regardless of exit status of any of the
            jobs.
        """
        if not isinstance(final, GenericWorkflowJob) and not isinstance(final, GenericWorkflow):
            raise TypeError(f"Invalid type for GenericWorkflow final ({type(final)})")

        self._final = final
        if isinstance(final, GenericWorkflowJob):
            self.add_executable(final.executable)

    def get_final(self):
        """Return job/workflow to be executed after all jobs that can be
        executed have been executed regardless of exit status of any of
        the jobs.

        Returns
        -------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute final job(s).
        """
        return self._final
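
    # A minimal sketch of setting and retrieving a final job (names are
    # illustrative only):
    #
    #     gw = GenericWorkflow("demo")
    #     gw.add_final(GenericWorkflowJob("finalJob", label="finalJob"))
    #     gw.get_final().name   # 'finalJob'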

    def add_executable(self, executable):
        """Add executable to workflow's list of executables.

        Parameters
        ----------
        executable : `lsst.ctrl.bps.GenericWorkflowExec`
            Executable object to be added to workflow.
        """
        if executable is not None:
            self._executables[executable.name] = executable
        else:
            _LOG.warning("executable not specified (None); cannot add to the workflow's list of executables")

    def get_executables(self, data=False, transfer_only=True):
        """Retrieve executables from generic workflow.

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the executable data as well as the exec object
            name. (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return executables for which transfer_executable
            is True.

        Returns
        -------
        execs : `list` [`lsst.ctrl.bps.GenericWorkflowExec`] or `list` [`str`]
            Filtered executable names or objects from generic workflow.
        """
        execs = []
        for name, executable in self._executables.items():
            if not transfer_only or executable.transfer_executable:
                if not data:
                    execs.append(name)
                else:
                    execs.append(executable)
        return execs

    def get_jobs_by_label(self, label: str):
        """Retrieve jobs by label from workflow.

        Parameters
        ----------
        label : `str`
            Label of jobs to retrieve.

        Returns
        -------
        jobs : `list` [`lsst.ctrl.bps.GenericWorkflowJob`]
            Jobs having given label.
        """
        return self._job_labels.get_jobs_by_label(label)


class GenericWorkflowLabels:
    """Label-oriented representation of the GenericWorkflow."""

    def __init__(self):
        self._label_graph = DiGraph()  # Dependency graph of job labels
        self._label_to_jobs = defaultdict(list)  # mapping job label to list of GenericWorkflowJob

    @property
    def labels(self):
        """List of job labels (`list` [`str`], read-only)."""
        return list(topological_sort(self._label_graph))

    @property
    def job_counts(self):
        """Count of jobs per job label (`collections.Counter`)."""
        jcounts = Counter({label: len(jobs) for label, jobs in self._label_to_jobs.items()})
        return jcounts

    def get_jobs_by_label(self, label: str):
        """Retrieve jobs by label from workflow.

        Parameters
        ----------
        label : `str`
            Label of jobs to retrieve.

        Returns
        -------
        jobs : `list` [`lsst.ctrl.bps.GenericWorkflowJob`]
            Jobs having given label.
        """
        return self._label_to_jobs[label]

    def add_job(self, job, parent_labels, child_labels):
        """Add job's label to labels.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            The job whose label to add to the job labels.
        parent_labels : `list` [`str`]
            Parent job labels.
        child_labels : `list` [`str`]
            Children job labels.
        """
        _LOG.debug("job: %s (%s)", job.name, job.label)
        _LOG.debug("parent_labels: %s", parent_labels)
        _LOG.debug("child_labels: %s", child_labels)
        self._label_to_jobs[job.label].append(job)
        self._label_graph.add_node(job.label)
        for parent in parent_labels:
            self._label_graph.add_edge(parent, job.label)
        for child in child_labels:
            self._label_graph.add_edge(job.label, child)

    def add_job_relationships(self, parent_labels, children_labels):
        """Add dependencies between parent and child job labels.
        All parents will be connected to all children.

        Parameters
        ----------
        parent_labels : `list` [`str`]
            Parent job labels.
        children_labels : `list` [`str`]
            Children job labels.
        """
        if parent_labels is not None and children_labels is not None:
            # Since labels, must ensure not adding edge from label to itself.
            edges = [
                e
                for e in itertools.product(ensure_iterable(parent_labels), ensure_iterable(children_labels))
                if e[0] != e[1]
            ]

            self._label_graph.add_edges_from(edges)

    def del_job(self, job):
        """Delete job and its label from job labels.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            The job to delete from the job labels.
        """
        self._label_to_jobs[job.label].remove(job)
        # Don't leave keys around if removed last job.
        if not self._label_to_jobs[job.label]:
            del self._label_to_jobs[job.label]

            # Materialize predecessor/successor views before removing the
            # node, which would invalidate them.
            parents = list(self._label_graph.predecessors(job.label))
            children = list(self._label_graph.successors(job.label))
            self._label_graph.remove_node(job.label)
            self._label_graph.add_edges_from(itertools.product(parents, children))