Coverage for python/lsst/ctrl/bps/generic_workflow.py: 36%
366 statements
« prev ^ index » next coverage.py v7.3.0, created at 2023-08-22 09:44 +0000
1# This file is part of ctrl_bps.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Class definitions for a Generic Workflow Graph.
23"""
25__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob", "GenericWorkflowExec"]
28import dataclasses
29import itertools
30import logging
31import pickle
32from collections import Counter, defaultdict
34from lsst.utils.iteration import ensure_iterable
35from networkx import DiGraph, topological_sort
36from networkx.algorithms.dag import is_directed_acyclic_graph
38from .bps_draw import draw_networkx_dot
40_LOG = logging.getLogger(__name__)
43@dataclasses.dataclass
44class GenericWorkflowFile:
45 """Information about a file that may be needed by various workflow
46 management services.
47 """
49 name: str
50 """Lookup key (logical file name) of file/directory. Must be unique
51 within run.
52 """
54 src_uri: str or None # don't know that need ResourcePath
55 """Original location of file/directory.
56 """
58 wms_transfer: bool
59 """Whether the WMS should ignore file or not. Default is False.
60 """
62 job_access_remote: bool
63 """Whether the job can remotely access file (using separately specified
64 file access protocols). Default is False.
65 """
67 job_shared: bool
68 """Whether job requires its own copy of this file. Default is False.
69 """
71 # As of python 3.7.8, can't use __slots__ + dataclass if give default
72 # values, so writing own __init__.
73 def __init__(
74 self,
75 name: str,
76 src_uri: str = None,
77 wms_transfer: bool = False,
78 job_access_remote: bool = False,
79 job_shared: bool = False,
80 ):
81 self.name = name
82 self.src_uri = src_uri
83 self.wms_transfer = wms_transfer
84 self.job_access_remote = job_access_remote
85 self.job_shared = job_shared
87 __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")
89 def __hash__(self):
90 return hash(self.name)
93@dataclasses.dataclass
94class GenericWorkflowExec:
95 """Information about an executable that may be needed by various workflow
96 management services.
97 """
99 name: str
100 """Lookup key (logical file name) of executable. Must be unique
101 within run.
102 """
104 src_uri: str or None # don't know that need ResourcePath
105 """Original location of executable.
106 """
108 transfer_executable: bool
109 """Whether the WMS/plugin is responsible for staging executable to
110 location usable by job.
111 """
113 # As of python 3.7.8, can't use __slots__ + dataclass if give default
114 # values, so writing own __init__.
115 def __init__(self, name: str, src_uri: str = None, transfer_executable: bool = False):
116 self.name = name
117 self.src_uri = src_uri
118 self.transfer_executable = transfer_executable
120 __slots__ = ("name", "src_uri", "transfer_executable")
122 def __hash__(self):
123 return hash(self.name)
@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.
    """

    name: str
    """Name of job. Must be unique within workflow.
    """

    label: str | None
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    quanta_counts: Counter | None
    """Counts of quanta per task label in job.
    """

    tags: dict | None
    """Other key/value pairs for job that user may want to use as a filter.
    """

    executable: GenericWorkflowExec | None
    """Executable for job.
    """

    arguments: str | None
    """Command line arguments for job.
    """

    cmdvals: dict | None
    """Values for variables in cmdline when using lazy command line creation.
    """

    memory_multiplier: float | None
    """Memory growth rate between retries.
    """

    request_memory: int | None  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_memory_max: int | None  # MB
    """Max memory (in MB) that the job should ever use.
    """

    request_cpus: int | None  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: int | None  # MB
    """Max amount of job scratch disk (in MB) that the job is expected to need.
    """

    # NOTE(review): original inline comment said "minutes" while the docstring
    # below says "seconds" -- units are ambiguous; confirm against the WMS
    # plugins that consume this field.
    request_walltime: str | None  # minutes
    """Max amount of time (in seconds) that the job is expected to need.
    """

    compute_site: str | None
    """Key to look up site-specific information for running the job.
    """

    accounting_group: str | None
    """Name of the accounting group to use.
    """

    accounting_user: str | None
    """Name of the user to use for accounting purposes.
    """

    mail_to: str | None
    """Comma separated list of email addresses for emailing job status.
    """

    when_to_mail: str | None
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: int | None
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: int | None
    """Exit code for job that means to not automatically retry.
    """

    abort_on_value: int | None
    """Job exit value for signals to abort the entire workflow.
    """

    abort_return_value: int | None
    """Exit value to use when aborting the entire workflow.
    """

    priority: str | None
    """Initial priority of job in WMS-format.
    """

    category: str | None
    """WMS-facing label of job within single workflow (e.g., can be used for
    throttling jobs within a single workflow).
    """

    concurrency_limit: str | None
    """Names of concurrency limits that the WMS plugin can appropriately
    translate to limit the number of this job across all running workflows.
    """

    queue: str | None
    """Name of queue to use. Different WMS can translate this concept
    differently.
    """

    pre_cmdline: str | None
    """Command line to be executed prior to executing job.
    """

    post_cmdline: str | None
    """Command line to be executed after job executes.

    Should be executed regardless of exit status.
    """

    preemptible: bool | None
    """The flag indicating whether the job can be preempted.
    """

    profile: dict | None
    """Nested dictionary of WMS-specific key/value pairs with primary key being
    WMS key (e.g., pegasus, condor, panda).
    """

    attrs: dict | None
    """Key/value pairs of job attributes (for WMS that have attributes in
    addition to commands).
    """

    environment: dict | None
    """Environment variable names and values to be explicitly set inside job.
    """

    compute_cloud: str | None
    """Key to look up cloud-specific information for running the job.
    """

    # As of python 3.7.8, can't use __slots__ if give default values, so
    # writing own __init__.
    def __init__(self, name, label="UNK"):
        """Initialize a job with its unique name and label; every other
        field starts at its documented default (None or an empty
        container).
        """
        self.name = name
        self.label = label
        self.quanta_counts = Counter()
        self.tags = {}
        self.executable = None
        self.arguments = None
        self.cmdvals = {}
        self.memory_multiplier = None
        self.request_memory = None
        self.request_memory_max = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.accounting_group = None
        self.accounting_user = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = None
        self.queue = None
        self.pre_cmdline = None
        self.post_cmdline = None
        self.preemptible = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}
        self.compute_cloud = None

    # Keep in sync with the dataclass fields above; order here does not
    # affect behavior, only the per-instance slot layout.
    __slots__ = (
        "name",
        "label",
        "quanta_counts",
        "tags",
        "mail_to",
        "when_to_mail",
        "executable",
        "arguments",
        "cmdvals",
        "memory_multiplier",
        "request_memory",
        "request_memory_max",
        "request_cpus",
        "request_disk",
        "request_walltime",
        "number_of_retries",
        "retry_unless_exit",
        "abort_on_value",
        "abort_return_value",
        "compute_site",
        "accounting_group",
        "accounting_user",
        "environment",
        "priority",
        "category",
        "concurrency_limit",
        "queue",
        "pre_cmdline",
        "post_cmdline",
        "preemptible",
        "profile",
        "attrs",
        "compute_cloud",
    )

    def __hash__(self):
        # Jobs are identified solely by their unique name within a workflow.
        return hash(self.name)
class GenericWorkflow(DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialized graph that is passed through to DiGraph
        constructor.  Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to DiGraph constructor.
    """

    def __init__(self, name, incoming_graph_data=None, **attr):
        super().__init__(incoming_graph_data, **attr)
        self._name = name
        self.run_attrs = {}
        self._job_labels = GenericWorkflowLabels()
        self._files = {}
        self._executables = {}
        self._inputs = {}  # mapping job.names to list of GenericWorkflowFile
        self._outputs = {}  # mapping job.names to list of GenericWorkflowFile
        self.run_id = None
        self._final = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    @property
    def quanta_counts(self):
        """Count of quanta per task label (`collections.Counter`)."""
        qcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            if gwjob.quanta_counts is not None:
                qcounts += gwjob.quanta_counts
        return qcounts

    @property
    def labels(self):
        """Job labels (`list` [`str`], read-only)"""
        return self._job_labels.labels

    def regenerate_labels(self):
        """Regenerate the list of job labels."""
        self._job_labels = GenericWorkflowLabels()
        for job_name in self:
            job = self.get_job(job_name)
            self._job_labels.add_job(
                job,
                [self.get_job(p).label for p in self.predecessors(job.name)],
                [self.get_job(p).label for p in self.successors(job.name)],
            )

    @property
    def job_counts(self):
        """Count of jobs per job label (`collections.Counter`)."""
        jcounts = self._job_labels.job_counts

        # The final job/workflow is stored outside the graph, so count it
        # separately.
        final = self.get_final()
        if final:
            if isinstance(final, GenericWorkflow):
                # job_counts is a property, not a method; calling the
                # returned Counter would raise TypeError.
                jcounts.update(final.job_counts)
            else:
                jcounts[final.label] += 1

        return jcounts

    def __iter__(self):
        """Return iterator of job names in topologically sorted order."""
        return topological_sort(self)

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        Need API in case change way files are stored (e.g., make
        workflow a bipartite graph with jobs and files nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
            (The defaults is False.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            File names or objects from generic workflow meeting
            specifications.
        """
        files = []
        for filename, file in self._files.items():
            if not transfer_only or file.wms_transfer:
                if not data:
                    files.append(filename)
                else:
                    files.append(file)
        return files

    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job

        Raises
        ------
        RuntimeError
            If the job is not a GenericWorkflowJob or a job with the same
            name already exists in the workflow.
        """
        _LOG.debug("job: %s (%s)", job.name, job.label)
        _LOG.debug("parent_names: %s", parent_names)
        _LOG.debug("child_names: %s", child_names)
        if not isinstance(job, GenericWorkflowJob):
            raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).")
        if self.has_node(job.name):
            raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.")
        super().add_node(job.name, job=job)
        self.add_job_relationships(parent_names, job.name)
        self.add_job_relationships(job.name, child_names)
        self.add_executable(job.executable)
        self._job_labels.add_job(
            job,
            [self.get_job(p).label for p in self.predecessors(job.name)],
            [self.get_job(p).label for p in self.successors(job.name)],
        )

    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs. All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Children job names.
        """
        if parents is not None and children is not None:
            # Materialize once: callers may pass one-shot iterators (e.g.
            # graph predecessors views) and both the edge product and the
            # label lookups below need to traverse the values.
            parents = list(ensure_iterable(parents))
            children = list(ensure_iterable(children))
            self.add_edges_from(itertools.product(parents, children))
            self._job_labels.add_job_relationships(
                [self.get_job(n).label for n in parents],
                [self.get_job(n).label for n in children],
            )

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should be
            saved.
        attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently
            used).
        """
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.

        Raises
        ------
        RuntimeError
            If either endpoint is not already a job in the workflow.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow leaving connected graph.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
        """
        job = self.get_job(job_name)

        # Remove from job labels
        self._job_labels.del_job(job)

        # Connect all parent jobs to all children jobs.  Materialize the
        # views first: add_job_relationships traverses each collection more
        # than once, and a one-shot iterator would come back empty.
        parents = list(self.predecessors(job_name))
        children = list(self.successors(job_name))
        self.add_job_relationships(parents, children)

        # Delete job node (which deletes edges).
        self.remove_node(job_name)

    def add_job_inputs(self, job_name, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added
        files : `lsst.ctrl.bps.GenericWorkflowFile` or \
                `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File object(s) to be added as inputs to the specified job.
        """
        self._inputs.setdefault(job_name, [])
        for file in ensure_iterable(files):
            # Save the central copy
            if file.name not in self._files:
                self._files[file.name] = file

            # Save the job reference to the file
            self._inputs[job_name].append(file)

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object

        Returns
        -------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File object to add to workflow
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile
        else:
            _LOG.debug("Skipped add_file for existing file %s", gwfile.name)

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input files for the given job. If no input files for the job,
            returns an empty list.
        """
        inputs = []
        if job_name in self._inputs:
            for gwfile in self._inputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        inputs.append(gwfile.name)
                    else:
                        inputs.append(gwfile)
        return inputs

    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File objects to be added as outputs for specified job.
        """
        self._outputs.setdefault(job_name, [])

        for file_ in ensure_iterable(files):
            # Save the central copy
            if file_.name not in self._files:
                self._files[file_.name] = file_

            # Save the job reference to the file
            self._outputs[job_name].append(file_)

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management
            system would be responsible for transferring. It defaults to
            `False` thus returning all output files.

        Returns
        -------
        outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Output files for the given job. If no output files for the job,
            returns an empty list.
        """
        outputs = []

        # self._outputs stores GenericWorkflowFile objects (see
        # add_job_outputs), so iterate them directly like get_job_inputs
        # does; indexing self._files with a file object raised KeyError.
        if job_name in self._outputs:
            for gwfile in self._outputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        outputs.append(gwfile.name)
                    else:
                        outputs.append(gwfile)
        return outputs

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use. It defaults to the format for
            the dot program.

        Raises
        ------
        RuntimeError
            If given an unknown visualization format.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            raise RuntimeError(f"Unknown draw format ({format_})")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer. Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data. It defaults to pickle format.

        Raises
        ------
        RuntimeError
            If given an unknown format.
        """
        if format_ == "pickle":
            pickle.dump(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader. Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream. It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow loaded from the given stream

        Raises
        ------
        RuntimeError
            If given an unknown format.
        """
        if format_ == "pickle":
            return pickle.load(stream)

        raise RuntimeError(f"Unknown format ({format_})")

    def validate(self):
        """Run checks to ensure that the generic workflow graph is valid."""
        # Make sure a directed acyclic graph.
        # NOTE(review): assert is stripped under ``python -O``; callers
        # relying on validate() in optimized mode get no check.
        assert is_directed_acyclic_graph(self)

    def add_workflow_source(self, workflow):
        """Add given workflow as new source to this workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.GenericWorkflow`
            Workflow whose jobs should all run before this workflow's
            current source jobs.
        """
        # Find source nodes in self.
        self_sources = [n for n in self if self.in_degree(n) == 0]
        _LOG.debug("self_sources = %s", self_sources)

        # Find sink nodes of workflow.
        new_sinks = [n for n in workflow if workflow.out_degree(n) == 0]
        _LOG.debug("new sinks = %s", new_sinks)

        # Add new workflow nodes to self graph and make new edges.
        self.add_nodes_from(workflow.nodes(data=True))
        self.add_edges_from(workflow.edges())
        for source in self_sources:
            for sink in new_sinks:
                self.add_edge(sink, source)

        # Add separately stored info
        for job_name in workflow:
            job = self.get_job(job_name)
            # Add job labels
            self._job_labels.add_job(
                job,
                [self.get_job(p).label for p in self.predecessors(job.name)],
                [self.get_job(p).label for p in self.successors(job.name)],
            )
            # Files are stored separately so copy them.
            self.add_job_inputs(job_name, workflow.get_job_inputs(job_name, data=True))
            self.add_job_outputs(job_name, workflow.get_job_outputs(job_name, data=True))
            # Executables are stored separately so copy them.
            self.add_executable(workflow.get_job(job_name).executable)

    def add_final(self, final):
        """Add special final job/workflow to the generic workflow.

        Parameters
        ----------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute the special final job(s), the
            job(s) to be executed after all jobs that can be executed
            have been executed regardless of exit status of any of the
            jobs.

        Raises
        ------
        TypeError
            If final is neither a GenericWorkflowJob nor a GenericWorkflow.
        """
        if not isinstance(final, GenericWorkflowJob) and not isinstance(final, GenericWorkflow):
            # f-prefix was missing, so the message printed the placeholder
            # literally instead of the actual type.
            raise TypeError(f"Invalid type for GenericWorkflow final ({type(final)})")

        self._final = final
        if isinstance(final, GenericWorkflowJob):
            self.add_executable(final.executable)

    def get_final(self):
        """Return job/workflow to be executed after all jobs that can be
        executed have been executed regardless of exit status of any of
        the jobs.

        Returns
        -------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute final job(s).
        """
        return self._final

    def add_executable(self, executable):
        """Add executable to workflow's list of executables.

        Parameters
        ----------
        executable : `lsst.ctrl.bps.GenericWorkflowExec`
            Executable object to be added to workflow.
        """
        if executable is not None:
            self._executables[executable.name] = executable
        else:
            _LOG.warning("executable not specified (None); cannot add to the workflow's list of executables")

    def get_executables(self, data=False, transfer_only=True):
        """Retrieve executables from generic workflow.

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the executable data as well as the exec object
            name. (The defaults is False.)
        transfer_only : `bool`, optional
            Whether to only return executables for which transfer_executable
            is True.

        Returns
        -------
        execs : `list` [`lsst.ctrl.bps.GenericWorkflowExec`] or `list` [`str`]
            Filtered executable names or objects from generic workflow.
        """
        execs = []
        for name, executable in self._executables.items():
            if not transfer_only or executable.transfer_executable:
                if not data:
                    execs.append(name)
                else:
                    execs.append(executable)
        return execs

    def get_jobs_by_label(self, label: str):
        """Retrieve jobs by label from workflow.

        Parameters
        ----------
        label : `str`
            Label of jobs to retrieve.

        Returns
        -------
        jobs : list[`lsst.ctrl.bps.GenericWorkflowJob`]
            Jobs having given label.
        """
        return self._job_labels.get_jobs_by_label(label)
class GenericWorkflowLabels:
    """Label-oriented representation of the GenericWorkflow."""

    def __init__(self):
        self._label_graph = DiGraph()  # Dependency graph of job labels
        self._label_to_jobs = defaultdict(list)  # mapping job label to list of GenericWorkflowJob

    @property
    def labels(self):
        """List of job labels (`list` [`str`], read-only)"""
        return list(topological_sort(self._label_graph))

    @property
    def job_counts(self):
        """Count of jobs per job label (`collections.Counter`)."""
        jcounts = Counter({label: len(jobs) for label, jobs in self._label_to_jobs.items()})
        return jcounts

    def get_jobs_by_label(self, label: str):
        """Retrieve jobs by label from workflow.

        Parameters
        ----------
        label : `str`
            Label of jobs to retrieve.

        Returns
        -------
        jobs : list[`lsst.ctrl.bps.GenericWorkflowJob`]
            Jobs having given label.
        """
        return self._label_to_jobs[label]

    def add_job(self, job, parent_labels, child_labels):
        """Add job's label to labels.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            The job whose label is being added.
        parent_labels : `list` [`str`]
            Parent job labels.
        child_labels : `list` [`str`]
            Children job labels.
        """
        _LOG.debug("job: %s (%s)", job.name, job.label)
        _LOG.debug("parent_labels: %s", parent_labels)
        _LOG.debug("child_labels: %s", child_labels)
        self._label_to_jobs[job.label].append(job)
        self._label_graph.add_node(job.label)
        for parent in parent_labels:
            self._label_graph.add_edge(parent, job.label)
        for child in child_labels:
            self._label_graph.add_edge(job.label, child)

    def add_job_relationships(self, parent_labels, children_labels):
        """Add dependencies between parent and child job labels.
        All parents will be connected to all children.

        Parameters
        ----------
        parent_labels : `list` [`str`]
            Parent job labels.
        children_labels : `list` [`str`]
            Children job labels.
        """
        if parent_labels is not None and children_labels is not None:
            # Since labels, must ensure not adding edge from label to itself.
            edges = [
                e
                for e in itertools.product(ensure_iterable(parent_labels), ensure_iterable(children_labels))
                if e[0] != e[1]
            ]

            self._label_graph.add_edges_from(edges)

    def del_job(self, job):
        """Delete job and its label from job labels.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            The job to delete from the job labels.
        """
        self._label_to_jobs[job.label].remove(job)
        # Don't leave keys around if removed last job
        if not self._label_to_jobs[job.label]:
            del self._label_to_jobs[job.label]

            # Materialize neighbors BEFORE removing the node: the
            # predecessors/successors views are live iterators over graph
            # adjacency and are invalidated by remove_node.
            parents = list(self._label_graph.predecessors(job.label))
            children = list(self._label_graph.successors(job.label))
            self._label_graph.remove_node(job.label)
            self._label_graph.add_edges_from(itertools.product(parents, children))