Coverage for python/lsst/pipe/base/pipeTools.py: 26%
31 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-06 04:05 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-06 04:05 -0700
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Module defining a few methods to manipulate or query pipelines.
29"""
31from __future__ import annotations
33# No one should do import * from this module
34__all__ = ["isPipelineOrdered", "orderPipeline"]
36from collections.abc import Iterable
37from typing import TYPE_CHECKING
39from .pipeline import Pipeline, TaskDef
41# Exceptions re-exported here for backwards compatibility.
42from .pipeline_graph import DuplicateOutputError, PipelineDataCycleError, PipelineGraph # noqa: F401
44if TYPE_CHECKING:
45 from .taskFactory import TaskFactory
class MissingTaskFactoryError(Exception):
    """Error signalling that the caller did not supply a `TaskFactory`
    instance.
    """
def isPipelineOrdered(pipeline: Pipeline | Iterable[TaskDef], taskFactory: TaskFactory | None = None) -> bool:
    """Report whether the tasks of a pipeline appear in dependency order.

    The pipeline is considered ordered when every task that consumes a
    DatasetType comes later in the sequence than the task producing it.

    Parameters
    ----------
    pipeline : `Pipeline` or `collections.abc.Iterable` [ `TaskDef` ]
        Pipeline description.
    taskFactory : `TaskFactory`, optional
        Ignored; present only for backwards compatibility.

    Returns
    -------
    is_ordered : `bool`
        `True` if the pipeline is correctly ordered, `False` otherwise.

    Raises
    ------
    ImportError
        Raised when task class cannot be imported.
    DuplicateOutputError
        Raised when there is more than one producer for a dataset type.
    """
    if not isinstance(pipeline, Pipeline):
        # A bare sequence of task definitions: assemble the graph by hand,
        # adding tasks in the order they were given.
        pipeline_graph = PipelineGraph()
        for definition in pipeline:
            pipeline_graph.add_task(
                definition.label,
                definition.taskClass,
                definition.config,
                definition.connections,
            )
    else:
        pipeline_graph = pipeline.to_graph()

    # graph.is_sorted is not usable here: it also demands that the dataset
    # type names are sorted, while only the task order matters to us.
    task_graph = pipeline_graph.make_task_xgraph()
    visited: set[str] = set()
    for label in task_graph:
        # A downstream task that was already visited means a consumer was
        # listed ahead of its producer.
        if any(consumer in visited for consumer in task_graph.successors(label)):
            return False
        visited.add(label)
    return True
def orderPipeline(pipeline: Pipeline | Iterable[TaskDef]) -> list[TaskDef]:
    """Return the pipeline's tasks rearranged to respect data dependencies.

    Parameters
    ----------
    pipeline : `Pipeline` or `collections.abc.Iterable` [ `TaskDef` ]
        Pipeline description.

    Returns
    -------
    ordered : `list` [ `TaskDef` ]
        Task definitions of the pipeline after sorting its graph.

    Raises
    ------
    DuplicateOutputError
        Raised when there is more than one producer for a dataset type.
    PipelineDataCycleError
        Raised when the pipeline has dependency cycles.
    """
    if not isinstance(pipeline, Pipeline):
        # A bare sequence of task definitions: assemble the graph by hand.
        pipeline_graph = PipelineGraph()
        for definition in pipeline:
            pipeline_graph.add_task(
                definition.label,
                definition.taskClass,
                definition.config,
                definition.connections,
            )
    else:
        pipeline_graph = pipeline.to_graph()

    # Sort the graph in place, then read the task definitions back out of it.
    pipeline_graph.sort()
    return [*pipeline_graph._iter_task_defs()]