Coverage for python/lsst/pipe/base/pipeTools.py: 26%

31 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-31 09:39 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Module defining a few methods to manipulate or query pipelines.

23""" 

24 

25from __future__ import annotations 

26 

27# No one should do import * from this module 

28__all__ = ["isPipelineOrdered", "orderPipeline"] 

29 

30from collections.abc import Iterable 

31from typing import TYPE_CHECKING 

32 

33from .pipeline import Pipeline, TaskDef 

34 

35# Exceptions re-exported here for backwards compatibility. 

36from .pipeline_graph import DuplicateOutputError, PipelineDataCycleError, PipelineGraph # noqa: F401 

37 

38if TYPE_CHECKING: 

39 from .taskFactory import TaskFactory 

40 

41 

class MissingTaskFactoryError(Exception):
    """Error signalling that the caller did not supply a TaskFactory.

    This is a plain `Exception` subclass; it adds no attributes or
    behavior of its own.
    """

46 

47 

def isPipelineOrdered(pipeline: Pipeline | Iterable[TaskDef], taskFactory: TaskFactory | None = None) -> bool:
    """Report whether the tasks of a pipeline respect data dependencies.

    A pipeline counts as ordered when every task that consumes a
    DatasetType appears after the task that produces it.

    Parameters
    ----------
    pipeline : `Pipeline` or `collections.abc.Iterable` [ `TaskDef` ]
        Pipeline description.
    taskFactory : `TaskFactory`, optional
        Ignored; present only for backwards compatibility.

    Returns
    -------
    is_ordered : `bool`
        `True` if no task precedes one of its producers, `False` otherwise.

    Raises
    ------
    ImportError
        Raised when task class cannot be imported.
    DuplicateOutputError
        Raised when there is more than one producer for a dataset type.
    """
    if not isinstance(pipeline, Pipeline):
        # A bare iterable of task definitions: assemble a graph by hand,
        # adding the tasks in the order the caller supplied them.
        pipeline_graph = PipelineGraph()
        for entry in pipeline:
            pipeline_graph.add_task(entry.label, entry.taskClass, entry.config, entry.connections)
    else:
        pipeline_graph = pipeline.to_graph()
    # graph.is_sorted is not usable here because it additionally requires
    # the dataset type names to be sorted, not just the tasks.
    task_dependencies = pipeline_graph.make_task_xgraph()
    visited: set[str] = set()
    for label in task_dependencies:
        # If a successor of this task was already visited, that consumer was
        # listed ahead of its producer, so the pipeline is out of order.
        if any(downstream in visited for downstream in task_dependencies.successors(label)):
            return False
        visited.add(label)
    return True

89 

90 

def orderPipeline(pipeline: Pipeline | Iterable[TaskDef]) -> list[TaskDef]:
    """Return the tasks of a pipeline arranged to satisfy data dependencies.

    Parameters
    ----------
    pipeline : `Pipeline` or `collections.abc.Iterable` [ `TaskDef` ]
        Pipeline description.

    Returns
    -------
    ordered : `list` [ `TaskDef` ]
        Correctly ordered pipeline.

    Raises
    ------
    DuplicateOutputError
        Raised when there is more than one producer for a dataset type.
    PipelineDataCycleError
        Raised when the pipeline has dependency cycles.
    """
    if not isinstance(pipeline, Pipeline):
        # A bare iterable of task definitions: assemble a graph by hand.
        pipeline_graph = PipelineGraph()
        for entry in pipeline:
            pipeline_graph.add_task(entry.label, entry.taskClass, entry.config, entry.connections)
    else:
        pipeline_graph = pipeline.to_graph()
    # Sort the graph first, then collect the task definitions it yields.
    pipeline_graph.sort()
    return [*pipeline_graph._iter_task_defs()]

118 return list(graph._iter_task_defs())