Coverage for python/lsst/pipe/base/pipeTools.py: 26%

31 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-11 09:32 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28"""Module defining a few functions to manipulate or query pipelines. 

29""" 

30 

31from __future__ import annotations 

32 

33# No one should do import * from this module 

34__all__ = ["isPipelineOrdered", "orderPipeline"] 

35 

36from collections.abc import Iterable 

37from typing import TYPE_CHECKING 

38 

39from .pipeline import Pipeline, TaskDef 

40 

41# Exceptions re-exported here for backwards compatibility. 

42from .pipeline_graph import DuplicateOutputError, PipelineDataCycleError, PipelineGraph # noqa: F401 

43 

44if TYPE_CHECKING: 

45 from .taskFactory import TaskFactory 

46 

47 

class MissingTaskFactoryError(Exception):
    """Error signalling that the caller did not supply a `TaskFactory`.

    The class adds nothing to `Exception`; it exists so that this specific
    failure can be caught by type.
    """

52 

53 

def isPipelineOrdered(pipeline: Pipeline | Iterable[TaskDef], taskFactory: TaskFactory | None = None) -> bool:
    """Report whether the tasks of a pipeline already respect data flow.

    A pipeline counts as ordered when no task appears before a task whose
    output it consumes, i.e. every producer precedes all of its consumers.

    Parameters
    ----------
    pipeline : `Pipeline` or `collections.abc.Iterable` [ `TaskDef` ]
        Pipeline description.
    taskFactory : `TaskFactory`, optional
        Ignored; present only for backwards compatibility.

    Returns
    -------
    is_ordered : `bool`
        True for correctly ordered pipeline, False otherwise.

    Raises
    ------
    ImportError
        Raised when task class cannot be imported.
    DuplicateOutputError
        Raised when there is more than one producer for a dataset type.
    """
    if not isinstance(pipeline, Pipeline):
        # Build a graph by hand from the individual task definitions.
        graph = PipelineGraph()
        for definition in pipeline:
            graph.add_task(
                definition.label,
                definition.taskClass,
                definition.config,
                definition.connections,
            )
    else:
        graph = pipeline.to_graph()
    # graph.is_sorted is not usable here: it also demands that dataset type
    # names be sorted, whereas only the task order matters for this check.
    task_graph = graph.make_task_xgraph()
    visited: set[str] = set()
    for label in task_graph:
        # A successor that was already visited means a consumer was listed
        # ahead of its producer.
        if any(successor in visited for successor in task_graph.successors(label)):
            return False
        visited.add(label)
    return True

95 

96 

def orderPipeline(pipeline: Pipeline | Iterable[TaskDef]) -> list[TaskDef]:
    """Return the pipeline's tasks arranged so data dependencies are met.

    Parameters
    ----------
    pipeline : `Pipeline` or `collections.abc.Iterable` [ `TaskDef` ]
        Pipeline description.

    Returns
    -------
    ordered : `list` [ `TaskDef` ]
        Correctly ordered pipeline.

    Raises
    ------
    DuplicateOutputError
        Raised when there is more than one producer for a dataset type.
    PipelineDataCycleError
        Raised when the pipeline has dependency cycles.
    """
    if not isinstance(pipeline, Pipeline):
        # Build a graph by hand from the individual task definitions.
        graph = PipelineGraph()
        for definition in pipeline:
            graph.add_task(
                definition.label,
                definition.taskClass,
                definition.config,
                definition.connections,
            )
    else:
        graph = pipeline.to_graph()
    # Sort the graph in place, then read the task definitions back out of it.
    graph.sort()
    return [*graph._iter_task_defs()]

123 graph.sort() 

124 return list(graph._iter_task_defs())