"""Module defining a few methods to manipulate or query pipelines.
"""

# Public API of this module: only the two pipeline-ordering helpers are
# exported by name.
__all__ = ["isPipelineOrdered", "orderPipeline"]
import itertools

from .connections import iterConnections
from .pipeline import Pipeline
def _loadTaskClass(taskDef, taskFactory):
    """Import task class if necessary.

    Parameters
    ----------
    taskDef : object
        Task definition; its ``taskClass`` attribute is returned when it is
        set, otherwise its ``taskName`` attribute is handed to the factory.
    taskFactory : object or `None`
        Object providing ``loadTaskClass(taskName)``; only consulted when
        ``taskDef.taskClass`` is not set.

    Returns
    -------
    taskClass
        ``taskDef.taskClass`` if set, otherwise whatever
        ``taskFactory.loadTaskClass(taskDef.taskName)`` returns.

    Raises
    ------
    `ImportError` is raised when task class cannot be imported.
    `MissingTaskFactoryError` is raised when TaskFactory is needed but not
    provided.
    """
    taskClass = taskDef.taskClass
    if taskClass:
        # Task definition already carries the class, nothing to import.
        return taskClass
    if not taskFactory:
        raise MissingTaskFactoryError("Task class is not defined but task "
                                      "factory instance is not provided")
    return taskFactory.loadTaskClass(taskDef.taskName)
class MissingTaskFactoryError(Exception):
    """Exception raised when client fails to provide TaskFactory instance.
    """


class DuplicateOutputError(Exception):
    """Exception raised when Pipeline has more than one task for the same
    output.
    """


class PipelineDataCycleError(Exception):
    """Exception raised when Pipeline has data dependency cycle.
    """


def isPipelineOrdered(pipeline, taskFactory=None):
    """Check whether tasks in pipeline are correctly ordered.

    Pipeline is correctly ordered if for any DatasetType produced by a task
    in a pipeline all its consumer tasks are located after producer.

    Parameters
    ----------
    pipeline : `pipe.base.Pipeline`
        Pipeline description; iterated in order, each element must expose a
        ``connections`` attribute.
    taskFactory: `pipe.base.TaskFactory`, optional
        Accepted for interface compatibility. This implementation reads
        inputs and outputs from ``taskDef.connections`` and does not consult
        the factory.

    Returns
    -------
    True for correctly ordered pipeline, False otherwise.

    Raises
    ------
    `DuplicateOutputError` is raised when there is more than one producer for a
    dataset type.
    """
    # Map of DatasetType name to the index of the task producing it.
    producerIndex = {}
    for idx, taskDef in enumerate(pipeline):
        connections = taskDef.connections
        for connName in connections.outputs:
            attr = getattr(connections, connName)
            if attr.name in producerIndex:
                raise DuplicateOutputError("DatasetType `{}' appears more than "
                                           "once as output".format(attr.name))
            producerIndex[attr.name] = idx

    # Every input that is also someone's output must be produced strictly
    # earlier in the pipeline.
    for idx, taskDef in enumerate(pipeline):
        connections = taskDef.connections
        for connName in connections.inputs:
            dsTypeDescr = getattr(connections, connName)
            # Datasets with no producer in the pipeline get effective index -1,
            # i.e. they are treated as available before the first task.
            prodIdx = producerIndex.get(dsTypeDescr.name, -1)
            if prodIdx >= idx:
                # Producer is this task itself or is located downstream.
                return False

    return True
def orderPipeline(pipeline, taskFactory=None):
    """Re-order tasks in pipeline to satisfy data dependencies.

    When possible new ordering keeps original relative order of the tasks.

    Parameters
    ----------
    pipeline : `pipe.base.Pipeline`
        Pipeline description. It is iterated once and then indexed by task
        position, and each element must expose ``connections`` and ``label``.
    taskFactory: `pipe.base.TaskFactory`, optional
        Accepted for interface compatibility. This implementation reads
        inputs and outputs from ``taskDef.connections`` and does not consult
        the factory.

    Returns
    -------
    Correctly ordered pipeline (`pipe.base.Pipeline` instance).

    Raises
    ------
    `DuplicateOutputError` is raised when there is more than one producer for a
    dataset type, including two output connections of a single task.
    `PipelineDataCycleError` is also raised when pipeline has dependency
    cycles.
    """
    # Topological sort that always emits the lowest-index ready task first,
    # which is what keeps the original relative order where possible.

    inputs = {}   # maps task index to its (not yet satisfied) input names
    outputs = {}  # maps task index to its output DatasetType names
    allInputs = set()   # all inputs of all tasks
    allOutputs = set()  # all outputs of all tasks
    for idx, taskDef in enumerate(pipeline):
        connections = taskDef.connections

        # Task outputs. Each name is registered as soon as it is seen so that
        # a duplicate is detected both across tasks and within a single task.
        taskOutputs = set()
        for connName in connections.outputs:
            dsTypeName = getattr(connections, connName).name
            if dsTypeName in allOutputs:
                raise DuplicateOutputError("DatasetType `{}' appears more than "
                                           "once as output".format(dsTypeName))
            taskOutputs.add(dsTypeName)
            allOutputs.add(dsTypeName)
        outputs[idx] = taskOutputs

        # Task inputs: regular and prerequisite inputs are both dependencies.
        connectionInputs = itertools.chain(connections.inputs, connections.prerequisiteInputs)
        inputs[idx] = {getattr(connections, connName).name for connName in connectionInputs}
        allInputs.update(inputs[idx])

    # For simplicity add a pseudo-node with index -1 which acts as the
    # producer of every input that no task in the pipeline produces.
    preExisting = allInputs - allOutputs
    outputs[-1] = preExisting

    # Nodes whose inputs are all satisfied; starts with the pseudo-node only.
    queue = [-1]
    result = []
    while queue:
        # Lowest index first; the pseudo-node is not part of the result.
        idx = queue.pop(0)
        if idx >= 0:
            result.append(idx)

        # Outputs of the processed node satisfy the inputs of remaining tasks.
        thisTaskOutputs = outputs.get(idx, set())
        for taskInputs in inputs.values():
            taskInputs -= thisTaskOutputs

        # Tasks left without unsatisfied inputs become ready.
        topNodes = [key for key, value in inputs.items() if not value]
        queue += topNodes
        for key in topNodes:
            del inputs[key]

        # Keep the queue ordered so that original task order is preferred.
        queue.sort()

    # Anything still waiting for inputs takes part in a dependency cycle.
    if inputs:
        loops = []
        for idx, inputNames in inputs.items():
            taskName = pipeline[idx].label
            outputNames = outputs[idx]
            edge = " {} -> {} -> {}".format(inputNames, taskName, outputNames)
            loops.append(edge)
        raise PipelineDataCycleError("Pipeline has data cycles:\n" + "\n".join(loops))

    return Pipeline(pipeline[idx] for idx in result)