from __future__ import annotations
23 """Module defining Pipeline class and related methods. 26 __all__ = [
"Pipeline",
"TaskDef",
"TaskDatasetTypes",
"PipelineDatasetTypes"]
from dataclasses import dataclass
from typing import FrozenSet, Mapping, Type
from types import MappingProxyType

from lsst.daf.butler import DatasetType, DimensionUniverse

from .pipelineTask import PipelineTask
from .config import PipelineTaskConfig
52 """TaskDef is a collection of information about task needed by Pipeline. 54 The information includes task name, configuration object and optional 55 task class. This class is just a collection of attributes and it exposes 56 all of them so that attributes could potentially be modified in place 57 (e.g. if configuration needs extra overrides). 62 `PipelineTask` class name, currently it is not specified whether this 63 is a fully-qualified name or partial name (e.g. ``module.TaskClass``). 64 Framework should be prepared to handle all cases. 65 config : `lsst.pex.config.Config` 66 Instance of the configuration class corresponding to this task class, 67 usually with all overrides applied. 68 taskClass : `type` or ``None`` 69 `PipelineTask` class object, can be ``None``. If ``None`` then 70 framework will have to locate and load class. 71 label : `str`, optional 72 Task label, usually a short string unique in a pipeline. 74 def __init__(self, taskName, config, taskClass=None, label=""):
83 rep +=
", label=" + self.
label 89 """Pipeline is a sequence of `TaskDef` objects. 91 Pipeline is given as one of the inputs to a supervising framework 92 which builds execution graph out of it. Pipeline contains a sequence 93 of `TaskDef` instances. 95 Main purpose of this class is to provide a mechanism to pass pipeline 96 definition from users to supervising framework. That mechanism is 97 implemented using simple serialization and de-serialization via 98 `pickle`. Note that pipeline serialization is not guaranteed to be 99 compatible between different versions or releases. 101 In current implementation Pipeline is a list (it inherits from `list`) 102 and one can use all list methods on pipeline. Content of the pipeline 103 can be modified, it is up to the client to verify that modifications 104 leave pipeline in a consistent state. One could modify container 105 directly by adding or removing its elements. 109 pipeline : iterable of `TaskDef` instances, optional 110 Initial sequence of tasks. 113 list.__init__(self, iterable
or [])
116 """Return task index given its label. 126 Task index, or -1 if label is not found. 128 for idx, taskDef
in enumerate(self):
129 if taskDef.label == label:

    def __str__(self):
        infos = [str(tdef) for tdef in self]
        return "Pipeline({})".format(", ".join(infos))

@dataclass(frozen=True)
class TaskDatasetTypes:
    """An immutable struct that extracts and classifies the dataset types used
    by a `PipelineTask`.
    """

    initInputs: FrozenSet[DatasetType]
    """Dataset types that are needed as inputs in order to construct this Task.

    Task-level `initInputs` may be classified as either
    `~PipelineDatasetTypes.initInputs` or
    `~PipelineDatasetTypes.initIntermediates` at the Pipeline level.
    """

    initOutputs: FrozenSet[DatasetType]
    """Dataset types that may be written after constructing this Task.

    Task-level `initOutputs` may be classified as either
    `~PipelineDatasetTypes.initOutputs` or
    `~PipelineDatasetTypes.initIntermediates` at the Pipeline level.
    """

    inputs: FrozenSet[DatasetType]
    """Dataset types that are regular inputs to this Task.

    If an input dataset needed for a Quantum cannot be found in the input
    collection(s) or produced by another Task in the Pipeline, that Quantum
    (and all dependent Quanta) will not be produced.

    Task-level `inputs` may be classified as either
    `~PipelineDatasetTypes.inputs` or `~PipelineDatasetTypes.intermediates`
    at the Pipeline level.
    """

    prerequisites: FrozenSet[DatasetType]
    """Dataset types that are prerequisite inputs to this Task.

    Prerequisite inputs must exist in the input collection(s) before the
    pipeline is run, but do not constrain the graph - if a prerequisite is
    missing for a Quantum, `PrerequisiteMissingError` is raised.

    Prerequisite inputs are not resolved until the second stage of
    QuantumGraph generation.
    """

    outputs: FrozenSet[DatasetType]
    """Dataset types that are produced by this Task.

    Task-level `outputs` may be classified as either
    `~PipelineDatasetTypes.outputs` or `~PipelineDatasetTypes.intermediates`
    at the Pipeline level.
    """

    @classmethod
    def fromTask(cls, taskClass: Type[PipelineTask], config: PipelineTaskConfig, *,
                 universe: DimensionUniverse) -> TaskDatasetTypes:
194 """Extract and classify the dataset types from a single `PipelineTask`. 199 A concrete `PipelineTask` subclass. 200 config: `PipelineTaskConfig` 201 Configuration for the concrete `PipelineTask`. 202 universe: `DimensionUniverse` 203 Set of all known dimensions, used to construct normalized 204 `DatasetType` objects. 208 types: `TaskDatasetTypes` 209 The dataset types used by this task. 216 allInputsByArgName = {k: descr.makeDatasetType(universe)
217 for k, descr
in taskClass.getInputDatasetTypes(config).items()}
218 prerequisiteArgNames = taskClass.getPrerequisiteDatasetTypes(config)
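        # ``getInputDatasetTypes`` reports regular and prerequisite inputs
        # together; the argument names from ``getPrerequisiteDatasetTypes``
        # are used below to split ``allInputsByArgName`` into the two sets.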
        return cls(
            initInputs=frozenset(descr.makeDatasetType(universe)
                                 for descr in taskClass.getInitInputDatasetTypes(config).values()),
            initOutputs=frozenset(descr.makeDatasetType(universe)
                                  for descr in taskClass.getInitOutputDatasetTypes(config).values()),
            inputs=frozenset(v for k, v in allInputsByArgName.items() if k not in prerequisiteArgNames),
            prerequisites=frozenset(v for k, v in allInputsByArgName.items() if k in prerequisiteArgNames),
            outputs=frozenset(descr.makeDatasetType(universe)
                              for descr in taskClass.getOutputDatasetTypes(config).values()),
        )
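
# A hedged usage sketch: given a concrete PipelineTask subclass ``MyTask``
# (hypothetical, defined elsewhere), its config, and a ``DimensionUniverse``
# called ``universe``, the per-task classification might be obtained so:
#
#     types = TaskDatasetTypes.fromTask(MyTask, config, universe=universe)
#     types.inputs         # frozenset of DatasetType consumed by quanta
#     types.prerequisites  # frozenset of DatasetType that must pre-exist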

@dataclass(frozen=True)
class PipelineDatasetTypes:
    """An immutable struct that classifies the dataset types used in a
    `Pipeline`.
    """

    initInputs: FrozenSet[DatasetType]
    """Dataset types that are needed as inputs in order to construct the Tasks
    in this Pipeline.

    This does not include dataset types that are produced when constructing
    other Tasks in the Pipeline (these are classified as `initIntermediates`).
    """

    initOutputs: FrozenSet[DatasetType]
    """Dataset types that may be written after constructing the Tasks in this
    Pipeline.

    This does not include dataset types that are also used as inputs when
    constructing other Tasks in the Pipeline (these are classified as
    `initIntermediates`).
    """

    initIntermediates: FrozenSet[DatasetType]
    """Dataset types that are both used when constructing one or more Tasks
    in the Pipeline and produced as a side-effect of constructing another
    Task in the Pipeline.
    """

    inputs: FrozenSet[DatasetType]
    """Dataset types that are regular inputs for the full pipeline.

    If an input dataset needed for a Quantum cannot be found in the input
    collection(s), that Quantum (and all dependent Quanta) will not be
    produced.
    """

    prerequisites: FrozenSet[DatasetType]
    """Dataset types that are prerequisite inputs for the full Pipeline.

    Prerequisite inputs must exist in the input collection(s) before the
    pipeline is run, but do not constrain the graph - if a prerequisite is
    missing for a Quantum, `PrerequisiteMissingError` is raised.

    Prerequisite inputs are not resolved until the second stage of
    QuantumGraph generation.
    """

    intermediates: FrozenSet[DatasetType]
    """Dataset types that are output by one Task in the Pipeline and consumed
    as inputs by one or more other Tasks in the Pipeline.
    """

    outputs: FrozenSet[DatasetType]
    """Dataset types that are output by a Task in the Pipeline and not
    consumed by any other Task in the Pipeline.
    """

    byTask: Mapping[str, TaskDatasetTypes]
    """Per-Task dataset types, keyed by label in the `Pipeline`.

    This is guaranteed to be zip-iterable with the `Pipeline` itself (assuming
    neither has been modified since the dataset types were extracted, of
    course).
    """

    @classmethod
    def fromPipeline(cls, pipeline: Pipeline, *, universe: DimensionUniverse) -> PipelineDatasetTypes:
299 """Extract and classify the dataset types from all tasks in a 305 An ordered collection of tasks that can be run together. 306 universe: `DimensionUniverse` 307 Set of all known dimensions, used to construct normalized 308 `DatasetType` objects. 312 types: `PipelineDatasetTypes` 313 The dataset types used by this `Pipeline`. 318 Raised if Tasks are inconsistent about which datasets are marked 319 prerequisite. This indicates that the Tasks cannot be run as part 320 of the same `Pipeline`. 324 allInitInputs = set()
325 allInitOutputs = set()
326 prerequisites = set()
        for taskDef in pipeline:
            thisTask = TaskDatasetTypes.fromTask(taskDef.taskClass, taskDef.config, universe=universe)
            allInitInputs.update(thisTask.initInputs)
            allInitOutputs.update(thisTask.initOutputs)
            allInputs.update(thisTask.inputs)
            prerequisites.update(thisTask.prerequisites)
            allOutputs.update(thisTask.outputs)
            byTask[taskDef.label] = thisTask
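        # Consistency check: a dataset type must be treated the same way by
        # every Task that uses it; one Task marking it prerequisite while
        # another treats it as a regular input or output is an error.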
        if not prerequisites.isdisjoint(allInputs):
            raise ValueError("{} marked as both prerequisites and regular inputs".format(
                {dt.name for dt in allInputs & prerequisites}
            ))
        if not prerequisites.isdisjoint(allOutputs):
            raise ValueError("{} marked as both prerequisites and outputs".format(
                {dt.name for dt in allOutputs & prerequisites}
            ))
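        # Classify with set algebra: anything produced and consumed within
        # the Pipeline is an intermediate; what remains is a pure input or a
        # pure output. The same split is applied to the init dataset types.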
        return cls(
            initInputs=frozenset(allInitInputs - allInitOutputs),
            initIntermediates=frozenset(allInitInputs & allInitOutputs),
            initOutputs=frozenset(allInitOutputs - allInitInputs),
            inputs=frozenset(allInputs - allOutputs),
            intermediates=frozenset(allInputs & allOutputs),
            outputs=frozenset(allOutputs - allInputs),
            prerequisites=frozenset(prerequisites),
            byTask=MappingProxyType(byTask),
        )
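
# The classification in ``fromPipeline`` is plain set algebra. A standalone
# sketch of the same partition, using strings in place of DatasetType
# objects (the dataset type names here are purely illustrative):
#
#     allInputs = {"raw", "calexp"}
#     allOutputs = {"calexp", "coadd"}
#     assert allInputs - allOutputs == {"raw"}      # Pipeline-level inputs
#     assert allInputs & allOutputs == {"calexp"}   # intermediates
#     assert allOutputs - allInputs == {"coadd"}    # Pipeline-level outputs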