from __future__ import annotations

"""Module defining Pipeline class and related methods.
"""

__all__ = ["Pipeline", "TaskDef", "TaskDatasetTypes", "PipelineDatasetTypes"]

from dataclasses import dataclass
from types import MappingProxyType
from typing import FrozenSet, Mapping

from lsst.daf.butler import DatasetType, DimensionUniverse

from .connections import PipelineTaskConnections, iterConnections
51 """TaskDef is a collection of information about task needed by Pipeline. 53 The information includes task name, configuration object and optional 54 task class. This class is just a collection of attributes and it exposes 55 all of them so that attributes could potentially be modified in place 56 (e.g. if configuration needs extra overrides). 61 `PipelineTask` class name, currently it is not specified whether this 62 is a fully-qualified name or partial name (e.g. ``module.TaskClass``). 63 Framework should be prepared to handle all cases. 64 config : `lsst.pex.config.Config` 65 Instance of the configuration class corresponding to this task class, 66 usually with all overrides applied. 67 taskClass : `type` or ``None`` 68 `PipelineTask` class object, can be ``None``. If ``None`` then 69 framework will have to locate and load class. 70 label : `str`, optional 71 Task label, usually a short string unique in a pipeline. 73 def __init__(self, taskName, config, taskClass=None, label=""):
78 self.
connections = config.connections.ConnectionsClass(config=config)
83 rep +=
", label=" + self.
label 89 """Pipeline is a sequence of `TaskDef` objects. 91 Pipeline is given as one of the inputs to a supervising framework 92 which builds execution graph out of it. Pipeline contains a sequence 93 of `TaskDef` instances. 95 Main purpose of this class is to provide a mechanism to pass pipeline 96 definition from users to supervising framework. That mechanism is 97 implemented using simple serialization and de-serialization via 98 `pickle`. Note that pipeline serialization is not guaranteed to be 99 compatible between different versions or releases. 101 In current implementation Pipeline is a list (it inherits from `list`) 102 and one can use all list methods on pipeline. Content of the pipeline 103 can be modified, it is up to the client to verify that modifications 104 leave pipeline in a consistent state. One could modify container 105 directly by adding or removing its elements. 109 pipeline : iterable of `TaskDef` instances, optional 110 Initial sequence of tasks. 113 list.__init__(self, iterable
or [])
116 """Return task index given its label. 126 Task index, or -1 if label is not found. 128 for idx, taskDef
in enumerate(self):
129 if taskDef.label == label:
134 infos = [str(tdef)
for tdef
in self]
135 return "Pipeline({})".format(
", ".join(infos))
@dataclass(frozen=True)
class TaskDatasetTypes:
    """An immutable struct that extracts and classifies the dataset types used
    by a `PipelineTask`.
    """

    initInputs: FrozenSet[DatasetType]
    """Dataset types that are needed as inputs in order to construct this
    Task.

    Task-level `initInputs` may be classified as either
    `~PipelineDatasetTypes.initInputs` or
    `~PipelineDatasetTypes.initIntermediates` at the Pipeline level.
    """

    initOutputs: FrozenSet[DatasetType]
    """Dataset types that may be written after constructing this Task.

    Task-level `initOutputs` may be classified as either
    `~PipelineDatasetTypes.initOutputs` or
    `~PipelineDatasetTypes.initIntermediates` at the Pipeline level.
    """

    inputs: FrozenSet[DatasetType]
    """Dataset types that are regular inputs to this Task.

    If an input dataset needed for a Quantum cannot be found in the input
    collection(s) or produced by another Task in the Pipeline, that Quantum
    (and all dependent Quanta) will not be produced.

    Task-level `inputs` may be classified as either
    `~PipelineDatasetTypes.inputs` or `~PipelineDatasetTypes.intermediates`
    at the Pipeline level.
    """

    prerequisites: FrozenSet[DatasetType]
    """Dataset types that are prerequisite inputs to this Task.

    Prerequisite inputs must exist in the input collection(s) before the
    pipeline is run, but do not constrain the graph - if a prerequisite is
    missing for a Quantum, `PrerequisiteMissingError` is raised.

    Prerequisite inputs are not resolved until the second stage of
    QuantumGraph generation.
    """

    outputs: FrozenSet[DatasetType]
    """Dataset types that are produced by this Task.

    Task-level `outputs` may be classified as either
    `~PipelineDatasetTypes.outputs` or `~PipelineDatasetTypes.intermediates`
    at the Pipeline level.
    """

    @classmethod
    def fromConnections(cls, connectionsInstance: PipelineTaskConnections, *,
                        universe: DimensionUniverse) -> TaskDatasetTypes:
194 """Extract and classify the dataset types from a single `PipelineTask`. 198 connectionsInstance: `PipelineTaskConnections` 199 An instance of a `PipelineTaskConnections` class for a particular 201 universe: `DimensionUniverse` 202 Set of all known dimensions, used to construct normalized 203 `DatasetType` objects. 207 types: `TaskDatasetTypes` 208 The dataset types used by this task. 210 def makeDatasetTypesSet(connectionType):
211 """Constructs a set of true `DatasetType` objects 215 connectionType : `str` 216 Name of the connection type to produce a set for, corresponds 217 to an attribute of type `list` on the connection class instance 221 datasetTypes : `frozenset` 222 A set of all datasetTypes which correspond to the input 223 connection type specified in the connection class of this 228 This function is a closure over the variables univers and 233 dimensions = getattr(c,
'dimensions', set())
234 datasetTypes.append(DatasetType(c.name, universe.extract(dimensions), c.storageClass))
235 return frozenset(datasetTypes)
        return cls(
            initInputs=makeDatasetTypesSet("initInputs"),
            initOutputs=makeDatasetTypesSet("initOutputs"),
            inputs=makeDatasetTypesSet("inputs"),
            prerequisites=makeDatasetTypesSet("prerequisiteInputs"),
            outputs=makeDatasetTypesSet("outputs"),
        )
@dataclass(frozen=True)
class PipelineDatasetTypes:
    """An immutable struct that classifies the dataset types used in a
    `Pipeline`.
    """

    initInputs: FrozenSet[DatasetType]
    """Dataset types that are needed as inputs in order to construct the
    Tasks in this Pipeline.

    This does not include dataset types that are produced when constructing
    other Tasks in the Pipeline (these are classified as `initIntermediates`).
    """

    initOutputs: FrozenSet[DatasetType]
    """Dataset types that may be written after constructing the Tasks in this
    Pipeline.

    This does not include dataset types that are also used as inputs when
    constructing other Tasks in the Pipeline (these are classified as
    `initIntermediates`).
    """

    initIntermediates: FrozenSet[DatasetType]
    """Dataset types that are both used when constructing one or more Tasks
    in the Pipeline and produced as a side-effect of constructing another
    Task in the Pipeline.
    """

    inputs: FrozenSet[DatasetType]
    """Dataset types that are regular inputs for the full pipeline.

    If an input dataset needed for a Quantum cannot be found in the input
    collection(s), that Quantum (and all dependent Quanta) will not be
    produced.
    """

    prerequisites: FrozenSet[DatasetType]
    """Dataset types that are prerequisite inputs for the full Pipeline.

    Prerequisite inputs must exist in the input collection(s) before the
    pipeline is run, but do not constrain the graph - if a prerequisite is
    missing for a Quantum, `PrerequisiteMissingError` is raised.

    Prerequisite inputs are not resolved until the second stage of
    QuantumGraph generation.
    """

    intermediates: FrozenSet[DatasetType]
    """Dataset types that are output by one Task in the Pipeline and consumed
    as inputs by one or more other Tasks in the Pipeline.
    """

    outputs: FrozenSet[DatasetType]
    """Dataset types that are output by a Task in the Pipeline and not
    consumed by any other Task in the Pipeline.
    """

    byTask: Mapping[str, TaskDatasetTypes]
    """Per-Task dataset types, keyed by label in the `Pipeline`.

    This is guaranteed to be zip-iterable with the `Pipeline` itself (assuming
    neither has been modified since the dataset types were extracted, of
    course).
    """

    @classmethod
    def fromPipeline(cls, pipeline: Pipeline, *, universe: DimensionUniverse) -> PipelineDatasetTypes:
314 """Extract and classify the dataset types from all tasks in a 320 An ordered collection of tasks that can be run together. 321 universe: `DimensionUniverse` 322 Set of all known dimensions, used to construct normalized 323 `DatasetType` objects. 327 types: `PipelineDatasetTypes` 328 The dataset types used by this `Pipeline`. 333 Raised if Tasks are inconsistent about which datasets are marked 334 prerequisite. This indicates that the Tasks cannot be run as part 335 of the same `Pipeline`. 339 allInitInputs = set()
340 allInitOutputs = set()
341 prerequisites = set()
343 for taskDef
in pipeline:
344 thisTask = TaskDatasetTypes.fromConnections(taskDef.connections, universe=universe)
345 allInitInputs.update(thisTask.initInputs)
346 allInitOutputs.update(thisTask.initOutputs)
347 allInputs.update(thisTask.inputs)
348 prerequisites.update(thisTask.prerequisites)
349 allOutputs.update(thisTask.outputs)
350 byTask[taskDef.label] = thisTask
        if not prerequisites.isdisjoint(allInputs):
            raise ValueError("{} marked as both prerequisites and regular inputs".format(
                {dt.name for dt in allInputs & prerequisites}))
        if not prerequisites.isdisjoint(allOutputs):
            raise ValueError("{} marked as both prerequisites and outputs".format(
                {dt.name for dt in allOutputs & prerequisites}))
        intermediateComponents = set()
        intermediateComposites = set()
        outputNameMapping = {dsType.name: dsType for dsType in allOutputs}
        for dsType in allInputs:
            # Get the name of a possible component.
            name, component = dsType.nameAndComponent()
            # If there is a component name, this is a component DatasetType;
            # if some output produces the parent of this component, treat
            # this input as an intermediate.
            if component is not None:
                if name in outputNameMapping and outputNameMapping[name].dimensions == dsType.dimensions:
                    composite = DatasetType(name, dsType.dimensions,
                                            outputNameMapping[name].storageClass,
                                            universe=universe)
                    intermediateComponents.add(dsType)
                    intermediateComposites.add(composite)
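        # The final classification is pure set algebra on the accumulated
        # sets: names in both allInputs and allOutputs are intermediates,
        # and the component/composite sets adjust for parent-child matches.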
        return cls(
            initInputs=frozenset(allInitInputs - allInitOutputs),
            initIntermediates=frozenset(allInitInputs & allInitOutputs),
            initOutputs=frozenset(allInitOutputs - allInitInputs),
            inputs=frozenset(allInputs - allOutputs - intermediateComponents),
            intermediates=frozenset(allInputs & allOutputs | intermediateComponents),
            outputs=frozenset(allOutputs - allInputs - intermediateComposites),
            prerequisites=frozenset(prerequisites),
            # Wrap in a read-only view so the frozen dataclass stays immutable.
            byTask=MappingProxyType(byTask),
        )
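
# Illustrative sketch, not part of the module: the classification above,
# with plain strings standing in for DatasetType objects (the dataset type
# names here are hypothetical).
def _demoClassification():
    allInputs = {"raw", "calexp"}
    allOutputs = {"calexp", "coadd"}
    assert allInputs - allOutputs == {"raw"}      # pure pipeline inputs
    assert allInputs & allOutputs == {"calexp"}   # intermediates
    assert allOutputs - allInputs == {"coadd"}    # pure pipeline outputs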