Coverage for python/lsst/pipe/base/pipelineTask.py: 75%
16 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-25 09:14 +0000
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Define `PipelineTask` class and related methods.
23"""
25from __future__ import annotations
27__all__ = ["PipelineTask"] # Classes in this module
29from typing import TYPE_CHECKING, Any, ClassVar
31from .connections import InputQuantizedConnection, OutputQuantizedConnection
32from .task import Task
34if TYPE_CHECKING:
35 import logging
37 from lsst.utils.logging import LsstLogAdapter
39 from ._quantumContext import QuantumContext
40 from .config import PipelineTaskConfig
41 from .struct import Struct
class PipelineTask(Task):
    """Abstract base class for tasks executed as part of a pipeline.

    A `PipelineTask` wraps an algorithm that the execution framework runs on
    data retrieved from a data butler, writing its results back to a data
    butler as well.

    `PipelineTask` builds on `pipe.base.Task` and shares its
    `pex.config`-based configuration mechanism.  Each `PipelineTask` class is
    associated (through its config) with a `PipelineTaskConnections` class
    that declares every input and output the task needs.  Subclasses
    typically implement a `run` method that works purely on in-memory Python
    objects and returns a `pipe.base.Struct` holding the results; `run`
    itself performs no I/O.  All butler I/O happens in `runQuantum` (which
    may be overridden): it loads the inputs into memory, invokes `run`, and
    persists some or all of the returned `Struct` back to the butler.
    `runQuantum` is handed a `QuantumContext` for I/O plus an
    `InputQuantizedConnection` and an `OutputQuantizedConnection` describing,
    respectively, the input and output `lsst.daf.butler.DatasetRef` objects
    for one invocation of the task.

    Subclasses must be constructable with exactly the arguments accepted by
    this base class constructor, though they may also support additional
    signatures.

    Attributes
    ----------
    canMultiprocess : bool, True by default (class attribute)
        Checked by the execution framework; subclasses that cannot run under
        multiprocessing should set this to ``False``.

    Parameters
    ----------
    config : `pex.config.Config`, optional
        Configuration for this task (an instance of ``self.ConfigClass``, a
        task-specific subclass of `PipelineTaskConfig`).  Defaults to
        ``self.ConfigClass()`` when omitted.
    log : `logging.Logger`, optional
        Logger whose name is used as a log-name prefix, or ``None`` for no
        prefix.
    initInputs : `dict`, optional
        Objects needed to construct this PipelineTask, keyed consistently
        with the dictionary returned by `getInitInputDatasetTypes`, with
        values equivalent to calling `Butler.get` on those DatasetTypes with
        no data IDs.  Optional here, but subclasses may require it.
    """

    # Concrete subclasses bind this to their task-specific config class.
    ConfigClass: ClassVar[type[PipelineTaskConfig]]
    canMultiprocess: ClassVar[bool] = True

    def __init__(
        self,
        *,
        config: PipelineTaskConfig | None = None,
        log: logging.Logger | LsstLogAdapter | None = None,
        initInputs: dict[str, Any] | None = None,
        **kwargs: Any,
    ):
        # initInputs is accepted for subclasses to consume; the base class
        # itself only forwards config/log to Task.
        super().__init__(config=config, log=log, **kwargs)

    def run(self, **kwargs: Any) -> Struct:
        """Run the task's algorithm on in-memory data.

        Subclasses must implement this method.  It is called with keyword
        arguments named after the connection fields that describe input
        dataset types; argument values are the data objects fetched from the
        butler.  A connection configured with ``multiple=True`` yields a list
        of objects, otherwise a single object.

        Tasks that need access to their input or output DataIds must
        override `runQuantum` instead.

        The returned `Struct` must have attributes named after the output
        connection fields.

        Returns
        -------
        struct : `Struct`
            Struct whose attribute names correspond to the output connection
            fields.

        Examples
        --------
        A typical implementation looks like:

        .. code-block:: python

            def run(self, input, calib):
                # "input", "calib", and "output" are the names of the config
                # fields

                # Assuming that input/calib datasets are `scalar` they are
                # simple objects, do something with inputs and calibs, produce
                # output image.
                image = self.makeImage(input, calib)

                # If output dataset is `scalar` then return object, not list
                return Struct(output=image)
        """
        raise NotImplementedError("run() is not implemented")

    def runQuantum(
        self,
        butlerQC: QuantumContext,
        inputRefs: InputQuantizedConnection,
        outputRefs: OutputQuantizedConnection,
    ) -> None:
        """Perform butler I/O and hand in-memory objects to `~Task.run`.

        Parameters
        ----------
        butlerQC : `QuantumContext`
            A butler specialized to operate in the context of a
            `lsst.daf.butler.Quantum`.
        inputRefs : `InputQuantizedConnection`
            Data structure whose attribute names identify connections defined
            in the corresponding `PipelineTaskConnections` class and whose
            values are the `lsst.daf.butler.DatasetRef` objects for the
            defined input/prerequisite connections.
        outputRefs : `OutputQuantizedConnection`
            Data structure whose attribute names identify connections defined
            in the corresponding `PipelineTaskConnections` class and whose
            values are the `lsst.daf.butler.DatasetRef` objects for the
            defined output connections.
        """
        # Load everything into memory, run the algorithm, persist the result.
        inMemoryInputs = butlerQC.get(inputRefs)
        results = self.run(**inMemoryInputs)
        butlerQC.put(results, outputRefs)