Coverage for python/lsst/pipe/base/pipelineTask.py: 78%
18 statements
« prev ^ index » next — coverage.py v7.3.2, created at 2023-11-17 10:52 +0000
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Define `PipelineTask` class and related methods."""

from __future__ import annotations

__all__ = ["PipelineTask"]  # Classes in this module

from collections.abc import Callable
from typing import TYPE_CHECKING, Any, ClassVar

from .connections import InputQuantizedConnection, OutputQuantizedConnection
from .task import Task

# These names are needed only for type annotations, which are lazy here
# thanks to `from __future__ import annotations`; guarding them avoids
# runtime import cost and potential import cycles.
if TYPE_CHECKING:
    import logging

    from lsst.utils.logging import LsstLogAdapter

    from ._quantumContext import QuantumContext
    from .config import PipelineTaskConfig
    from .struct import Struct
class PipelineTask(Task):
    """Abstract base class for tasks run by the pipeline execution
    framework.

    A `PipelineTask` encapsulates an algorithm that consumes data from a
    data butler and stores its results back in a data butler.  It builds
    on `~lsst.pipe.base.Task`, sharing the :ref:`lsst.pex.config`-based
    configuration mechanism, and additionally associates a
    `PipelineTaskConnections` class with its config that declares all of
    the I/O the task will need to do.

    Subclasses typically implement only the `run` method, which operates
    purely on in-memory Python objects and returns an
    `lsst.pipe.base.Struct` holding the resulting data; `run` is not
    supposed to perform any I/O.  All necessary I/O happens in
    `runQuantum` (which subclasses may re-implement): it reads the input
    data from the data butler into memory, calls `run` with that data,
    examines the returned `Struct`, and saves some or all of it back to
    the data butler.  `runQuantum` receives a `QuantumContext` instance
    to facilitate the I/O, an `InputQuantizedConnection` instance
    defining all input `lsst.daf.butler.DatasetRef`, and an
    `OutputQuantizedConnection` instance defining all output
    `lsst.daf.butler.DatasetRef` for a single invocation of the task.

    Subclasses must be constructable with exactly the arguments taken by
    this base-class constructor, but may support other signatures as
    well.

    Attributes
    ----------
    canMultiprocess : bool, True by default (class attribute)
        Checked by the execution framework; subclasses may set it to
        ``False`` when the task does not support multiprocessing.

    Parameters
    ----------
    config : `~lsst.pex.config.Config`, optional
        Configuration for this task (an instance of ``self.ConfigClass``,
        a task-specific subclass of `PipelineTaskConfig`).  Defaults to
        ``self.ConfigClass()`` when not specified.
    log : `logging.Logger`, optional
        Logger instance whose name is used as a log name prefix, or
        ``None`` for no prefix.
    initInputs : `dict`, optional
        Objects needed to construct this task, with keys matching those
        of the dictionary returned by `getInitInputDatasetTypes` and
        values equivalent to calling `~lsst.daf.butler.Butler.get` with
        those DatasetTypes and no data IDs.  Optional for this base
        class; subclasses are permitted to require it.
    """

    # Narrowed by each concrete subclass to its own config type.
    ConfigClass: ClassVar[type[PipelineTaskConfig]]
    canMultiprocess: ClassVar[bool] = True

    def __init__(
        self,
        *,
        config: PipelineTaskConfig | None = None,
        log: logging.Logger | LsstLogAdapter | None = None,
        initInputs: dict[str, Any] | None = None,
        **kwargs: Any,
    ):
        # ``initInputs`` is accepted for the benefit of subclasses; the
        # base class only forwards config/log to Task.
        super().__init__(config=config, log=log, **kwargs)

    # Concrete subclasses define `run` with their own signatures, so the
    # attribute is declared with a permissive callable type.
    run: Callable[..., Struct]

    def run(self, **kwargs: Any) -> Struct:  # type: ignore
        """Run the task's algorithm on in-memory data.

        Subclasses implement this method.  The keyword arguments it
        receives are named after the connection fields describing input
        dataset types, and their values are the data objects retrieved
        from the data butler — a list of objects when the connection is
        configured with ``multiple`` set to ``True``, a single object
        otherwise.

        A task that needs to know its input or output DataIds must
        override `runQuantum` instead.

        This method should return a `Struct` whose attributes share the
        names of the connection fields describing output dataset types.

        Returns
        -------
        struct : `Struct`
            Struct with attribute names corresponding to output
            connection fields.

        Examples
        --------
        A typical implementation of this method may look like:

        .. code-block:: python

            def run(self, input, calib):
                # "input", "calib", and "output" are the names of the
                # config fields

                # Assuming that input/calib datasets are `scalar` they
                # are simple objects, do something with inputs and
                # calibs, produce output image.
                image = self.makeImage(input, calib)

                # If output dataset is `scalar` then return object,
                # not list
                return Struct(output=image)
        """
        raise NotImplementedError("run() is not implemented")

    def runQuantum(
        self,
        butlerQC: QuantumContext,
        inputRefs: InputQuantizedConnection,
        outputRefs: OutputQuantizedConnection,
    ) -> None:
        """Perform butler I/O and hand in-memory objects to the task's
        `~Task.run` method.

        Parameters
        ----------
        butlerQC : `QuantumContext`
            A butler specialized to operate in the context of a
            `lsst.daf.butler.Quantum`.
        inputRefs : `InputQuantizedConnection`
            Data structure whose attribute names are the names that
            identify connections defined in the corresponding
            `PipelineTaskConnections` class, and whose attribute values
            are the `lsst.daf.butler.DatasetRef` objects associated with
            the defined input/prerequisite connections.
        outputRefs : `OutputQuantizedConnection`
            Data structure of the same shape for the defined output
            connections.
        """
        # Load everything run() needs, execute it, and persist whatever
        # the returned Struct carries.
        inputData = butlerQC.get(inputRefs)
        results = self.run(**inputData)
        butlerQC.put(results, outputRefs)