# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""This module defines the PipelineTask class and related methods.
"""

from __future__ import annotations

__all__ = ["PipelineTask"]  # Classes in this module

from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional, Type, Union

from .connections import InputQuantizedConnection, OutputQuantizedConnection
from .task import Task

if TYPE_CHECKING:
    import logging

    from lsst.utils.logging import LsstLogAdapter

    from .butlerQuantumContext import ButlerQuantumContext
    from .config import PipelineTaskConfig, ResourceConfig
    from .struct import Struct


class PipelineTask(Task):
    """Base class for all pipeline tasks.

    This is an abstract base class for PipelineTasks, which represent
    algorithms executed by the framework on data read from a data butler;
    the resulting data are also stored in a data butler.

    PipelineTask inherits from `pipe.base.Task` and uses the same
    configuration mechanism based on `pex.config`. `PipelineTask` classes
    also have a `PipelineTaskConnections` class associated with their config
    which defines all of the I/O a `PipelineTask` will need to do. A
    PipelineTask subclass typically implements the `run()` method, which
    receives Python-domain data objects and returns a `pipe.base.Struct`
    object with the resulting data. The `run()` method is not supposed to
    perform any I/O; it operates entirely on in-memory objects.
    `runQuantum()` is the method (which can be re-implemented in a subclass)
    where all necessary I/O is performed: it reads all input data from the
    data butler into memory, calls the `run()` method with that data,
    examines the returned `Struct` object, and saves some or all of that
    data back to the data butler. The `runQuantum()` method receives a
    `ButlerQuantumContext` instance to facilitate I/O, an
    `InputQuantizedConnection` instance which defines all input
    `lsst.daf.butler.DatasetRef`, and an `OutputQuantizedConnection`
    instance which defines all the output `lsst.daf.butler.DatasetRef` for a
    single invocation of PipelineTask.

    Subclasses must be constructable with exactly the arguments taken by the
    PipelineTask base class constructor, but may support other signatures as
    well.

    Attributes
    ----------
    canMultiprocess : bool, True by default (class attribute)
        This class attribute is checked by the execution framework;
        subclasses can set it to ``False`` if the task does not support
        multiprocessing.

    Parameters
    ----------
    config : `pex.config.Config`, optional
        Configuration for this task (an instance of ``self.ConfigClass``,
        which is a task-specific subclass of `PipelineTaskConfig`).
        If not specified then it defaults to `self.ConfigClass()`.
    log : `logging.Logger`, optional
        Logger instance whose name is used as a log name prefix, or ``None``
        for no prefix.
    initInputs : `dict`, optional
        A dictionary of objects needed to construct this PipelineTask, with
        keys matching the keys of the dictionary returned by
        `getInitInputDatasetTypes` and values equivalent to what would be
        obtained by calling `Butler.get` with those DatasetTypes and no data
        IDs. While it is optional for the base class, subclasses are
        permitted to require this argument.

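    Examples
    --------
    A minimal concrete subclass needs a connections class, a config class
    tied to it, and a `run` implementation; the default `runQuantum` then
    handles the butler I/O for it. The sketch below is illustrative only:
    the class names, connection names, dataset type names, and storage
    class are hypothetical, not part of this module.

    .. code-block:: python

        import lsst.pipe.base as pipeBase
        import lsst.pipe.base.connectionTypes as cT


        class ExampleConnections(
            pipeBase.PipelineTaskConnections,
            dimensions=("instrument", "visit", "detector"),
        ):
            inputImage = cT.Input(
                doc="Input image to process",
                name="exampleInputImage",
                storageClass="ExposureF",
                dimensions=("instrument", "visit", "detector"),
            )
            outputImage = cT.Output(
                doc="Processed output image",
                name="exampleOutputImage",
                storageClass="ExposureF",
                dimensions=("instrument", "visit", "detector"),
            )


        class ExampleConfig(
            pipeBase.PipelineTaskConfig, pipelineConnections=ExampleConnections
        ):
            pass


        class ExampleTask(pipeBase.PipelineTask):
            ConfigClass = ExampleConfig
            _DefaultName = "example"

            def run(self, inputImage):
                # Attribute names of the returned Struct must match the
                # names of the output connections.
                return pipeBase.Struct(outputImage=inputImage)
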
    """

    ConfigClass: ClassVar[Type[PipelineTaskConfig]]
    canMultiprocess: ClassVar[bool] = True

    def __init__(
        self,
        *,
        config: Optional[PipelineTaskConfig] = None,
        log: Optional[Union[logging.Logger, LsstLogAdapter]] = None,
        initInputs: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ):
        super().__init__(config=config, log=log, **kwargs)

    def run(self, **kwargs: Any) -> Struct:
        """Run the task algorithm on in-memory data.

        This method should be implemented in a subclass. It will receive
        keyword arguments whose names are the same as the names of the
        connection fields describing input dataset types. Argument values
        will be data objects retrieved from the data butler. If a dataset
        type is configured with the ``multiple`` field set to ``True`` then
        the argument value will be a list of objects; otherwise it will be a
        single object.

        If the task needs to know its input or output DataIds then it has to
        override the `runQuantum` method instead.

        This method should return a `Struct` whose attributes share the same
        names as the connection fields describing output dataset types.

        Returns
        -------
        struct : `Struct`
            Struct with attribute names corresponding to the output
            connection fields.

        Examples
        --------
        A typical implementation of this method may look like this:

        .. code-block:: python

            def run(self, input, calib):
                # "input", "calib", and "output" are the names of the
                # connection fields.

                # Assuming that the input/calib datasets are scalar, they are
                # simple objects; do something with the inputs and calibs and
                # produce an output image.
                image = self.makeImage(input, calib)

                # If the output dataset is scalar then return the object
                # itself, not a list.
                return Struct(output=image)

        """
        raise NotImplementedError("run() is not implemented")

    def runQuantum(
        self,
        butlerQC: ButlerQuantumContext,
        inputRefs: InputQuantizedConnection,
        outputRefs: OutputQuantizedConnection,
    ) -> None:
        """Do butler I/O and/or any transforms needed to provide in-memory
        objects for the task's `run` method.

        Parameters
        ----------
        butlerQC : `ButlerQuantumContext`
            A butler which is specialized to operate in the context of a
            `lsst.daf.butler.Quantum`.
        inputRefs : `InputQuantizedConnection`
            Data structure whose attribute names are the names that identify
            connections defined in the corresponding `PipelineTaskConnections`
            class. The values of these attributes are the
            `lsst.daf.butler.DatasetRef` objects associated with the defined
            input/prerequisite connections.
        outputRefs : `OutputQuantizedConnection`
            Data structure whose attribute names are the names that identify
            connections defined in the corresponding `PipelineTaskConnections`
            class. The values of these attributes are the
            `lsst.daf.butler.DatasetRef` objects associated with the defined
            output connections.

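        Examples
        --------
        The default implementation below simply reads every input, calls
        `run`, and writes the outputs. A subclass whose `run` needs extra
        information, such as part of the quantum data ID, could override it
        along these lines (a sketch; the ``visit`` argument of the
        hypothetical ``run`` is illustrative):

        .. code-block:: python

            def runQuantum(self, butlerQC, inputRefs, outputRefs):
                # Read all inputs into memory as a {connection name: object}
                # mapping.
                inputs = butlerQC.get(inputRefs)
                # Forward extra, non-butler information to run(), e.g. a
                # value taken from the quantum data ID.
                visit = butlerQC.quantum.dataId["visit"]
                outputs = self.run(**inputs, visit=visit)
                # Write the datasets named in the output connections.
                butlerQC.put(outputs, outputRefs)
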
        """
        inputs = butlerQC.get(inputRefs)
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def getResourceConfig(self) -> Optional[ResourceConfig]:
        """Return the resource configuration for this task.

        Returns
        -------
        resourceConfig : `~config.ResourceConfig` or `None`
            Object of type `~config.ResourceConfig`, or ``None`` if resource
            configuration is not defined for this task.

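        Examples
        --------
        The base `PipelineTaskConfig` does not define a ``resources`` field,
        so by default this method returns `None`. A task config could add
        such a field along the following lines (a sketch; ``ExampleConfig``
        and ``ExampleConnections`` are hypothetical):

        .. code-block:: python

            from lsst.pex.config import ConfigField
            from lsst.pipe.base.config import ResourceConfig


            class ExampleConfig(
                PipelineTaskConfig, pipelineConnections=ExampleConnections
            ):
                resources = ConfigField(
                    dtype=ResourceConfig, doc="Resource requirements for this task"
                )
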
        """
        return getattr(self.config, "resources", None)