Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22__all__ = ["MPGraphExecutor", "MPGraphExecutorError", "MPTimeoutError"] 

23 

24# ------------------------------- 

25# Imports of standard modules -- 

26# ------------------------------- 

27import logging 

28import multiprocessing 

29 

30# ----------------------------- 

31# Imports for other modules -- 

32# ----------------------------- 

33from .quantumGraphExecutor import QuantumGraphExecutor 

34from lsst.base import disableImplicitThreading 

35 

36_LOG = logging.getLogger(__name__.partition(".")[2]) 

37 

38 

class MPGraphExecutorError(Exception):
    """Exception class for errors raised by MPGraphExecutor.

    Serves as the common base for every executor-specific failure defined
    in this module, so callers can catch this one type.
    """

43 

44 

class MPTimeoutError(MPGraphExecutorError):
    """Exception raised when task execution times out.

    Derives from `MPGraphExecutorError`, so handlers for the generic
    executor error also catch timeouts.
    """

49 

50 

class MPGraphExecutor(QuantumGraphExecutor):
    """Implementation of QuantumGraphExecutor using same-host multiprocess
    execution of Quanta.

    Parameters
    ----------
    numProc : `int`
        Number of processes to use for executing tasks.
    timeout : `float`
        Time in seconds to wait for tasks to finish.
    quantumExecutor : `QuantumExecutor`
        Executor for single quantum. For multiprocess-style execution when
        ``numProc`` is greater than one this instance must support pickle.
    executionGraphFixup : `ExecutionGraphFixup`, optional
        Instance used for modification of execution graph.
    """

    def __init__(self, numProc, timeout, quantumExecutor, *, executionGraphFixup=None):
        self.numProc = numProc
        self.timeout = timeout
        self.quantumExecutor = quantumExecutor
        self.executionGraphFixup = executionGraphFixup

    def execute(self, graph, butler):
        # Docstring inherited from QuantumGraphExecutor.execute
        quantaIter = self._fixupQuanta(graph.traverse())
        if self.numProc > 1:
            self._executeQuantaMP(quantaIter, butler)
        else:
            self._executeQuantaInProcess(quantaIter, butler)

    def _fixupQuanta(self, quantaIter):
        """Call fixup code to modify execution graph.

        Parameters
        ----------
        quantaIter : iterable of `~lsst.pipe.base.QuantumIterData`
            Quanta as originated from a quantum graph.

        Returns
        -------
        quantaIter : iterable of `~lsst.pipe.base.QuantumIterData`
            Possibly updated set of quanta, properly ordered for execution.
            When no fixup instance is configured the input iterable is
            returned unchanged; otherwise a re-ordered `list` is returned.

        Raises
        ------
        MPGraphExecutorError
            Raised if execution graph cannot be ordered after modification,
            i.e. it has dependency cycles.
        """
        if not self.executionGraphFixup:
            return quantaIter

        _LOG.debug("Call execution graph fixup method")
        quantaIter = self.executionGraphFixup.fixupQuanta(quantaIter)

        # need it correctly ordered as dependencies may have changed
        # after modification, so do topo-sort
        updatedQuanta = list(quantaIter)
        quanta = []
        ids = set()
        _LOG.debug("Re-ordering execution graph")
        while updatedQuanta:
            # find quantum that has all dependencies resolved already
            for i, qdata in enumerate(updatedQuanta):
                if ids.issuperset(qdata.dependencies):
                    _LOG.debug("Found next quanta to execute: %s", qdata)
                    # Deleting while enumerating is safe here only because
                    # we leave the ``for`` loop immediately afterwards.
                    del updatedQuanta[i]
                    ids.add(qdata.index)
                    # we could yield here but I want to detect cycles before
                    # returning anything from this method
                    quanta.append(qdata)
                    break
            else:
                # means remaining quanta have dependency cycle
                raise MPGraphExecutorError(
                    "Updated execution graph has dependency cycle.")

        return quanta

    def _executeQuantaInProcess(self, iterable, butler):
        """Execute all Quanta in current process.

        Parameters
        ----------
        iterable : iterable of `~lsst.pipe.base.QuantumIterData`
            Sequence of Quanta to execute. It is guaranteed that
            pre-requisites for a given Quantum will always appear before
            that Quantum.
        butler : `lsst.daf.butler.Butler`
            Data butler instance
        """
        for qdata in iterable:
            _LOG.debug("Executing %s", qdata)
            self._executePipelineTask(taskDef=qdata.taskDef, quantum=qdata.quantum,
                                      butler=butler, executor=self.quantumExecutor)

    def _executeQuantaMP(self, iterable, butler):
        """Execute all Quanta in separate process pool.

        Parameters
        ----------
        iterable : iterable of `~lsst.pipe.base.QuantumIterData`
            Sequence of Quanta to execute. It is guaranteed that
            pre-requisites for a given Quantum will always appear before
            that Quantum.
        butler : `lsst.daf.butler.Butler`
            Data butler instance

        Raises
        ------
        MPGraphExecutorError
            Raised if a task does not support multiprocessing.
        MPTimeoutError
            Raised if a task does not finish within ``self.timeout`` seconds.
        """

        disableImplicitThreading()  # To prevent thread contention

        # map quantum id to AsyncResult and QuantumIterData
        results = {}
        qdataMap = {}

        # The pool is used as a context manager so that worker processes are
        # always released: on normal completion every result has already been
        # collected, and on error (timeout, task failure, unsupported task)
        # outstanding workers are terminated instead of being leaked.
        with multiprocessing.Pool(processes=self.numProc, maxtasksperchild=1) as pool:

            # Add each Quantum to a pool, wait until its pre-requisites
            # completed.
            # TODO: This is not super-efficient as it stops at the first
            # Quantum that cannot be executed (yet) and does not check other
            # Quanta.
            for qdata in iterable:

                # check that task can run in sub-process
                taskDef = qdata.taskDef
                if not taskDef.taskClass.canMultiprocess:
                    raise MPGraphExecutorError(f"Task {taskDef.taskName} does not support multiprocessing;"
                                               " use single process")

                # Wait for all dependencies
                for dep in qdata.dependencies:
                    # Wait for max. timeout for this result to be ready.
                    # This can raise on timeout or if remote call raises.
                    _LOG.debug("Check dependency %s for %s", dep, qdata)
                    self._waitForResult(results[dep], qdataMap[dep])
                    _LOG.debug("Result %s is ready", dep)

                # Add it to the pool and remember its result
                _LOG.debug("Submitting %s", qdata)
                kwargs = dict(taskDef=taskDef, quantum=qdata.quantum,
                              butler=butler, executor=self.quantumExecutor)
                results[qdata.index] = pool.apply_async(self._executePipelineTask, (), kwargs)
                qdataMap[qdata.index] = qdata

            # Everything is submitted, wait until it's complete
            _LOG.debug("Wait for all tasks")
            for qid, res in results.items():
                if res.ready():
                    _LOG.debug("Result %d is ready", qid)
                else:
                    _LOG.debug("Waiting for result %d", qid)
                # ``get`` is called even for ready results so that an
                # exception raised in the worker is re-raised here.
                self._waitForResult(res, qdataMap[qid])

    def _waitForResult(self, result, qdata):
        """Wait for a single asynchronous result to become available.

        Parameters
        ----------
        result : `multiprocessing.pool.AsyncResult`
            Pending result of a submitted Quantum.
        qdata : `~lsst.pipe.base.QuantumIterData`
            Quantum that produced ``result``, used for error reporting only.

        Returns
        -------
        value : `object`
            Whatever the remote call returned.

        Raises
        ------
        MPTimeoutError
            Raised if the result is not ready within ``self.timeout``
            seconds. Exceptions raised by the remote call itself are
            propagated unchanged.
        """
        try:
            return result.get(self.timeout)
        except multiprocessing.TimeoutError as exc:
            raise MPTimeoutError(
                f"Timeout ({self.timeout}sec) for task {qdata.taskDef} while processing "
                f"quantum with dataId={qdata.quantum.dataId}"
            ) from exc

    @staticmethod
    def _executePipelineTask(*, taskDef, quantum, butler, executor):
        """Execute PipelineTask on a single data item.

        This is a static method so that it can be submitted to a process
        pool without pickling the whole executor instance.

        Parameters
        ----------
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        quantum : `~lsst.daf.butler.Quantum`
            Quantum for this execution.
        butler : `~lsst.daf.butler.Butler`
            Data butler instance.
        executor : `QuantumExecutor`
            Executor for single quantum.

        Returns
        -------
        result : `object`
            Value returned by ``executor.execute``.
        """
        return executor.execute(taskDef, quantum, butler)