Coverage for python/lsst/ctrl/mpexec/mpGraphExecutor.py : 17%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Names exported by ``from <module> import *``: the executor class and the
# two exception types it raises.
__all__ = ["MPGraphExecutor", "MPGraphExecutorError", "MPTimeoutError"]
24# -------------------------------
25# Imports of standard modules --
26# -------------------------------
27import logging
28import multiprocessing
30# -----------------------------
31# Imports for other modules --
32# -----------------------------
33from .quantumGraphExecutor import QuantumGraphExecutor
34from lsst.base import disableImplicitThreading
# Module-level logger.  The logger name is the module's dotted name with its
# first component removed (``str.partition`` returns the text after the first
# "."); if the module name has no dot the result is the empty string, which
# ``logging.getLogger`` maps to the root logger.
_LOG = logging.getLogger(__name__.partition(".")[2])
class MPGraphExecutorError(Exception):
    """Error reported by `MPGraphExecutor`.

    Serves as the common base for the more specific exceptions defined in
    this module.
    """
class MPTimeoutError(MPGraphExecutorError):
    """Error signalling that execution of a task timed out."""
class MPGraphExecutor(QuantumGraphExecutor):
    """Implementation of QuantumGraphExecutor using same-host multiprocess
    execution of Quanta.

    Parameters
    ----------
    numProc : `int`
        Number of processes to use for executing tasks.
    timeout : `float`
        Time in seconds to wait for tasks to finish.
    quantumExecutor : `QuantumExecutor`
        Executor for single quantum. For multiprocess-style execution when
        ``numProc`` is greater than one this instance must support pickle.
    executionGraphFixup : `ExecutionGraphFixup`, optional
        Instance used for modification of execution graph.
    """

    def __init__(self, numProc, timeout, quantumExecutor, *, executionGraphFixup=None):
        self.numProc = numProc
        self.timeout = timeout
        self.quantumExecutor = quantumExecutor
        self.executionGraphFixup = executionGraphFixup

    def execute(self, graph, butler):
        # Docstring inherited from QuantumGraphExecutor.execute
        quantaIter = self._fixupQuanta(graph.traverse())
        # A process pool is only used when more than one process is requested.
        if self.numProc > 1:
            self._executeQuantaMP(quantaIter, butler)
        else:
            self._executeQuantaInProcess(quantaIter, butler)

    def _fixupQuanta(self, quantaIter):
        """Call fixup code to modify execution graph.

        Parameters
        ----------
        quantaIter : iterable of `~lsst.pipe.base.QuantumIterData`
            Quanta as originated from a quantum graph.

        Returns
        -------
        quantaIter : iterable of `~lsst.pipe.base.QuantumIterData`
            Possibly updated set of quanta, properly ordered for execution.
            When no fixup instance is configured the input is returned
            unchanged.

        Raises
        ------
        MPGraphExecutorError
            Raised if execution graph cannot be ordered after modification,
            i.e. it has dependency cycles.
        """
        if not self.executionGraphFixup:
            return quantaIter

        _LOG.debug("Call execution graph fixup method")
        quantaIter = self.executionGraphFixup.fixupQuanta(quantaIter)

        # Dependencies may have changed after modification, so the quanta
        # have to be re-ordered with a topological sort.
        updatedQuanta = list(quantaIter)
        quanta = []
        ids = set()  # indices of quanta already placed in the output
        _LOG.debug("Re-ordering execution graph")
        while updatedQuanta:
            # Find the first remaining quantum whose dependencies have all
            # been placed already.
            for i, qdata in enumerate(updatedQuanta):
                if ids.issuperset(qdata.dependencies):
                    _LOG.debug("Found next quanta to execute: %s", qdata)
                    # Safe to delete while enumerating because the loop is
                    # left immediately afterwards.
                    del updatedQuanta[i]
                    ids.add(qdata.index)
                    # We could yield here, but cycles must be detected
                    # before anything is returned from this method.
                    quanta.append(qdata)
                    break
            else:
                # No quantum could be placed: the remaining quanta have a
                # dependency cycle.
                raise MPGraphExecutorError(
                    "Updated execution graph has dependency cycle.")

        return quanta

    def _executeQuantaInProcess(self, iterable, butler):
        """Execute all Quanta in current process.

        Parameters
        ----------
        iterable : iterable of `~lsst.pipe.base.QuantumIterData`
            Sequence of Quanta to execute. It is guaranteed that
            pre-requisites for a given Quantum will always appear before
            that Quantum.
        butler : `lsst.daf.butler.Butler`
            Data butler instance
        """
        for qdata in iterable:
            _LOG.debug("Executing %s", qdata)
            self._executePipelineTask(taskDef=qdata.taskDef, quantum=qdata.quantum,
                                      butler=butler, executor=self.quantumExecutor)

    def _executeQuantaMP(self, iterable, butler):
        """Execute all Quanta in separate process pool.

        Parameters
        ----------
        iterable : iterable of `~lsst.pipe.base.QuantumIterData`
            Sequence of Quanta to execute. It is guaranteed that
            pre-requisites for a given Quantum will always appear before
            that Quantum.
        butler : `lsst.daf.butler.Butler`
            Data butler instance

        Raises
        ------
        MPGraphExecutorError
            Raised if a task does not support multiprocessing.
        MPTimeoutError
            Raised if a result is not available within ``timeout`` seconds.

        Notes
        -----
        Any exception raised by a task in a worker process is re-raised
        here when its result is retrieved. On any failure the pool is
        terminated so that no worker processes are left behind; on success
        the pool is closed and joined.
        """
        disableImplicitThreading()  # To prevent thread contention

        pool = multiprocessing.Pool(processes=self.numProc, maxtasksperchild=1)
        try:
            # map quantum id to AsyncResult and QuantumIterData
            results = {}
            qdataMap = {}

            # Add each Quantum to the pool, waiting until its pre-requisites
            # have completed.
            # TODO: This is not super-efficient as it stops at the first
            # Quantum that cannot be executed (yet) and does not check other
            # Quanta.
            for qdata in iterable:

                # check that task can run in sub-process
                taskDef = qdata.taskDef
                if not taskDef.taskClass.canMultiprocess:
                    raise MPGraphExecutorError(f"Task {taskDef.taskName} does not support multiprocessing;"
                                               " use single process")

                # Wait for all dependencies; this raises on timeout or if
                # the remote call raised.
                for dep in qdata.dependencies:
                    _LOG.debug("Check dependency %s for %s", dep, qdata)
                    self._waitForResult(results[dep], qdataMap[dep])
                    _LOG.debug("Result %s is ready", dep)

                # Add it to the pool and remember its result
                _LOG.debug("Submitting %s", qdata)
                kwargs = dict(taskDef=taskDef, quantum=qdata.quantum,
                              butler=butler, executor=self.quantumExecutor)
                results[qdata.index] = pool.apply_async(self._executePipelineTask, (), kwargs)
                qdataMap[qdata.index] = qdata

            # Everything is submitted, wait until it's complete
            _LOG.debug("Wait for all tasks")
            for qid, res in results.items():
                if res.ready():
                    _LOG.debug("Result %d is ready", qid)
                else:
                    _LOG.debug("Waiting for result %d", qid)
                # Always retrieve the result, even when it is already
                # available, so that an exception raised by the task in the
                # worker process is propagated instead of silently dropped.
                self._waitForResult(res, qdataMap[qid])
        except BaseException:
            # Stop any workers that are still running before propagating.
            pool.terminate()
            raise
        else:
            # All results were collected; let workers exit normally.
            pool.close()
        finally:
            # join() requires close() or terminate() to have been called,
            # which both branches above guarantee.
            pool.join()

    def _waitForResult(self, result, qdata):
        """Wait for a single asynchronous result.

        Parameters
        ----------
        result : `multiprocessing.pool.AsyncResult`
            Result object returned by ``Pool.apply_async``.
        qdata : `~lsst.pipe.base.QuantumIterData`
            Quantum data that the result belongs to, used only for the
            error message.

        Returns
        -------
        value : `object`
            Whatever the remote call returned.

        Raises
        ------
        MPTimeoutError
            Raised if the result does not arrive within ``self.timeout``
            seconds.
        """
        try:
            return result.get(self.timeout)
        except multiprocessing.TimeoutError as exc:
            raise MPTimeoutError(
                f"Timeout ({self.timeout}sec) for task {qdata.taskDef} while processing "
                f"quantum with dataId={qdata.quantum.dataId}"
            ) from exc

    @staticmethod
    def _executePipelineTask(*, taskDef, quantum, butler, executor):
        """Execute PipelineTask on a single data item.

        Parameters
        ----------
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        quantum : `~lsst.daf.butler.Quantum`
            Quantum for this execution.
        butler : `~lsst.daf.butler.Butler`
            Data butler instance.
        executor : `QuantumExecutor`
            Executor for single quantum.

        Returns
        -------
        result : `object`
            Whatever ``executor.execute`` returns.
        """
        return executor.execute(taskDef, quantum, butler)