Coverage for tests/test_executors.py: 31%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Simple unit tests for the MPGraphExecutor class.
23"""
25import logging
26import signal
27import sys
28import time
29import unittest
30import warnings
31from multiprocessing import Manager
33import networkx as nx
34import psutil
35from lsst.ctrl.mpexec import MPGraphExecutor, MPGraphExecutorError, MPTimeoutError, QuantumExecutor
36from lsst.ctrl.mpexec.execFixupDataId import ExecFixupDataId
37from lsst.pipe.base import NodeId
# Emit DEBUG-level records so the debug messages logged by the mocks below
# are visible when the tests run.
logging.basicConfig(level=logging.DEBUG)

# Module-level logger shared by all mock classes in this file.
_LOG = logging.getLogger(__name__)
class QuantumExecutorMock(QuantumExecutor):
    """Mock class for QuantumExecutor.

    Every quantum passed to `execute` is appended to ``self.quanta`` so that
    tests can inspect which quanta were executed and in which order.

    Parameters
    ----------
    mp : `bool`, optional
        If `True`, ``self.quanta`` is a ``multiprocessing.Manager`` list
        instead of a plain `list`.
    """

    def __init__(self, mp=False):
        self.quanta = []
        if mp:
            # in multiprocess mode use shared list
            manager = Manager()
            self.quanta = manager.list()

    def execute(self, taskDef, quantum, butler):
        """Record ``quantum`` as executed and return it.

        If ``taskDef.taskClass`` is truthy it is instantiated without
        arguments and its ``runQuantum()`` is called before the quantum is
        recorded, so an exception raised there leaves the quantum unrecorded.
        ``butler`` is accepted but not used.
        """
        _LOG.debug("QuantumExecutorMock.execute: taskDef=%s dataId=%s", taskDef, quantum.dataId)
        if taskDef.taskClass:
            # only works for task mocks whose constructor and runQuantum()
            # take no arguments (such as TaskMockMP below)
            taskDef.taskClass().runQuantum()
        self.quanta.append(quantum)
        return quantum

    def getDataIds(self, field):
        """Returns values for dataId field for each visited quanta"""
        return [quantum.dataId[field] for quantum in self.quanta]
class QuantumMock:
    """Minimal stand-in for a quantum that carries only a data ID.

    Instances compare equal and hash identically when their ``dataId``
    mappings are equal, regardless of key insertion order.

    Parameters
    ----------
    dataId : `dict`
        Mapping of data ID field names to values. Items must be sortable
        and hashable for `__hash__` to work.
    """

    def __init__(self, dataId):
        self.dataId = dataId

    def __eq__(self, other):
        # Defer to the other operand for unrelated types instead of failing
        # with AttributeError on a missing ``dataId`` attribute.
        if not isinstance(other, QuantumMock):
            return NotImplemented
        return self.dataId == other.dataId

    def __hash__(self):
        # dict.__eq__ is order-insensitive, so sort the items to make the
        # hash order-insensitive as well.
        return hash(tuple(sorted(self.dataId.items())))
class QuantumIterDataMock:
    """Simple class to mock QuantumIterData.

    Parameters
    ----------
    index : `int`
        Index of this node; also used to build ``nodeId``.
    taskDef : `TaskDefMock`
        Task definition associated with this node.
    **dataId
        Keyword arguments collected into the data ID of the wrapped
        `QuantumMock`.
    """

    def __init__(self, index, taskDef, **dataId):
        self.index = index
        self.taskDef = taskDef
        self.quantum = QuantumMock(dataId)
        # Indices of the nodes this node depends on; starts empty and is
        # filled in by individual tests.
        self.dependencies = set()
        self.nodeId = NodeId(index, "DummyBuildString")
class QuantumGraphMock:
    """Mock for quantum graph.

    The nodes are linked into a simple chain in the order given, so that a
    topological sort of the underlying graph reproduces the input order.

    Parameters
    ----------
    qdata : `list`
        Node objects (e.g. `QuantumIterDataMock`) in the desired order. May
        be empty or contain a single node.
    """

    def __init__(self, qdata):
        self._graph = nx.DiGraph()
        # Add nodes explicitly so that an empty or single-element ``qdata``
        # works; adding edges alone would drop a lone node and indexing
        # ``qdata[0]`` would fail on empty input.
        self._graph.add_nodes_from(qdata)
        self._graph.add_edges_from(zip(qdata, qdata[1:]))

    def __iter__(self):
        yield from nx.topological_sort(self._graph)

    def __len__(self):
        return len(self._graph)

    def findTaskDefByLabel(self, label):
        """Return the task definition of the first node whose label
        matches, or `None` if there is no such node.
        """
        for q in self:
            if q.taskDef.label == label:
                return q.taskDef
        return None

    def getQuantaForTask(self, taskDef):
        """Return the set of quanta of all nodes belonging to ``taskDef``."""
        return {q.quantum for q in self.getNodesForTask(taskDef)}

    def getNodesForTask(self, taskDef):
        """Return the set of nodes whose task definition equals
        ``taskDef``.
        """
        return {q for q in self if q.taskDef == taskDef}

    @property
    def graph(self):
        """The underlying directed graph."""
        return self._graph

    def findCycle(self):
        """Return an empty list; the mock never reports a cycle."""
        return []

    def determineInputsToQuantumNode(self, node):
        """Return the set of nodes whose ``index`` is listed in
        ``node.dependencies``.
        """
        return {otherNode for otherNode in self if otherNode.index in node.dependencies}
class TaskMockMP:
    """Simple mock class for task supporting multiprocessing."""

    canMultiprocess = True

    def runQuantum(self):
        """Log a debug message and do nothing else."""
        _LOG.debug("TaskMockMP.runQuantum")
class TaskMockFail:
    """Simple mock class for task which fails."""

    canMultiprocess = True

    def runQuantum(self):
        """Log a debug message, then always raise `ValueError`."""
        _LOG.debug("TaskMockFail.runQuantum")
        raise ValueError("expected failure")
class TaskMockCrash:
    """Simple mock class for task which crashes by raising a signal."""

    canMultiprocess = True

    def runQuantum(self):
        """Log a debug message, then send SIGILL to the current process."""
        _LOG.debug("TaskMockCrash.runQuantum")
        signal.raise_signal(signal.SIGILL)
class TaskMockSleep:
    """Simple mock class for task which "runs" for some time."""

    canMultiprocess = True

    def runQuantum(self):
        """Log a debug message, then sleep for 5 seconds."""
        _LOG.debug("TaskMockSleep.runQuantum")
        time.sleep(5.0)
class TaskMockLongSleep:
    """Simple mock class for task which "runs" for very long time."""

    canMultiprocess = True

    def runQuantum(self):
        """Log a debug message, then sleep for 100 seconds."""
        _LOG.debug("TaskMockLongSleep.runQuantum")
        time.sleep(100.0)
class TaskMockNoMP:
    """Simple mock class for task not supporting multiprocessing."""

    canMultiprocess = False
class TaskDefMock:
    """Simple mock class for task definition in a pipeline.

    Parameters
    ----------
    taskName : `str`, optional
        Name of the task.
    config : optional
        Task configuration; stored but not otherwise used by this class.
    taskClass : `type`, optional
        Task class; may be `None`.
    label : `str`, optional
        Label of the task in a pipeline.
    """

    def __init__(self, taskName="Task", config=None, taskClass=TaskMockMP, label="task1"):
        self.taskName = taskName
        self.config = config
        self.taskClass = taskClass
        self.label = label

    def __str__(self):
        # ``taskClass`` may be None (QuantumExecutorMock.execute checks for
        # a falsy value), so do not assume it has a ``__name__``.
        className = getattr(self.taskClass, "__name__", None)
        return f"TaskDefMock(taskName={self.taskName}, taskClass={className})"
class MPGraphExecutorTestCase(unittest.TestCase):
    """A test case for MPGraphExecutor class"""

    def test_mpexec_nomp(self):
        """Make simple graph and execute"""

        taskDef = TaskDefMock()
        qgraph = QuantumGraphMock(
            [QuantumIterDataMock(index=i, taskDef=taskDef, detector=i) for i in range(3)]
        )

        # run in single-process mode
        qexec = QuantumExecutorMock()
        mpexec = MPGraphExecutor(numProc=1, timeout=100, quantumExecutor=qexec)
        mpexec.execute(qgraph, butler=None)
        self.assertEqual(qexec.getDataIds("detector"), [0, 1, 2])

    def test_mpexec_mp(self):
        """Make simple graph and execute"""

        taskDef = TaskDefMock()
        qgraph = QuantumGraphMock(
            [QuantumIterDataMock(index=i, taskDef=taskDef, detector=i) for i in range(3)]
        )

        methods = ["spawn"]
        if sys.platform == "linux":
            methods.append("fork")
            methods.append("forkserver")

        for method in methods:
            with self.subTest(startMethod=method):
                # Run in multi-process mode, the order of results is not
                # defined.
                qexec = QuantumExecutorMock(mp=True)
                mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec, startMethod=method)
                mpexec.execute(qgraph, butler=None)
                self.assertCountEqual(qexec.getDataIds("detector"), [0, 1, 2])

    def test_mpexec_nompsupport(self):
        """Try to run MP for task that has no MP support which should fail"""

        taskDef = TaskDefMock(taskClass=TaskMockNoMP)
        qgraph = QuantumGraphMock(
            [QuantumIterDataMock(index=i, taskDef=taskDef, detector=i) for i in range(3)]
        )

        # run in multi-process mode
        qexec = QuantumExecutorMock()
        mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec)
        with self.assertRaisesRegex(MPGraphExecutorError, "Task Task does not support multiprocessing"):
            mpexec.execute(qgraph, butler=None)

    def test_mpexec_fixup(self):
        """Make simple graph and execute, add dependencies by executing fixup
        code.
        """

        taskDef = TaskDefMock()

        for reverse in (False, True):
            # Report each ordering separately so that a failure in one does
            # not hide the result of the other.
            with self.subTest(reverse=reverse):
                qgraph = QuantumGraphMock(
                    [QuantumIterDataMock(index=i, taskDef=taskDef, detector=i) for i in range(3)]
                )

                qexec = QuantumExecutorMock()
                fixup = ExecFixupDataId("task1", "detector", reverse=reverse)
                mpexec = MPGraphExecutor(
                    numProc=1, timeout=100, quantumExecutor=qexec, executionGraphFixup=fixup
                )
                mpexec.execute(qgraph, butler=None)

                expected = [0, 1, 2]
                if reverse:
                    expected = list(reversed(expected))
                self.assertEqual(qexec.getDataIds("detector"), expected)

    def test_mpexec_timeout(self):
        """Fail due to timeout"""

        taskDef = TaskDefMock()
        taskDefSleep = TaskDefMock(taskClass=TaskMockSleep)
        qgraph = QuantumGraphMock(
            [
                QuantumIterDataMock(index=0, taskDef=taskDef, detector=0),
                QuantumIterDataMock(index=1, taskDef=taskDefSleep, detector=1),
                QuantumIterDataMock(index=2, taskDef=taskDef, detector=2),
            ]
        )

        # with failFast we'll get immediate MPTimeoutError
        qexec = QuantumExecutorMock(mp=True)
        mpexec = MPGraphExecutor(numProc=3, timeout=1, quantumExecutor=qexec, failFast=True)
        with self.assertRaises(MPTimeoutError):
            mpexec.execute(qgraph, butler=None)

        # with failFast=False exception happens after last task finishes
        qexec = QuantumExecutorMock(mp=True)
        mpexec = MPGraphExecutor(numProc=3, timeout=3, quantumExecutor=qexec, failFast=False)
        with self.assertRaises(MPTimeoutError):
            mpexec.execute(qgraph, butler=None)
        # We expect two tasks (0 and 2) to finish successfully and one task to
        # timeout. Unfortunately on busy CPU there is no guarantee that tasks
        # finish on time, so expect more timeouts and issue a warning.
        detectorIds = set(qexec.getDataIds("detector"))
        self.assertLess(len(detectorIds), 3)
        if detectorIds != {0, 2}:
            warnings.warn(f"Possibly timed out tasks, expected [0, 2], received {detectorIds}")

    def test_mpexec_failure(self):
        """Failure in one task should not stop other tasks"""

        taskDef = TaskDefMock()
        taskDefFail = TaskDefMock(taskClass=TaskMockFail)
        qgraph = QuantumGraphMock(
            [
                QuantumIterDataMock(index=0, taskDef=taskDef, detector=0),
                QuantumIterDataMock(index=1, taskDef=taskDefFail, detector=1),
                QuantumIterDataMock(index=2, taskDef=taskDef, detector=2),
            ]
        )

        qexec = QuantumExecutorMock(mp=True)
        mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec)
        with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"):
            mpexec.execute(qgraph, butler=None)
        self.assertCountEqual(qexec.getDataIds("detector"), [0, 2])

    def test_mpexec_failure_dep(self):
        """Failure in one task should skip dependents"""

        taskDef = TaskDefMock()
        taskDefFail = TaskDefMock(taskClass=TaskMockFail)
        qdata = [
            QuantumIterDataMock(index=0, taskDef=taskDef, detector=0),
            QuantumIterDataMock(index=1, taskDef=taskDefFail, detector=1),
            QuantumIterDataMock(index=2, taskDef=taskDef, detector=2),
            QuantumIterDataMock(index=3, taskDef=taskDef, detector=3),
            QuantumIterDataMock(index=4, taskDef=taskDef, detector=4),
        ]
        # Node 2 depends on the failing node 1; node 4 depends on 2 and 3,
        # so only nodes 0 and 3 are expected to run to completion.
        qdata[2].dependencies.add(1)
        qdata[4].dependencies.add(3)
        qdata[4].dependencies.add(2)

        qgraph = QuantumGraphMock(qdata)

        qexec = QuantumExecutorMock(mp=True)
        mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec)
        with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"):
            mpexec.execute(qgraph, butler=None)
        self.assertCountEqual(qexec.getDataIds("detector"), [0, 3])

    def test_mpexec_failure_failfast(self):
        """Fast fail stops quickly.

        Timing delay of task #3 should be sufficient to process
        failure and raise exception.
        """

        taskDef = TaskDefMock()
        taskDefFail = TaskDefMock(taskClass=TaskMockFail)
        taskDefLongSleep = TaskDefMock(taskClass=TaskMockLongSleep)
        qdata = [
            QuantumIterDataMock(index=0, taskDef=taskDef, detector=0),
            QuantumIterDataMock(index=1, taskDef=taskDefFail, detector=1),
            QuantumIterDataMock(index=2, taskDef=taskDef, detector=2),
            QuantumIterDataMock(index=3, taskDef=taskDefLongSleep, detector=3),
            QuantumIterDataMock(index=4, taskDef=taskDef, detector=4),
        ]
        qdata[1].dependencies.add(0)
        qdata[2].dependencies.add(1)
        qdata[4].dependencies.add(3)
        qdata[4].dependencies.add(2)

        qgraph = QuantumGraphMock(qdata)

        qexec = QuantumExecutorMock(mp=True)
        mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec, failFast=True)
        with self.assertRaisesRegex(MPGraphExecutorError, "failed, exit code=1"):
            mpexec.execute(qgraph, butler=None)
        self.assertCountEqual(qexec.getDataIds("detector"), [0])

    def test_mpexec_crash(self):
        """Check task crash due to signal"""

        taskDef = TaskDefMock()
        taskDefCrash = TaskDefMock(taskClass=TaskMockCrash)
        qgraph = QuantumGraphMock(
            [
                QuantumIterDataMock(index=0, taskDef=taskDef, detector=0),
                QuantumIterDataMock(index=1, taskDef=taskDefCrash, detector=1),
                QuantumIterDataMock(index=2, taskDef=taskDef, detector=2),
            ]
        )

        qexec = QuantumExecutorMock(mp=True)
        mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec)
        with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"):
            mpexec.execute(qgraph, butler=None)

    def test_mpexec_crash_failfast(self):
        """Check task crash due to signal with --fail-fast"""

        taskDef = TaskDefMock()
        taskDefCrash = TaskDefMock(taskClass=TaskMockCrash)
        qgraph = QuantumGraphMock(
            [
                QuantumIterDataMock(index=0, taskDef=taskDef, detector=0),
                QuantumIterDataMock(index=1, taskDef=taskDefCrash, detector=1),
                QuantumIterDataMock(index=2, taskDef=taskDef, detector=2),
            ]
        )

        qexec = QuantumExecutorMock(mp=True)
        mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec, failFast=True)
        with self.assertRaisesRegex(MPGraphExecutorError, "failed, killed by signal 4 .Illegal instruction"):
            mpexec.execute(qgraph, butler=None)

    def test_mpexec_num_fd(self):
        """Check that number of open files stays reasonable"""

        taskDef = TaskDefMock()
        qgraph = QuantumGraphMock(
            [QuantumIterDataMock(index=i, taskDef=taskDef, detector=i) for i in range(20)]
        )

        this_proc = psutil.Process()
        num_fds_0 = this_proc.num_fds()

        # run in multi-process mode, the order of results is not defined
        qexec = QuantumExecutorMock(mp=True)
        mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec)
        mpexec.execute(qgraph, butler=None)

        num_fds_1 = this_proc.num_fds()
        # They should be the same but allow small growth just in case.
        # Without DM-26728 fix the difference would be equal to number of
        # quanta (20).
        self.assertLess(num_fds_1 - num_fds_0, 5)
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()