Coverage for tests/test_executors.py: 31%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

235 statements  

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Unit tests for the MPGraphExecutor class.

23"""

24 

25import logging 

26import signal 

27import sys 

28import time 

29import unittest 

30import warnings 

31from multiprocessing import Manager 

32 

33import networkx as nx 

34import psutil 

35from lsst.ctrl.mpexec import MPGraphExecutor, MPGraphExecutorError, MPTimeoutError, QuantumExecutor 

36from lsst.ctrl.mpexec.execFixupDataId import ExecFixupDataId 

37from lsst.pipe.base import NodeId 

38 

# Configure root logging at DEBUG level for the whole test module.
logging.basicConfig(level=logging.DEBUG)

# Module-level logger used by the mock classes below.
_LOG = logging.getLogger(__name__)

42 

43 

class QuantumExecutorMock(QuantumExecutor):
    """Mock QuantumExecutor that records every quantum passed to `execute`.

    Parameters
    ----------
    mp : `bool`, optional
        If `True`, the list of executed quanta is a list proxy created by a
        `multiprocessing.Manager`; otherwise it is a plain `list`.
    """

    def __init__(self, mp=False):
        # In multiprocess mode use a shared list so that quanta appended
        # from other processes are recorded here too.
        self.quanta = Manager().list() if mp else []

    def execute(self, taskDef, quantum, butler):
        """Run the task class (if any), record the quantum and return it.

        ``butler`` is accepted but not used.
        """
        _LOG.debug("QuantumExecutorMock.execute: taskDef=%s dataId=%s", taskDef, quantum.dataId)
        taskClass = taskDef.taskClass
        if taskClass:
            # only works for the task mock classes defined in this module,
            # whose runQuantum() takes no arguments
            taskClass().runQuantum()
        self.quanta.append(quantum)
        return quantum

    def getDataIds(self, field):
        """Return the value of dataId ``field`` for each recorded quantum,
        in the order the quanta were recorded.
        """
        values = []
        for visited in self.quanta:
            values.append(visited.dataId[field])
        return values

65 

66 

class QuantumMock:
    """Minimal stand-in for a quantum, identified only by its data ID.

    Parameters
    ----------
    dataId : `dict`
        Mapping of data ID field names to values. Items must be sortable
        and hashable because they are used to compute the hash.
    """

    def __init__(self, dataId):
        self.dataId = dataId

    def __eq__(self, other):
        """Compare by ``dataId``; objects without a ``dataId`` attribute
        are reported as not comparable instead of raising.
        """
        try:
            otherDataId = other.dataId
        except AttributeError:
            return NotImplemented
        return self.dataId == otherDataId

    def __hash__(self):
        # dict.__eq__ is order-insensitive, so sort the items to make the
        # hash independent of insertion order as well.
        return hash(tuple(sorted(self.dataId.items())))

77 

78 

class QuantumIterDataMock:
    """Simple class to mock QuantumIterData.

    Parameters
    ----------
    index : `int`
        Index of the node; also used to build its ``nodeId``.
    taskDef
        Task definition object associated with this node.
    **dataId
        Keyword arguments that become the data ID of the wrapped
        `QuantumMock`.
    """

    def __init__(self, index, taskDef, **dataId):
        self.taskDef = taskDef
        self.index = index
        self.nodeId = NodeId(index, "DummyBuildString")
        self.quantum = QuantumMock(dataId)
        # Indices of nodes this node depends on; filled in by the tests.
        self.dependencies = set()

88 

89 

class QuantumGraphMock:
    """Mock for quantum graph.

    The nodes are linked into a single chain: each node gets an edge to the
    node that follows it in ``qdata``.

    Parameters
    ----------
    qdata : `list`
        Node objects (for example `QuantumIterDataMock` instances) in the
        order in which they should be chained. May be empty or contain a
        single node.
    """

    def __init__(self, qdata):
        self._graph = nx.DiGraph()
        # Register every node explicitly. Adding only edges would leave a
        # single-node graph empty, and indexing qdata[0] would fail for an
        # empty list.
        self._graph.add_nodes_from(qdata)
        for previous, node in zip(qdata, qdata[1:]):
            self._graph.add_edge(previous, node)

    def __iter__(self):
        """Iterate over nodes in topological order."""
        yield from nx.topological_sort(self._graph)

    def __len__(self):
        """Return the number of nodes in the graph."""
        return len(self._graph)

    def findTaskDefByLabel(self, label):
        """Return the task definition of the first node whose task label
        equals ``label``, or `None` if there is no such node.
        """
        for q in self:
            if q.taskDef.label == label:
                return q.taskDef
        return None

    def getQuantaForTask(self, taskDef):
        """Return the set of quanta of all nodes that use ``taskDef``."""
        return {q.quantum for q in self.getNodesForTask(taskDef)}

    def getNodesForTask(self, taskDef):
        """Return the set of nodes whose task definition equals
        ``taskDef``.
        """
        return {q for q in self if q.taskDef == taskDef}

    @property
    def graph(self):
        """The underlying directed graph."""
        return self._graph

    def findCycle(self):
        """Return an empty list; the mock never reports a cycle."""
        return []

    def determineInputsToQuantumNode(self, node):
        """Return the set of nodes whose ``index`` appears in
        ``node.dependencies``.
        """
        wanted = node.dependencies
        return {otherNode for otherNode in self if otherNode.index in wanted}

136 

137 

class TaskMockMP:
    """Task mock that declares multiprocessing support.

    Its ``runQuantum`` does nothing except log a debug message.
    """

    canMultiprocess = True

    def runQuantum(self) -> None:
        _LOG.debug("TaskMockMP.runQuantum")

146 

147 

class TaskMockFail:
    """Task mock whose ``runQuantum`` always raises `ValueError`."""

    canMultiprocess = True

    def runQuantum(self) -> None:
        _LOG.debug("TaskMockFail.runQuantum")
        failure = ValueError("expected failure")
        raise failure

156 

157 

class TaskMockCrash:
    """Task mock whose ``runQuantum`` raises the SIGILL signal in the
    calling process.
    """

    canMultiprocess = True

    def runQuantum(self) -> None:
        _LOG.debug("TaskMockCrash.runQuantum")
        crashSignal = signal.SIGILL
        signal.raise_signal(crashSignal)

166 

167 

class TaskMockSleep:
    """Task mock whose ``runQuantum`` sleeps for five seconds."""

    canMultiprocess = True

    def runQuantum(self) -> None:
        _LOG.debug("TaskMockSleep.runQuantum")
        seconds = 5.0
        time.sleep(seconds)

176 

177 

class TaskMockLongSleep:
    """Task mock whose ``runQuantum`` sleeps for one hundred seconds."""

    canMultiprocess = True

    def runQuantum(self) -> None:
        _LOG.debug("TaskMockLongSleep.runQuantum")
        seconds = 100.0
        time.sleep(seconds)

186 

187 

class TaskMockNoMP:
    """Task mock that declares it does not support multiprocessing."""

    # Flag marking this task as not runnable in multiprocess mode.
    canMultiprocess = False

192 

193 

class TaskDefMock:
    """Simple mock class for task definition in a pipeline.

    Parameters
    ----------
    taskName : `str`, optional
        Name of the task.
    config : optional
        Task configuration; stored but not otherwise used by this class.
    taskClass : `type`, optional
        Task class; its ``__name__`` is used in the string representation.
    label : `str`, optional
        Task label.
    """

    def __init__(self, taskName="Task", config=None, taskClass=TaskMockMP, label="task1"):
        self.label = label
        self.taskClass = taskClass
        self.config = config
        self.taskName = taskName

    def __str__(self):
        className = self.taskClass.__name__
        return f"TaskDefMock(taskName={self.taskName}, taskClass={className})"

205 

206 

class MPGraphExecutorTestCase(unittest.TestCase):
    """A test case for MPGraphExecutor class.

    All tests run the executor on graphs built from the mock classes
    defined in this module, with ``butler=None``.
    """

    def test_mpexec_nomp(self):
        """Make simple graph and execute it in a single process."""

        taskDef = TaskDefMock()
        qgraph = QuantumGraphMock(
            [QuantumIterDataMock(index=i, taskDef=taskDef, detector=i) for i in range(3)]
        )

        # run in single-process mode
        qexec = QuantumExecutorMock()
        mpexec = MPGraphExecutor(numProc=1, timeout=100, quantumExecutor=qexec)
        mpexec.execute(qgraph, butler=None)
        self.assertEqual(qexec.getDataIds("detector"), [0, 1, 2])

    def test_mpexec_mp(self):
        """Make simple graph and execute it in multiple processes, once per
        available start method.
        """

        taskDef = TaskDefMock()
        qgraph = QuantumGraphMock(
            [QuantumIterDataMock(index=i, taskDef=taskDef, detector=i) for i in range(3)]
        )

        methods = ["spawn"]
        if sys.platform == "linux":
            methods.append("fork")
            methods.append("forkserver")

        for method in methods:
            with self.subTest(startMethod=method):
                # Run in multi-process mode, the order of results is not
                # defined.
                qexec = QuantumExecutorMock(mp=True)
                mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec, startMethod=method)
                mpexec.execute(qgraph, butler=None)
                self.assertCountEqual(qexec.getDataIds("detector"), [0, 1, 2])

    def test_mpexec_nompsupport(self):
        """Try to run MP for task that has no MP support which should fail"""

        taskDef = TaskDefMock(taskClass=TaskMockNoMP)
        qgraph = QuantumGraphMock(
            [QuantumIterDataMock(index=i, taskDef=taskDef, detector=i) for i in range(3)]
        )

        # run in multi-process mode
        qexec = QuantumExecutorMock()
        mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec)
        with self.assertRaisesRegex(MPGraphExecutorError, "Task Task does not support multiprocessing"):
            mpexec.execute(qgraph, butler=None)

    def test_mpexec_fixup(self):
        """Make simple graph and execute, add dependencies by executing fixup
        code.
        """

        taskDef = TaskDefMock()

        for reverse in (False, True):
            # Use a sub-test per direction so that a failure is labelled
            # with the value of ``reverse`` and does not prevent the other
            # direction from being checked.
            with self.subTest(reverse=reverse):
                qgraph = QuantumGraphMock(
                    [QuantumIterDataMock(index=i, taskDef=taskDef, detector=i) for i in range(3)]
                )

                qexec = QuantumExecutorMock()
                fixup = ExecFixupDataId("task1", "detector", reverse=reverse)
                mpexec = MPGraphExecutor(
                    numProc=1, timeout=100, quantumExecutor=qexec, executionGraphFixup=fixup
                )
                mpexec.execute(qgraph, butler=None)

                expected = [0, 1, 2]
                if reverse:
                    expected = list(reversed(expected))
                self.assertEqual(qexec.getDataIds("detector"), expected)

    def test_mpexec_timeout(self):
        """Fail due to timeout"""

        taskDef = TaskDefMock()
        taskDefSleep = TaskDefMock(taskClass=TaskMockSleep)
        qgraph = QuantumGraphMock(
            [
                QuantumIterDataMock(index=0, taskDef=taskDef, detector=0),
                QuantumIterDataMock(index=1, taskDef=taskDefSleep, detector=1),
                QuantumIterDataMock(index=2, taskDef=taskDef, detector=2),
            ]
        )

        # with failFast we'll get immediate MPTimeoutError
        qexec = QuantumExecutorMock(mp=True)
        mpexec = MPGraphExecutor(numProc=3, timeout=1, quantumExecutor=qexec, failFast=True)
        with self.assertRaises(MPTimeoutError):
            mpexec.execute(qgraph, butler=None)

        # with failFast=False exception happens after last task finishes
        qexec = QuantumExecutorMock(mp=True)
        mpexec = MPGraphExecutor(numProc=3, timeout=3, quantumExecutor=qexec, failFast=False)
        with self.assertRaises(MPTimeoutError):
            mpexec.execute(qgraph, butler=None)
        # We expect two tasks (0 and 2) to finish successfully and one task to
        # timeout. Unfortunately on busy CPU there is no guarantee that tasks
        # finish on time, so expect more timeouts and issue a warning.
        detectorIds = set(qexec.getDataIds("detector"))
        self.assertLess(len(detectorIds), 3)
        if detectorIds != {0, 2}:
            warnings.warn(f"Possibly timed out tasks, expected [0, 2], received {detectorIds}")

    def test_mpexec_failure(self):
        """Failure in one task should not stop other tasks"""

        taskDef = TaskDefMock()
        taskDefFail = TaskDefMock(taskClass=TaskMockFail)
        qgraph = QuantumGraphMock(
            [
                QuantumIterDataMock(index=0, taskDef=taskDef, detector=0),
                QuantumIterDataMock(index=1, taskDef=taskDefFail, detector=1),
                QuantumIterDataMock(index=2, taskDef=taskDef, detector=2),
            ]
        )

        qexec = QuantumExecutorMock(mp=True)
        mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec)
        with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"):
            mpexec.execute(qgraph, butler=None)
        self.assertCountEqual(qexec.getDataIds("detector"), [0, 2])

    def test_mpexec_failure_dep(self):
        """Failure in one task should skip dependents"""

        taskDef = TaskDefMock()
        taskDefFail = TaskDefMock(taskClass=TaskMockFail)
        qdata = [
            QuantumIterDataMock(index=0, taskDef=taskDef, detector=0),
            QuantumIterDataMock(index=1, taskDef=taskDefFail, detector=1),
            QuantumIterDataMock(index=2, taskDef=taskDef, detector=2),
            QuantumIterDataMock(index=3, taskDef=taskDef, detector=3),
            QuantumIterDataMock(index=4, taskDef=taskDef, detector=4),
        ]
        # Node 2 depends on the failing node 1, and node 4 depends on
        # nodes 2 and 3, so only nodes 0 and 3 are expected to run.
        qdata[2].dependencies.add(1)
        qdata[4].dependencies.add(3)
        qdata[4].dependencies.add(2)

        qgraph = QuantumGraphMock(qdata)

        qexec = QuantumExecutorMock(mp=True)
        mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec)
        with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"):
            mpexec.execute(qgraph, butler=None)
        self.assertCountEqual(qexec.getDataIds("detector"), [0, 3])

    def test_mpexec_failure_failfast(self):
        """Fast fail stops quickly.

        Timing delay of task #3 should be sufficient to process
        failure and raise exception.
        """

        taskDef = TaskDefMock()
        taskDefFail = TaskDefMock(taskClass=TaskMockFail)
        taskDefLongSleep = TaskDefMock(taskClass=TaskMockLongSleep)
        qdata = [
            QuantumIterDataMock(index=0, taskDef=taskDef, detector=0),
            QuantumIterDataMock(index=1, taskDef=taskDefFail, detector=1),
            QuantumIterDataMock(index=2, taskDef=taskDef, detector=2),
            QuantumIterDataMock(index=3, taskDef=taskDefLongSleep, detector=3),
            QuantumIterDataMock(index=4, taskDef=taskDef, detector=4),
        ]
        qdata[1].dependencies.add(0)
        qdata[2].dependencies.add(1)
        qdata[4].dependencies.add(3)
        qdata[4].dependencies.add(2)

        qgraph = QuantumGraphMock(qdata)

        qexec = QuantumExecutorMock(mp=True)
        mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec, failFast=True)
        with self.assertRaisesRegex(MPGraphExecutorError, "failed, exit code=1"):
            mpexec.execute(qgraph, butler=None)
        self.assertCountEqual(qexec.getDataIds("detector"), [0])

    def test_mpexec_crash(self):
        """Check task crash due to signal"""

        taskDef = TaskDefMock()
        taskDefCrash = TaskDefMock(taskClass=TaskMockCrash)
        qgraph = QuantumGraphMock(
            [
                QuantumIterDataMock(index=0, taskDef=taskDef, detector=0),
                QuantumIterDataMock(index=1, taskDef=taskDefCrash, detector=1),
                QuantumIterDataMock(index=2, taskDef=taskDef, detector=2),
            ]
        )

        qexec = QuantumExecutorMock(mp=True)
        mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec)
        with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"):
            mpexec.execute(qgraph, butler=None)

    def test_mpexec_crash_failfast(self):
        """Check task crash due to signal with --fail-fast"""

        taskDef = TaskDefMock()
        taskDefCrash = TaskDefMock(taskClass=TaskMockCrash)
        qgraph = QuantumGraphMock(
            [
                QuantumIterDataMock(index=0, taskDef=taskDef, detector=0),
                QuantumIterDataMock(index=1, taskDef=taskDefCrash, detector=1),
                QuantumIterDataMock(index=2, taskDef=taskDef, detector=2),
            ]
        )

        qexec = QuantumExecutorMock(mp=True)
        mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec, failFast=True)
        with self.assertRaisesRegex(MPGraphExecutorError, "failed, killed by signal 4 .Illegal instruction"):
            mpexec.execute(qgraph, butler=None)

    def test_mpexec_num_fd(self):
        """Check that number of open files stays reasonable"""

        taskDef = TaskDefMock()
        qgraph = QuantumGraphMock(
            [QuantumIterDataMock(index=i, taskDef=taskDef, detector=i) for i in range(20)]
        )

        this_proc = psutil.Process()
        num_fds_0 = this_proc.num_fds()

        # run in multi-process mode, the order of results is not defined
        qexec = QuantumExecutorMock(mp=True)
        mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec)
        mpexec.execute(qgraph, butler=None)

        num_fds_1 = this_proc.num_fds()
        # They should be the same but allow small growth just in case.
        # Without DM-26728 fix the difference would be equal to number of
        # quanta (20).
        self.assertLess(num_fds_1 - num_fds_0, 5)

444 

445 

if __name__ == "__main__":
    # Run the tests when this module is executed as a script.
    unittest.main()