Coverage for tests/test_executors.py: 15%

410 statements  

« prev     ^ index     » next       coverage.py v7.2.3, created at 2023-04-20 10:51 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Simple unit tests for the MPGraphExecutor and SingleQuantumExecutor classes.

23""" 

24 

25import faulthandler 

26import logging 

27import os 

28import signal 

29import sys 

30import time 

31import unittest 

32import warnings 

33from multiprocessing import Manager 

34 

35import networkx as nx 

36import psutil 

37from lsst.ctrl.mpexec import ( 

38 ExecutionStatus, 

39 MPGraphExecutor, 

40 MPGraphExecutorError, 

41 MPTimeoutError, 

42 QuantumExecutor, 

43 QuantumReport, 

44 SingleQuantumExecutor, 

45) 

46from lsst.ctrl.mpexec.execFixupDataId import ExecFixupDataId 

47from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

48from lsst.pipe.base import NodeId 

49from lsst.pipe.base.tests.simpleQGraph import AddTaskFactoryMock, makeSimpleQGraph 

50 

# Emit DEBUG-level log records so that the mocks' debug messages are visible
# when the tests run.
logging.basicConfig(level=logging.DEBUG)

# Logger shared by the mock classes in this module.
_LOG = logging.getLogger(__name__)

# Absolute path of the directory containing this file; temporary test
# directories are created underneath it.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

56 

57 

class QuantumExecutorMock(QuantumExecutor):
    """Stand-in for a QuantumExecutor that records every quantum it ran.

    Parameters
    ----------
    mp : `bool`, optional
        If true, the list of visited quanta is a ``multiprocessing.Manager``
        list instead of a plain `list`.
    """

    def __init__(self, mp=False):
        # in multiprocess mode use shared list
        self.quanta = Manager().list() if mp else []
        self.report = None
        self._execute_called = False

    def execute(self, taskDef, quantum):
        """Run the mock task for ``taskDef`` and record ``quantum``.

        A `QuantumReport` is stored for later retrieval by `getReport`. If
        the task raises, a report built from the exception is stored, the
        exception propagates, and the quantum is not recorded.

        Returns
        -------
        quantum
            The same ``quantum`` object that was passed in.
        """
        _LOG.debug("QuantumExecutorMock.execute: taskDef=%s dataId=%s", taskDef, quantum.dataId)
        self._execute_called = True
        task_class = taskDef.taskClass
        if task_class:
            try:
                # only works for one of the TaskMock classes below
                task_class().runQuantum()
                self.report = QuantumReport(dataId=quantum.dataId, taskLabel=taskDef.label)
            except Exception as err:
                self.report = QuantumReport.from_exception(
                    exception=err,
                    dataId=quantum.dataId,
                    taskLabel=taskDef.label,
                )
                raise
        self.quanta.append(quantum)
        return quantum

    def getReport(self):
        """Return the report stored by the most recent `execute` call.

        Raises
        ------
        RuntimeError
            Raised if `execute` has not been called on this instance.
        """
        if self._execute_called:
            return self.report
        raise RuntimeError("getReport called before execute")

    def getDataIds(self, field):
        """Return the value of dataId ``field`` for each visited quantum,
        in the order the quanta were recorded.
        """
        values = []
        for visited in self.quanta:
            values.append(visited.dataId[field])
        return values

96 

97 

class QuantumMock:
    """Minimal stand-in for a quantum that carries only a data ID.

    Instances compare equal when their data IDs are equal, and they are
    hashable so that they can be stored in sets.

    Parameters
    ----------
    dataId : `dict`
        Mapping of data ID field names to values. Keys and values must be
        sortable and hashable for `__hash__` to work.
    """

    def __init__(self, dataId):
        self.dataId = dataId

    def __eq__(self, other):
        # Let Python fall back to its default handling when the other object
        # has no data ID, instead of failing with AttributeError.
        if not hasattr(other, "dataId"):
            return NotImplemented
        return self.dataId == other.dataId

    def __hash__(self):
        # dict.__eq__ is order-insensitive, so sort the items to make the
        # hash order-insensitive too.
        return hash(tuple(sorted(self.dataId.items())))

108 

109 

class QuantumIterDataMock:
    """Mock of QuantumIterData: one node of a mock quantum graph.

    Parameters
    ----------
    index : `int`
        Index of the node; also used to build its ``nodeId``.
    taskDef
        Task definition associated with the node.
    **dataId
        Keyword arguments collected into the data ID of the node's quantum.
    """

    def __init__(self, index, taskDef, **dataId):
        self.nodeId = NodeId(index, "DummyBuildString")
        self.index = index
        self.taskDef = taskDef
        # Indices of the nodes this node depends on; filled in by tests.
        self.dependencies = set()
        self.quantum = QuantumMock(dataId)

119 

120 

class QuantumGraphMock:
    """Mock for quantum graph.

    The nodes are linked into a simple chain in the order given, so that
    topological iteration reproduces that order.

    Parameters
    ----------
    qdata : iterable
        Hashable node objects (e.g. `QuantumIterDataMock`) exposing
        ``index``, ``taskDef``, ``quantum`` and ``dependencies``. May be
        empty or contain a single node.
    """

    def __init__(self, qdata):
        nodes = list(qdata)
        self._graph = nx.DiGraph()
        # Register nodes explicitly: adding edges alone would leave a
        # single-node input with an empty graph and fail on empty input.
        self._graph.add_nodes_from(nodes)
        self._graph.add_edges_from(zip(nodes, nodes[1:]))

    def __iter__(self):
        """Iterate over nodes in topological order."""
        yield from nx.topological_sort(self._graph)

    def __len__(self):
        """Return the number of nodes in the graph."""
        return len(self._graph)

    def findTaskDefByLabel(self, label):
        """Return the task definition of the first node whose task label
        equals ``label``, or `None` if there is no such node.
        """
        for node in self:
            if node.taskDef.label == label:
                return node.taskDef
        return None

    def getQuantaForTask(self, taskDef):
        """Return the set of quanta of all nodes that use ``taskDef``."""
        return {node.quantum for node in self.getNodesForTask(taskDef)}

    def getNodesForTask(self, taskDef):
        """Return the set of nodes whose task definition equals ``taskDef``."""
        return {node for node in self if node.taskDef == taskDef}

    @property
    def graph(self):
        """The underlying ``networkx.DiGraph``."""
        return self._graph

    def findCycle(self):
        """Return an empty list; this mock never reports a cycle."""
        return []

    def determineInputsToQuantumNode(self, node):
        """Return the set of nodes whose ``index`` is listed in
        ``node.dependencies``.
        """
        wanted = node.dependencies
        return {other for other in self if other.index in wanted}

167 

168 

class TaskMockMP:
    """Mock task that declares multiprocessing support and does nothing
    beyond logging when run.
    """

    canMultiprocess = True

    def runQuantum(self):
        """Log a debug message and return."""
        _LOG.debug("TaskMockMP.runQuantum")

177 

178 

class TaskMockFail:
    """Mock task whose ``runQuantum`` always raises."""

    canMultiprocess = True

    def runQuantum(self):
        """Log a debug message, then raise `ValueError`."""
        _LOG.debug("TaskMockFail.runQuantum")
        raise ValueError("expected failure")

187 

188 

class TaskMockCrash:
    """Mock task whose ``runQuantum`` raises SIGILL in the current process
    to imitate a crash.
    """

    canMultiprocess = True

    def runQuantum(self):
        """Log a debug message, then send SIGILL to this process."""
        _LOG.debug("TaskMockCrash.runQuantum")
        # Disable fault handler to suppress long scary traceback.
        faulthandler.disable()
        signal.raise_signal(signal.SIGILL)

199 

200 

class TaskMockLongSleep:
    """Mock task that "runs" for a very long time by sleeping."""

    canMultiprocess = True

    def runQuantum(self):
        """Log a debug message, then sleep for 100 seconds."""
        _LOG.debug("TaskMockLongSleep.runQuantum")
        time.sleep(100.0)

209 

210 

class TaskMockNoMP:
    """Mock task that declares it cannot be run with multiprocessing."""

    canMultiprocess = False

215 

216 

class TaskDefMock:
    """Simple mock class for task definition in a pipeline.

    Parameters
    ----------
    taskName : `str`, optional
        Name of the task.
    config : optional
        Task configuration; stored as given and not otherwise used here.
    taskClass : `type` or `None`, optional
        Class instantiated by the mock quantum executor to run the task.
    label : `str`, optional
        Task label.
    """

    def __init__(self, taskName="Task", config=None, taskClass=TaskMockMP, label="task1"):
        self.taskName = taskName
        self.config = config
        self.taskClass = taskClass
        self.label = label

    def __str__(self):
        # taskClass may be None (the mock executor checks for that), and this
        # method is called when the executor logs the task definition, so do
        # not assume that ``__name__`` exists.
        class_name = getattr(self.taskClass, "__name__", self.taskClass)
        return f"TaskDefMock(taskName={self.taskName}, taskClass={class_name})"

228 

229 

230def _count_status(report, status): 

231 """Count number of quanta witha a given status.""" 

232 return len([qrep for qrep in report.quantaReports if qrep.status is status]) 

233 

234 

class MPGraphExecutorTestCase(unittest.TestCase):
    """Tests of MPGraphExecutor run against mock graphs and executors."""

    @staticmethod
    def _make_quanta(taskDefs):
        """Return one `QuantumIterDataMock` per task definition.

        The entry at position ``i`` gets ``index=i`` and data ID
        ``detector=i``.
        """
        return [
            QuantumIterDataMock(index=position, taskDef=task_def, detector=position)
            for position, task_def in enumerate(taskDefs)
        ]

    def test_mpexec_nomp(self):
        """Execute a simple three-quantum graph in a single process."""
        graph = QuantumGraphMock(self._make_quanta([TaskDefMock()] * 3))

        # run in single-process mode
        executor = QuantumExecutorMock()
        runner = MPGraphExecutor(numProc=1, timeout=100, quantumExecutor=executor)
        runner.execute(graph)
        self.assertEqual(executor.getDataIds("detector"), [0, 1, 2])

        summary = runner.getReport()
        self.assertEqual(summary.status, ExecutionStatus.SUCCESS)
        self.assertIsNone(summary.exitCode)
        self.assertIsNone(summary.exceptionInfo)
        self.assertEqual(len(summary.quantaReports), 3)
        for quantum_report in summary.quantaReports:
            self.assertEqual(quantum_report.status, ExecutionStatus.SUCCESS)
        for quantum_report in summary.quantaReports:
            self.assertIsNone(quantum_report.exitCode)
        for quantum_report in summary.quantaReports:
            self.assertIsNone(quantum_report.exceptionInfo)
        for quantum_report in summary.quantaReports:
            self.assertEqual(quantum_report.taskLabel, "task1")

    def test_mpexec_mp(self):
        """Execute a simple three-quantum graph with several processes,
        once per available start method.
        """
        graph = QuantumGraphMock(self._make_quanta([TaskDefMock()] * 3))

        # "fork" and "forkserver" are only tried on Linux.
        if sys.platform == "linux":
            start_methods = ["spawn", "fork", "forkserver"]
        else:
            start_methods = ["spawn"]

        for start_method in start_methods:
            with self.subTest(startMethod=start_method):
                # Run in multi-process mode, the order of results is not
                # defined.
                executor = QuantumExecutorMock(mp=True)
                runner = MPGraphExecutor(
                    numProc=3, timeout=100, quantumExecutor=executor, startMethod=start_method
                )
                runner.execute(graph)
                self.assertCountEqual(executor.getDataIds("detector"), [0, 1, 2])

                summary = runner.getReport()
                self.assertEqual(summary.status, ExecutionStatus.SUCCESS)
                self.assertIsNone(summary.exitCode)
                self.assertIsNone(summary.exceptionInfo)
                self.assertEqual(len(summary.quantaReports), 3)
                for quantum_report in summary.quantaReports:
                    self.assertEqual(quantum_report.status, ExecutionStatus.SUCCESS)
                for quantum_report in summary.quantaReports:
                    self.assertEqual(quantum_report.exitCode, 0)
                for quantum_report in summary.quantaReports:
                    self.assertIsNone(quantum_report.exceptionInfo)
                for quantum_report in summary.quantaReports:
                    self.assertEqual(quantum_report.taskLabel, "task1")

    def test_mpexec_nompsupport(self):
        """Multi-process execution of a task without MP support must fail."""
        no_mp_task = TaskDefMock(taskClass=TaskMockNoMP)
        graph = QuantumGraphMock(self._make_quanta([no_mp_task] * 3))

        # run in multi-process mode
        executor = QuantumExecutorMock()
        runner = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=executor)
        with self.assertRaisesRegex(MPGraphExecutorError, "Task Task does not support multiprocessing"):
            runner.execute(graph)

    def test_mpexec_fixup(self):
        """Execute a simple graph after a fixup has added dependencies that
        order the quanta by detector, ascending and then descending.
        """
        task = TaskDefMock()

        for reverse, expected_order in ((False, [0, 1, 2]), (True, [2, 1, 0])):
            graph = QuantumGraphMock(self._make_quanta([task] * 3))

            executor = QuantumExecutorMock()
            fixup = ExecFixupDataId("task1", "detector", reverse=reverse)
            runner = MPGraphExecutor(
                numProc=1, timeout=100, quantumExecutor=executor, executionGraphFixup=fixup
            )
            runner.execute(graph)

            self.assertEqual(executor.getDataIds("detector"), expected_order)

    def test_mpexec_timeout(self):
        """Execution fails when one task exceeds the timeout."""
        quick = TaskDefMock()
        sleeper = TaskDefMock(taskClass=TaskMockLongSleep)
        graph = QuantumGraphMock(self._make_quanta([quick, sleeper, quick]))

        # with failFast we'll get immediate MPTimeoutError
        executor = QuantumExecutorMock(mp=True)
        runner = MPGraphExecutor(numProc=3, timeout=1, quantumExecutor=executor, failFast=True)
        with self.assertRaises(MPTimeoutError):
            runner.execute(graph)
        summary = runner.getReport()
        self.assertEqual(summary.status, ExecutionStatus.TIMEOUT)
        self.assertEqual(summary.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPTimeoutError")
        self.assertGreater(len(summary.quantaReports), 0)
        self.assertEqual(_count_status(summary, ExecutionStatus.TIMEOUT), 1)
        self.assertTrue(any(quantum_report.exitCode < 0 for quantum_report in summary.quantaReports))
        for quantum_report in summary.quantaReports:
            self.assertIsNone(quantum_report.exceptionInfo)

        # with failFast=False exception happens after last task finishes
        executor = QuantumExecutorMock(mp=True)
        runner = MPGraphExecutor(numProc=3, timeout=3, quantumExecutor=executor, failFast=False)
        with self.assertRaises(MPTimeoutError):
            runner.execute(graph)
        # We expect two tasks (0 and 2) to finish successfully and one task to
        # timeout. Unfortunately on busy CPU there is no guarantee that tasks
        # finish on time, so expect more timeouts and issue a warning.
        detectorIds = set(executor.getDataIds("detector"))
        self.assertLess(len(detectorIds), 3)
        if detectorIds != {0, 2}:
            warnings.warn(f"Possibly timed out tasks, expected [0, 2], received {detectorIds}")
        summary = runner.getReport()
        self.assertEqual(summary.status, ExecutionStatus.TIMEOUT)
        self.assertEqual(summary.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPTimeoutError")
        self.assertGreater(len(summary.quantaReports), 0)
        self.assertGreater(_count_status(summary, ExecutionStatus.TIMEOUT), 0)
        self.assertTrue(any(quantum_report.exitCode < 0 for quantum_report in summary.quantaReports))
        for quantum_report in summary.quantaReports:
            self.assertIsNone(quantum_report.exceptionInfo)

    def test_mpexec_failure(self):
        """A failure in one task must not stop the other tasks."""
        good = TaskDefMock()
        failing = TaskDefMock(taskClass=TaskMockFail)
        graph = QuantumGraphMock(self._make_quanta([good, failing, good]))

        executor = QuantumExecutorMock(mp=True)
        runner = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=executor)
        with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"):
            runner.execute(graph)
        self.assertCountEqual(executor.getDataIds("detector"), [0, 2])

        summary = runner.getReport()
        self.assertEqual(summary.status, ExecutionStatus.FAILURE)
        self.assertEqual(
            summary.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError"
        )
        self.assertGreater(len(summary.quantaReports), 0)
        self.assertEqual(_count_status(summary, ExecutionStatus.FAILURE), 1)
        self.assertEqual(_count_status(summary, ExecutionStatus.SUCCESS), 2)
        self.assertTrue(any(quantum_report.exitCode > 0 for quantum_report in summary.quantaReports))
        self.assertTrue(
            any(quantum_report.exceptionInfo is not None for quantum_report in summary.quantaReports)
        )

    def test_mpexec_failure_dep(self):
        """A failure in one task must skip the tasks that depend on it."""
        good = TaskDefMock()
        failing = TaskDefMock(taskClass=TaskMockFail)
        quanta = self._make_quanta([good, failing, good, good, good])
        quanta[2].dependencies.add(1)
        quanta[4].dependencies.update((3, 2))

        graph = QuantumGraphMock(quanta)

        executor = QuantumExecutorMock(mp=True)
        runner = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=executor)
        with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"):
            runner.execute(graph)
        self.assertCountEqual(executor.getDataIds("detector"), [0, 3])

        summary = runner.getReport()
        self.assertEqual(summary.status, ExecutionStatus.FAILURE)
        self.assertEqual(
            summary.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError"
        )
        # Dependencies of failed tasks do not appear in quantaReports
        self.assertGreater(len(summary.quantaReports), 0)
        self.assertEqual(_count_status(summary, ExecutionStatus.FAILURE), 1)
        self.assertEqual(_count_status(summary, ExecutionStatus.SUCCESS), 2)
        self.assertEqual(_count_status(summary, ExecutionStatus.SKIPPED), 2)
        self.assertTrue(any(quantum_report.exitCode > 0 for quantum_report in summary.quantaReports))
        self.assertTrue(
            any(quantum_report.exceptionInfo is not None for quantum_report in summary.quantaReports)
        )

    def test_mpexec_failure_dep_nomp(self):
        """A failure in one task must skip the tasks that depend on it;
        in-process version.
        """
        good = TaskDefMock()
        failing = TaskDefMock(taskClass=TaskMockFail)
        quanta = self._make_quanta([good, failing, good, good, good])
        quanta[2].dependencies.add(1)
        quanta[4].dependencies.update((3, 2))

        graph = QuantumGraphMock(quanta)

        executor = QuantumExecutorMock()
        runner = MPGraphExecutor(numProc=1, timeout=100, quantumExecutor=executor)
        with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"):
            runner.execute(graph)
        self.assertCountEqual(executor.getDataIds("detector"), [0, 3])

        summary = runner.getReport()
        self.assertEqual(summary.status, ExecutionStatus.FAILURE)
        self.assertEqual(
            summary.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError"
        )
        # Dependencies of failed tasks do not appear in quantaReports
        self.assertGreater(len(summary.quantaReports), 0)
        self.assertEqual(_count_status(summary, ExecutionStatus.FAILURE), 1)
        self.assertEqual(_count_status(summary, ExecutionStatus.SUCCESS), 2)
        self.assertEqual(_count_status(summary, ExecutionStatus.SKIPPED), 2)
        for quantum_report in summary.quantaReports:
            self.assertIsNone(quantum_report.exitCode)
        self.assertTrue(
            any(quantum_report.exceptionInfo is not None for quantum_report in summary.quantaReports)
        )

    def test_mpexec_failure_failfast(self):
        """Fast fail stops quickly.

        Timing delay of task #3 should be sufficient to process
        failure and raise exception.
        """
        good = TaskDefMock()
        failing = TaskDefMock(taskClass=TaskMockFail)
        sleeper = TaskDefMock(taskClass=TaskMockLongSleep)
        quanta = self._make_quanta([good, failing, good, sleeper, good])
        quanta[1].dependencies.add(0)
        quanta[2].dependencies.add(1)
        quanta[4].dependencies.update((3, 2))

        graph = QuantumGraphMock(quanta)

        executor = QuantumExecutorMock(mp=True)
        runner = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=executor, failFast=True)
        with self.assertRaisesRegex(MPGraphExecutorError, "failed, exit code=1"):
            runner.execute(graph)
        self.assertCountEqual(executor.getDataIds("detector"), [0])

        summary = runner.getReport()
        self.assertEqual(summary.status, ExecutionStatus.FAILURE)
        self.assertEqual(
            summary.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError"
        )
        # Dependencies of failed tasks do not appear in quantaReports
        self.assertGreater(len(summary.quantaReports), 0)
        self.assertEqual(_count_status(summary, ExecutionStatus.FAILURE), 1)
        self.assertTrue(any(quantum_report.exitCode > 0 for quantum_report in summary.quantaReports))
        self.assertTrue(
            any(quantum_report.exceptionInfo is not None for quantum_report in summary.quantaReports)
        )

    def test_mpexec_crash(self):
        """Check task crash due to signal."""
        good = TaskDefMock()
        crashing = TaskDefMock(taskClass=TaskMockCrash)
        graph = QuantumGraphMock(self._make_quanta([good, crashing, good]))

        executor = QuantumExecutorMock(mp=True)
        runner = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=executor)
        with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"):
            runner.execute(graph)

        summary = runner.getReport()
        self.assertEqual(summary.status, ExecutionStatus.FAILURE)
        self.assertEqual(
            summary.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError"
        )
        # Dependencies of failed tasks do not appear in quantaReports
        self.assertGreater(len(summary.quantaReports), 0)
        self.assertEqual(_count_status(summary, ExecutionStatus.FAILURE), 1)
        self.assertEqual(_count_status(summary, ExecutionStatus.SUCCESS), 2)
        self.assertTrue(
            any(quantum_report.exitCode == -signal.SIGILL for quantum_report in summary.quantaReports)
        )
        for quantum_report in summary.quantaReports:
            self.assertIsNone(quantum_report.exceptionInfo)

    def test_mpexec_crash_failfast(self):
        """Check task crash due to signal with --fail-fast."""
        good = TaskDefMock()
        crashing = TaskDefMock(taskClass=TaskMockCrash)
        graph = QuantumGraphMock(self._make_quanta([good, crashing, good]))

        executor = QuantumExecutorMock(mp=True)
        runner = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=executor, failFast=True)
        with self.assertRaisesRegex(MPGraphExecutorError, "failed, killed by signal 4 .Illegal instruction"):
            runner.execute(graph)

        summary = runner.getReport()
        self.assertEqual(summary.status, ExecutionStatus.FAILURE)
        self.assertEqual(
            summary.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError"
        )
        self.assertEqual(_count_status(summary, ExecutionStatus.FAILURE), 1)
        self.assertTrue(
            any(quantum_report.exitCode == -signal.SIGILL for quantum_report in summary.quantaReports)
        )
        for quantum_report in summary.quantaReports:
            self.assertIsNone(quantum_report.exceptionInfo)

    def test_mpexec_num_fd(self):
        """Check that number of open files stays reasonable."""
        graph = QuantumGraphMock(self._make_quanta([TaskDefMock()] * 20))

        process = psutil.Process()
        fds_before = process.num_fds()

        # run in multi-process mode, the order of results is not defined
        executor = QuantumExecutorMock(mp=True)
        runner = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=executor)
        runner.execute(graph)

        fds_after = process.num_fds()
        # They should be the same but allow small growth just in case.
        # Without DM-26728 fix the difference would be equal to number of
        # quanta (20).
        self.assertLess(fds_after - fds_before, 5)

591 

592 

class SingleQuantumExecutorTestCase(unittest.TestCase):
    """Tests for SingleQuantumExecutor implementation."""

    instrument = "lsst.pipe.base.tests.simpleQGraph.SimpleInstrument"

    def setUp(self):
        # Each test gets its own temporary directory under TESTDIR.
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.root)

    def _make_single_node(self):
        """Build a one-quantum graph and return ``(butler, node)``."""
        expected_count = 1
        butler, qgraph = makeSimpleQGraph(expected_count, root=self.root, instrument=self.instrument)

        graph_nodes = list(qgraph)
        self.assertEqual(len(graph_nodes), expected_count)
        return butler, graph_nodes[0]

    def _only_output_ref(self, butler):
        """Return the single "add_dataset1" ref in the butler's run
        collection, asserting that there is exactly one.
        """
        found = list(butler.registry.queryDatasets("add_dataset1", collections=butler.run))
        self.assertEqual(len(found), 1)
        return found[0]

    def test_simple_execute(self) -> None:
        """Run execute() method in simplest setup."""
        butler, node = self._make_single_node()

        factory = AddTaskFactoryMock()
        SingleQuantumExecutor(butler, factory).execute(node.taskDef, node.quantum)
        self.assertEqual(factory.countExec, 1)

        # There must be one dataset of task's output connection
        self._only_output_ref(butler)

    def test_skip_existing_execute(self) -> None:
        """Run execute() method twice, with skip_existing_in."""
        butler, node = self._make_single_node()

        factory = AddTaskFactoryMock()
        SingleQuantumExecutor(butler, factory).execute(node.taskDef, node.quantum)
        self.assertEqual(factory.countExec, 1)

        first_id = self._only_output_ref(butler).id

        # Re-run it with skipExistingIn, it should not run.
        assert butler.run is not None
        skipping = SingleQuantumExecutor(butler, factory, skipExistingIn=[butler.run])
        skipping.execute(node.taskDef, node.quantum)
        self.assertEqual(factory.countExec, 1)

        second_id = self._only_output_ref(butler).id
        self.assertEqual(first_id, second_id)

    def test_clobber_outputs_execute(self) -> None:
        """Run execute() method twice, with clobber_outputs."""
        butler, node = self._make_single_node()

        factory = AddTaskFactoryMock()
        SingleQuantumExecutor(butler, factory).execute(node.taskDef, node.quantum)
        self.assertEqual(factory.countExec, 1)

        first_id = self._only_output_ref(butler).id

        # Re-run it with clobberOutputs and skipExistingIn, it should not
        # clobber but should skip instead.
        assert butler.run is not None
        skipping = SingleQuantumExecutor(
            butler, factory, skipExistingIn=[butler.run], clobberOutputs=True
        )
        skipping.execute(node.taskDef, node.quantum)
        self.assertEqual(factory.countExec, 1)

        second_id = self._only_output_ref(butler).id
        self.assertEqual(first_id, second_id)

        # Re-run it with clobberOutputs but without skipExistingIn, it should
        # clobber.
        assert butler.run is not None
        clobbering = SingleQuantumExecutor(butler, factory, clobberOutputs=True)
        clobbering.execute(node.taskDef, node.quantum)
        self.assertEqual(factory.countExec, 2)

        third_id = self._only_output_ref(butler).id
        self.assertNotEqual(first_id, third_id)

697 

698 

# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()