Coverage for tests/test_executors.py: 15%

416 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-23 10:58 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <https://www.gnu.org/licenses/>. 

27 

"""Unit tests for the MPGraphExecutor and SingleQuantumExecutor classes."""

30 

31import faulthandler 

32import logging 

33import os 

34import signal 

35import sys 

36import time 

37import unittest 

38import warnings 

39from multiprocessing import Manager 

40 

41import networkx as nx 

42import psutil 

43from lsst.ctrl.mpexec import ( 

44 ExecutionStatus, 

45 MPGraphExecutor, 

46 MPGraphExecutorError, 

47 MPTimeoutError, 

48 QuantumExecutor, 

49 QuantumReport, 

50 SingleQuantumExecutor, 

51) 

52from lsst.ctrl.mpexec.execFixupDataId import ExecFixupDataId 

53from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

54from lsst.pipe.base import NodeId 

55from lsst.pipe.base.tests.simpleQGraph import AddTaskFactoryMock, makeSimpleQGraph 

56 

# Configure the root logger at DEBUG level so the mocks' debug messages
# are emitted while the tests run.
logging.basicConfig(level=logging.DEBUG)

# Module-level logger used by the mock classes below.
_LOG = logging.getLogger(__name__)

# Absolute path of the directory that contains this test module.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

62 

63 

class QuantumExecutorMock(QuantumExecutor):
    """Test double for `QuantumExecutor` that records every executed quantum.

    Parameters
    ----------
    mp : `bool`, optional
        If `True`, executed quanta are stored in a ``multiprocessing``
        manager list instead of a plain list.
    """

    def __init__(self, mp=False):
        # In multiprocess mode use a shared (manager-backed) list; otherwise
        # a regular list is used.
        self.quanta = Manager().list() if mp else []
        self.report = None
        self._execute_called = False

    def execute(self, taskDef, quantum):
        """Run the mock task for ``quantum`` and remember the quantum.

        If ``taskDef.taskClass`` is truthy it is instantiated and its
        ``runQuantum`` method is called. A `QuantumReport` is stored in
        ``self.report`` either way; on exception the report is built from
        the exception, which is then re-raised and the quantum is not
        recorded.

        Returns
        -------
        quantum
            The same ``quantum`` object that was passed in.
        """
        _LOG.debug("QuantumExecutorMock.execute: taskDef=%s dataId=%s", taskDef, quantum.dataId)
        self._execute_called = True
        task_class = taskDef.taskClass
        if task_class:
            try:
                # only works for one of the TaskMock classes below
                task_class().runQuantum()
                self.report = QuantumReport(dataId=quantum.dataId, taskLabel=taskDef.label)
            except Exception as exc:
                self.report = QuantumReport.from_exception(
                    exception=exc,
                    dataId=quantum.dataId,
                    taskLabel=taskDef.label,
                )
                raise
        self.quanta.append(quantum)
        return quantum

    def getReport(self):
        """Return the report stored by the latest `execute` call.

        Raises
        ------
        RuntimeError
            Raised if `execute` has not been called on this instance.
        """
        if self._execute_called:
            return self.report
        raise RuntimeError("getReport called before execute")

    def getDataIds(self, field):
        """Return the ``field`` data ID value of each recorded quantum, in
        the order the quanta were recorded.
        """
        values = []
        for recorded in self.quanta:
            values.append(recorded.dataId[field])
        return values

102 

103 

class QuantumMock:
    """Mock equivalent of a `~lsst.daf.butler.Quantum`.

    Two instances compare equal when their data IDs are equal, and the hash
    is derived from the data ID items so instances can be stored in sets.

    Parameters
    ----------
    dataId : `dict`
        Mapping of data ID field names to values. Items must be sortable
        and hashable for `__hash__` to work.
    """

    def __init__(self, dataId):
        self.dataId = dataId

    def __eq__(self, other):
        # Objects without a ``dataId`` attribute (e.g. `None`) are simply
        # "not comparable" rather than an AttributeError; returning
        # NotImplemented lets Python fall back to its default handling.
        if not hasattr(other, "dataId"):
            return NotImplemented
        return self.dataId == other.dataId

    def __hash__(self):
        # dict.__eq__ is order-insensitive, so sort the items to make the
        # hash order-insensitive too.
        return hash(tuple(sorted(self.dataId.items())))

116 

117 

class QuantumIterDataMock:
    """Minimal stand-in for QuantumIterData, used as a graph node.

    Parameters
    ----------
    index : `int`
        Node index; also used to build ``nodeId``.
    taskDef
        Task definition associated with this node.
    **dataId
        Keyword arguments collected into the data ID of the wrapped
        `QuantumMock`.
    """

    def __init__(self, index, taskDef, **dataId):
        self.index = index
        self.nodeId = NodeId(index, "DummyBuildString")
        self.taskDef = taskDef
        self.quantum = QuantumMock(dataId)
        # Indices of the nodes this node depends on; filled in by tests.
        self.dependencies = set()

127 

128 

class QuantumGraphMock:
    """Mock for quantum graph.

    The nodes in ``qdata`` are linked into a simple chain, each node having
    an edge to the next one, so that iteration yields them in the order
    given.

    Parameters
    ----------
    qdata : `list`
        Sequence of node objects (e.g. `QuantumIterDataMock`). Nodes must
        be hashable and expose ``taskDef``, ``quantum``, ``index`` and
        ``dependencies`` attributes for the query methods to work.
    """

    def __init__(self, qdata):
        self._graph = nx.DiGraph()
        # Register nodes explicitly: relying on add_edge alone would drop
        # the only node of a one-element sequence (no edges to add) and
        # fail on an empty one.
        self._graph.add_nodes_from(qdata)
        for previous, node in zip(qdata, qdata[1:]):
            self._graph.add_edge(previous, node)

    def __iter__(self):
        yield from nx.topological_sort(self._graph)

    def __len__(self):
        return len(self._graph)

    def findTaskDefByLabel(self, label):
        """Return the task definition of the first node whose task label
        equals ``label``, or `None` if there is no such node.
        """
        for q in self:
            if q.taskDef.label == label:
                return q.taskDef
        return None

    def getQuantaForTask(self, taskDef):
        """Return the set of quanta of all nodes belonging to ``taskDef``."""
        return {q.quantum for q in self.getNodesForTask(taskDef)}

    def getNodesForTask(self, taskDef):
        """Return the set of nodes whose task definition equals ``taskDef``."""
        return {q for q in self if q.taskDef == taskDef}

    @property
    def graph(self):
        """The underlying `networkx.DiGraph`."""
        return self._graph

    def findCycle(self):
        """Return an empty list; the mock never reports a cycle."""
        return []

    def determineInputsToQuantumNode(self, node):
        """Return the set of nodes whose ``index`` is listed in
        ``node.dependencies``.
        """
        wanted = node.dependencies
        # Walk the graph once instead of once per dependency.
        return {other for other in self if other.index in wanted}

175 

176 

class TaskMockMP:
    """Mock task that declares multiprocessing support and does nothing
    except log when run.
    """

    canMultiprocess = True

    def runQuantum(self):
        """Log the call and return `None`."""
        _LOG.debug("TaskMockMP.runQuantum")

185 

186 

class TaskMockFail:
    """Mock task whose ``runQuantum`` always raises `ValueError`."""

    canMultiprocess = True

    def runQuantum(self):
        """Log the call, then fail.

        Raises
        ------
        ValueError
            Always raised.
        """
        _LOG.debug("TaskMockFail.runQuantum")
        failure = ValueError("expected failure")
        raise failure

195 

196 

class TaskMockCrash:
    """Mock task which crashes by raising ``SIGILL`` in its own process."""

    canMultiprocess = True

    def runQuantum(self):
        """Log the call, then send ``SIGILL`` to the calling process."""
        _LOG.debug("TaskMockCrash.runQuantum")
        # Turn the fault handler off first, otherwise the signal produces a
        # long and alarming traceback in the test output.
        faulthandler.disable()
        crash_signal = signal.SIGILL
        signal.raise_signal(crash_signal)

207 

208 

class TaskMockLongSleep:
    """Mock task that simulates a very long run by sleeping."""

    canMultiprocess = True

    def runQuantum(self):
        """Log the call, then sleep for 100 seconds."""
        _LOG.debug("TaskMockLongSleep.runQuantum")
        sleep_seconds = 100.0
        time.sleep(sleep_seconds)

217 

218 

class TaskMockNoMP:
    """Mock task that declares it cannot be run with multiprocessing.

    It defines no ``runQuantum`` method.
    """

    canMultiprocess = False

223 

224 

class TaskDefMock:
    """Bare-bones stand-in for a pipeline task definition.

    Parameters
    ----------
    taskName : `str`, optional
        Name of the task.
    config : optional
        Task configuration; stored as is.
    taskClass : `type`, optional
        Class of the task; one of the mock task classes in this module.
    label : `str`, optional
        Label of the task.
    """

    def __init__(self, taskName="Task", config=None, taskClass=TaskMockMP, label="task1"):
        self.label = label
        self.taskClass = taskClass
        self.config = config
        self.taskName = taskName

    def __str__(self):
        # Only the task name and the task class name are shown.
        text = f"TaskDefMock(taskName={self.taskName}, taskClass={self.taskClass.__name__})"
        return text

236 

237 

def _count_status(report, status):
    """Count the quanta reports in ``report.quantaReports`` whose ``status``
    is the given ``status`` object (identity comparison).
    """
    return sum(1 for quantum_report in report.quantaReports if quantum_report.status is status)

241 

242 

class MPGraphExecutorTestCase(unittest.TestCase):
    """Exercise MPGraphExecutor using the mock tasks, quanta and graphs
    defined in this module.
    """

    @staticmethod
    def _make_quanta(*task_defs):
        """Build one `QuantumIterDataMock` per task definition.

        The position of each task definition is used both as the node
        ``index`` and as the ``detector`` data ID value.
        """
        return [
            QuantumIterDataMock(index=position, taskDef=task_def, detector=position)
            for position, task_def in enumerate(task_defs)
        ]

    def test_mpexec_nomp(self):
        """Execute a three-quantum graph in single-process mode."""
        task_def = TaskDefMock()
        graph = QuantumGraphMock(self._make_quanta(task_def, task_def, task_def))

        # run in single-process mode
        quantum_executor = QuantumExecutorMock()
        graph_executor = MPGraphExecutor(numProc=1, timeout=100, quantumExecutor=quantum_executor)
        graph_executor.execute(graph)
        self.assertEqual(quantum_executor.getDataIds("detector"), [0, 1, 2])

        report = graph_executor.getReport()
        self.assertEqual(report.status, ExecutionStatus.SUCCESS)
        self.assertIsNone(report.exitCode)
        self.assertIsNone(report.exceptionInfo)
        self.assertEqual(len(report.quantaReports), 3)
        self.assertTrue(all(qrep.status == ExecutionStatus.SUCCESS for qrep in report.quantaReports))
        self.assertTrue(all(qrep.exitCode is None for qrep in report.quantaReports))
        self.assertTrue(all(qrep.exceptionInfo is None for qrep in report.quantaReports))
        self.assertTrue(all(qrep.taskLabel == "task1" for qrep in report.quantaReports))

    def test_mpexec_mp(self):
        """Execute a three-quantum graph with three processes, once per
        multiprocessing start method.
        """
        task_def = TaskDefMock()
        graph = QuantumGraphMock(self._make_quanta(task_def, task_def, task_def))

        # "fork" and "forkserver" are only exercised on Linux.
        start_methods = ["spawn"]
        if sys.platform == "linux":
            start_methods += ["fork", "forkserver"]

        for start_method in start_methods:
            with self.subTest(startMethod=start_method):
                # Run in multi-process mode, the order of results is not
                # defined.
                quantum_executor = QuantumExecutorMock(mp=True)
                graph_executor = MPGraphExecutor(
                    numProc=3, timeout=100, quantumExecutor=quantum_executor, startMethod=start_method
                )
                graph_executor.execute(graph)
                self.assertCountEqual(quantum_executor.getDataIds("detector"), [0, 1, 2])

                report = graph_executor.getReport()
                self.assertEqual(report.status, ExecutionStatus.SUCCESS)
                self.assertIsNone(report.exitCode)
                self.assertIsNone(report.exceptionInfo)
                self.assertEqual(len(report.quantaReports), 3)
                self.assertTrue(all(qrep.status == ExecutionStatus.SUCCESS for qrep in report.quantaReports))
                self.assertTrue(all(qrep.exitCode == 0 for qrep in report.quantaReports))
                self.assertTrue(all(qrep.exceptionInfo is None for qrep in report.quantaReports))
                self.assertTrue(all(qrep.taskLabel == "task1" for qrep in report.quantaReports))

    def test_mpexec_nompsupport(self):
        """Multi-process execution of a task without MP support must fail."""
        no_mp_def = TaskDefMock(taskClass=TaskMockNoMP)
        graph = QuantumGraphMock(self._make_quanta(no_mp_def, no_mp_def, no_mp_def))

        # run in multi-process mode
        quantum_executor = QuantumExecutorMock()
        graph_executor = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=quantum_executor)
        with self.assertRaisesRegex(MPGraphExecutorError, "Task Task does not support multiprocessing"):
            graph_executor.execute(graph)

    def test_mpexec_fixup(self):
        """Execute a simple graph whose dependencies are added by fixup
        code, in both forward and reverse order.
        """
        task_def = TaskDefMock()

        for reverse in (False, True):
            graph = QuantumGraphMock(self._make_quanta(task_def, task_def, task_def))

            quantum_executor = QuantumExecutorMock()
            fixup = ExecFixupDataId("task1", "detector", reverse=reverse)
            graph_executor = MPGraphExecutor(
                numProc=1, timeout=100, quantumExecutor=quantum_executor, executionGraphFixup=fixup
            )
            graph_executor.execute(graph)

            expected = [2, 1, 0] if reverse else [0, 1, 2]
            self.assertEqual(quantum_executor.getDataIds("detector"), expected)

    def test_mpexec_timeout(self):
        """Execution fails with a timeout when one task sleeps too long."""
        task_def = TaskDefMock()
        sleep_def = TaskDefMock(taskClass=TaskMockLongSleep)
        graph = QuantumGraphMock(self._make_quanta(task_def, sleep_def, task_def))

        # with failFast we'll get immediate MPTimeoutError
        quantum_executor = QuantumExecutorMock(mp=True)
        graph_executor = MPGraphExecutor(numProc=3, timeout=1, quantumExecutor=quantum_executor, failFast=True)
        with self.assertRaises(MPTimeoutError):
            graph_executor.execute(graph)
        report = graph_executor.getReport()
        self.assertEqual(report.status, ExecutionStatus.TIMEOUT)
        self.assertEqual(report.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPTimeoutError")
        self.assertGreater(len(report.quantaReports), 0)
        self.assertEqual(_count_status(report, ExecutionStatus.TIMEOUT), 1)
        self.assertTrue(any(qrep.exitCode < 0 for qrep in report.quantaReports))
        self.assertTrue(all(qrep.exceptionInfo is None for qrep in report.quantaReports))

        # with failFast=False exception happens after last task finishes
        quantum_executor = QuantumExecutorMock(mp=True)
        graph_executor = MPGraphExecutor(
            numProc=3, timeout=3, quantumExecutor=quantum_executor, failFast=False
        )
        with self.assertRaises(MPTimeoutError):
            graph_executor.execute(graph)
        # We expect two tasks (0 and 2) to finish successfully and one task to
        # timeout. Unfortunately on busy CPU there is no guarantee that tasks
        # finish on time, so expect more timeouts and issue a warning.
        finished_detectors = set(quantum_executor.getDataIds("detector"))
        self.assertLess(len(finished_detectors), 3)
        if finished_detectors != {0, 2}:
            warnings.warn(f"Possibly timed out tasks, expected [0, 2], received {finished_detectors}")
        report = graph_executor.getReport()
        self.assertEqual(report.status, ExecutionStatus.TIMEOUT)
        self.assertEqual(report.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPTimeoutError")
        self.assertGreater(len(report.quantaReports), 0)
        self.assertGreater(_count_status(report, ExecutionStatus.TIMEOUT), 0)
        self.assertTrue(any(qrep.exitCode < 0 for qrep in report.quantaReports))
        self.assertTrue(all(qrep.exceptionInfo is None for qrep in report.quantaReports))

    def test_mpexec_failure(self):
        """A failing task must not prevent the other tasks from running."""
        task_def = TaskDefMock()
        fail_def = TaskDefMock(taskClass=TaskMockFail)
        graph = QuantumGraphMock(self._make_quanta(task_def, fail_def, task_def))

        quantum_executor = QuantumExecutorMock(mp=True)
        graph_executor = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=quantum_executor)
        with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"):
            graph_executor.execute(graph)
        self.assertCountEqual(quantum_executor.getDataIds("detector"), [0, 2])

        report = graph_executor.getReport()
        self.assertEqual(report.status, ExecutionStatus.FAILURE)
        self.assertEqual(
            report.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError"
        )
        self.assertGreater(len(report.quantaReports), 0)
        self.assertEqual(_count_status(report, ExecutionStatus.FAILURE), 1)
        self.assertEqual(_count_status(report, ExecutionStatus.SUCCESS), 2)
        self.assertTrue(any(qrep.exitCode > 0 for qrep in report.quantaReports))
        self.assertTrue(any(qrep.exceptionInfo is not None for qrep in report.quantaReports))

    def test_mpexec_failure_dep(self):
        """A failing task causes its dependents to be skipped."""
        task_def = TaskDefMock()
        fail_def = TaskDefMock(taskClass=TaskMockFail)
        quanta = self._make_quanta(task_def, fail_def, task_def, task_def, task_def)
        # 2 depends on the failing quantum 1; 4 depends on 3 and on 2.
        quanta[2].dependencies.add(1)
        quanta[4].dependencies.add(3)
        quanta[4].dependencies.add(2)

        graph = QuantumGraphMock(quanta)

        quantum_executor = QuantumExecutorMock(mp=True)
        graph_executor = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=quantum_executor)
        with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"):
            graph_executor.execute(graph)
        self.assertCountEqual(quantum_executor.getDataIds("detector"), [0, 3])

        report = graph_executor.getReport()
        self.assertEqual(report.status, ExecutionStatus.FAILURE)
        self.assertEqual(
            report.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError"
        )
        # One failure, two successes, and two dependents reported as skipped.
        self.assertGreater(len(report.quantaReports), 0)
        self.assertEqual(_count_status(report, ExecutionStatus.FAILURE), 1)
        self.assertEqual(_count_status(report, ExecutionStatus.SUCCESS), 2)
        self.assertEqual(_count_status(report, ExecutionStatus.SKIPPED), 2)
        self.assertTrue(any(qrep.exitCode > 0 for qrep in report.quantaReports))
        self.assertTrue(any(qrep.exceptionInfo is not None for qrep in report.quantaReports))

    def test_mpexec_failure_dep_nomp(self):
        """A failing task causes its dependents to be skipped, in-process
        version.
        """
        task_def = TaskDefMock()
        fail_def = TaskDefMock(taskClass=TaskMockFail)
        quanta = self._make_quanta(task_def, fail_def, task_def, task_def, task_def)
        # 2 depends on the failing quantum 1; 4 depends on 3 and on 2.
        quanta[2].dependencies.add(1)
        quanta[4].dependencies.add(3)
        quanta[4].dependencies.add(2)

        graph = QuantumGraphMock(quanta)

        quantum_executor = QuantumExecutorMock()
        graph_executor = MPGraphExecutor(numProc=1, timeout=100, quantumExecutor=quantum_executor)
        with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"):
            graph_executor.execute(graph)
        self.assertCountEqual(quantum_executor.getDataIds("detector"), [0, 3])

        report = graph_executor.getReport()
        self.assertEqual(report.status, ExecutionStatus.FAILURE)
        self.assertEqual(
            report.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError"
        )
        # One failure, two successes, and two dependents reported as skipped.
        self.assertGreater(len(report.quantaReports), 0)
        self.assertEqual(_count_status(report, ExecutionStatus.FAILURE), 1)
        self.assertEqual(_count_status(report, ExecutionStatus.SUCCESS), 2)
        self.assertEqual(_count_status(report, ExecutionStatus.SKIPPED), 2)
        self.assertTrue(all(qrep.exitCode is None for qrep in report.quantaReports))
        self.assertTrue(any(qrep.exceptionInfo is not None for qrep in report.quantaReports))

    def test_mpexec_failure_failfast(self):
        """Fast fail stops quickly.

        Timing delay of task #3 should be sufficient to process
        failure and raise exception.
        """
        task_def = TaskDefMock()
        fail_def = TaskDefMock(taskClass=TaskMockFail)
        sleep_def = TaskDefMock(taskClass=TaskMockLongSleep)
        quanta = self._make_quanta(task_def, fail_def, task_def, sleep_def, task_def)
        quanta[1].dependencies.add(0)
        quanta[2].dependencies.add(1)
        quanta[4].dependencies.add(3)
        quanta[4].dependencies.add(2)

        graph = QuantumGraphMock(quanta)

        quantum_executor = QuantumExecutorMock(mp=True)
        graph_executor = MPGraphExecutor(
            numProc=3, timeout=100, quantumExecutor=quantum_executor, failFast=True
        )
        with self.assertRaisesRegex(MPGraphExecutorError, "failed, exit code=1"):
            graph_executor.execute(graph)
        self.assertCountEqual(quantum_executor.getDataIds("detector"), [0])

        report = graph_executor.getReport()
        self.assertEqual(report.status, ExecutionStatus.FAILURE)
        self.assertEqual(
            report.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError"
        )
        # Exactly one quantum is reported as failed.
        self.assertGreater(len(report.quantaReports), 0)
        self.assertEqual(_count_status(report, ExecutionStatus.FAILURE), 1)
        self.assertTrue(any(qrep.exitCode > 0 for qrep in report.quantaReports))
        self.assertTrue(any(qrep.exceptionInfo is not None for qrep in report.quantaReports))

    def test_mpexec_crash(self):
        """A task killed by a signal is reported as a failure."""
        task_def = TaskDefMock()
        crash_def = TaskDefMock(taskClass=TaskMockCrash)
        graph = QuantumGraphMock(self._make_quanta(task_def, crash_def, task_def))

        quantum_executor = QuantumExecutorMock(mp=True)
        graph_executor = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=quantum_executor)
        with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"):
            graph_executor.execute(graph)

        report = graph_executor.getReport()
        self.assertEqual(report.status, ExecutionStatus.FAILURE)
        self.assertEqual(
            report.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError"
        )
        # The crashed quantum fails with the negated signal number as exit
        # code and no exception info; the other two succeed.
        self.assertGreater(len(report.quantaReports), 0)
        self.assertEqual(_count_status(report, ExecutionStatus.FAILURE), 1)
        self.assertEqual(_count_status(report, ExecutionStatus.SUCCESS), 2)
        self.assertTrue(any(qrep.exitCode == -signal.SIGILL for qrep in report.quantaReports))
        self.assertTrue(all(qrep.exceptionInfo is None for qrep in report.quantaReports))

    def test_mpexec_crash_failfast(self):
        """A task killed by a signal is reported as a failure with
        --fail-fast.
        """
        task_def = TaskDefMock()
        crash_def = TaskDefMock(taskClass=TaskMockCrash)
        graph = QuantumGraphMock(self._make_quanta(task_def, crash_def, task_def))

        quantum_executor = QuantumExecutorMock(mp=True)
        graph_executor = MPGraphExecutor(
            numProc=3, timeout=100, quantumExecutor=quantum_executor, failFast=True
        )
        with self.assertRaisesRegex(MPGraphExecutorError, "failed, killed by signal 4 .Illegal instruction"):
            graph_executor.execute(graph)

        report = graph_executor.getReport()
        self.assertEqual(report.status, ExecutionStatus.FAILURE)
        self.assertEqual(
            report.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError"
        )
        self.assertEqual(_count_status(report, ExecutionStatus.FAILURE), 1)
        self.assertTrue(any(qrep.exitCode == -signal.SIGILL for qrep in report.quantaReports))
        self.assertTrue(all(qrep.exceptionInfo is None for qrep in report.quantaReports))

    def test_mpexec_num_fd(self):
        """The number of open file descriptors must stay reasonable."""
        task_def = TaskDefMock()
        graph = QuantumGraphMock(self._make_quanta(*([task_def] * 20)))

        current_process = psutil.Process()
        fds_before = current_process.num_fds()

        # run in multi-process mode, the order of results is not defined
        quantum_executor = QuantumExecutorMock(mp=True)
        graph_executor = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=quantum_executor)
        graph_executor.execute(graph)

        fds_after = current_process.num_fds()
        # They should be the same but allow small growth just in case.
        # Without DM-26728 fix the difference would be equal to number of
        # quanta (20).
        self.assertLess(fds_after - fds_before, 5)

587 

588 

class SingleQuantumExecutorTestCase(unittest.TestCase):
    """Tests for SingleQuantumExecutor run against a one-quantum graph."""

    instrument = "lsst.pipe.base.tests.simpleQGraph.SimpleInstrument"

    def setUp(self):
        # Each test gets its own temporary directory under TESTDIR.
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.root)

    def _make_single_node(self):
        """Create a one-quantum graph and return ``(butler, node)``."""
        n_quanta = 1
        butler, qgraph = makeSimpleQGraph(n_quanta, root=self.root, instrument=self.instrument)

        nodes = list(qgraph)
        self.assertEqual(len(nodes), n_quanta)
        return butler, nodes[0]

    def _single_output_ref(self, butler):
        """Query ``add_dataset1`` in the butler's run collection, check that
        exactly one dataset exists, and return its ref.
        """
        refs = list(butler.registry.queryDatasets("add_dataset1", collections=butler.run))
        self.assertEqual(len(refs), 1)
        return refs[0]

    def test_simple_execute(self) -> None:
        """Run execute() method in simplest setup."""
        butler, node = self._make_single_node()

        task_factory = AddTaskFactoryMock()
        executor = SingleQuantumExecutor(butler, task_factory)
        executor.execute(node.taskDef, node.quantum)
        self.assertEqual(task_factory.countExec, 1)

        # There must be one dataset of task's output connection
        self._single_output_ref(butler)

    def test_skip_existing_execute(self) -> None:
        """Run execute() method twice, with skip_existing_in."""
        butler, node = self._make_single_node()

        task_factory = AddTaskFactoryMock()
        executor = SingleQuantumExecutor(butler, task_factory)
        executor.execute(node.taskDef, node.quantum)
        self.assertEqual(task_factory.countExec, 1)

        first_id = self._single_output_ref(butler).id

        # Re-run it with skipExistingIn, it should not run.
        assert butler.run is not None
        executor = SingleQuantumExecutor(butler, task_factory, skipExistingIn=[butler.run])
        executor.execute(node.taskDef, node.quantum)
        self.assertEqual(task_factory.countExec, 1)

        second_id = self._single_output_ref(butler).id
        self.assertEqual(first_id, second_id)

    def test_clobber_outputs_execute(self) -> None:
        """Run execute() method twice, with clobber_outputs."""
        butler, node = self._make_single_node()

        task_factory = AddTaskFactoryMock()
        executor = SingleQuantumExecutor(butler, task_factory)
        executor.execute(node.taskDef, node.quantum)
        self.assertEqual(task_factory.countExec, 1)

        first_ref = self._single_output_ref(butler)
        first_id = first_ref.id

        original_dataset = butler.get(first_ref)

        # Remove the dataset ourself, and replace it with something
        # different so we can check later whether it got replaced.
        butler.pruneDatasets([first_ref], disassociate=False, unstore=True, purge=False)
        replacement = original_dataset + 10
        butler.put(replacement, first_ref)

        # Re-run it with clobberOutputs and skipExistingIn, it should not
        # clobber but should skip instead.
        assert butler.run is not None
        executor = SingleQuantumExecutor(
            butler, task_factory, skipExistingIn=[butler.run], clobberOutputs=True
        )
        executor.execute(node.taskDef, node.quantum)
        self.assertEqual(task_factory.countExec, 1)

        second_ref = self._single_output_ref(butler)
        self.assertEqual(first_id, second_ref.id)

        second_dataset = butler.get(second_ref)
        self.assertEqual(list(second_dataset), list(replacement))

        # Re-run it with clobberOutputs but without skipExistingIn, it should
        # clobber.
        assert butler.run is not None
        executor = SingleQuantumExecutor(butler, task_factory, clobberOutputs=True)
        executor.execute(node.taskDef, node.quantum)
        self.assertEqual(task_factory.countExec, 2)

        third_ref = self._single_output_ref(butler)
        third_id = third_ref.id

        third_dataset = butler.get(third_ref)
        self.assertEqual(list(third_dataset), list(original_dataset))

        # No change in UUID even after replacement
        self.assertEqual(first_id, third_id)

706 

707 

# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()