Coverage for tests/test_executors.py: 15%

416 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-14 19:56 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Unit tests for MPGraphExecutor and SingleQuantumExecutor.

23""" 

24 

25import faulthandler 

26import logging 

27import os 

28import signal 

29import sys 

30import time 

31import unittest 

32import warnings 

33from multiprocessing import Manager 

34 

35import networkx as nx 

36import psutil 

37from lsst.ctrl.mpexec import ( 

38 ExecutionStatus, 

39 MPGraphExecutor, 

40 MPGraphExecutorError, 

41 MPTimeoutError, 

42 QuantumExecutor, 

43 QuantumReport, 

44 SingleQuantumExecutor, 

45) 

46from lsst.ctrl.mpexec.execFixupDataId import ExecFixupDataId 

47from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

48from lsst.pipe.base import NodeId 

49from lsst.pipe.base.tests.simpleQGraph import AddTaskFactoryMock, makeSimpleQGraph 

50 

51logging.basicConfig(level=logging.DEBUG) 

52 

53_LOG = logging.getLogger(__name__) 

54 

55TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

56 

57 

58class QuantumExecutorMock(QuantumExecutor): 

59 """Mock class for QuantumExecutor""" 

60 

61 def __init__(self, mp=False): 

62 self.quanta = [] 

63 if mp: 

64 # in multiprocess mode use shared list 

65 manager = Manager() 

66 self.quanta = manager.list() 

67 self.report = None 

68 self._execute_called = False 

69 

70 def execute(self, taskDef, quantum): 

71 _LOG.debug("QuantumExecutorMock.execute: taskDef=%s dataId=%s", taskDef, quantum.dataId) 

72 self._execute_called = True 

73 if taskDef.taskClass: 

74 try: 

75 # only works for one of the TaskMock classes below 

76 taskDef.taskClass().runQuantum() 

77 self.report = QuantumReport(dataId=quantum.dataId, taskLabel=taskDef.label) 

78 except Exception as exc: 

79 self.report = QuantumReport.from_exception( 

80 exception=exc, 

81 dataId=quantum.dataId, 

82 taskLabel=taskDef.label, 

83 ) 

84 raise 

85 self.quanta.append(quantum) 

86 return quantum 

87 

88 def getReport(self): 

89 if not self._execute_called: 

90 raise RuntimeError("getReport called before execute") 

91 return self.report 

92 

93 def getDataIds(self, field): 

94 """Return values of the dataId field for each visited quantum."""

95 return [quantum.dataId[field] for quantum in self.quanta] 

96 

97 

98class QuantumMock: 

99 """Mock equivalent of a `~lsst.daf.butler.Quantum`.""" 

100 

101 def __init__(self, dataId): 

102 self.dataId = dataId 

103 

104 def __eq__(self, other): 

105 return self.dataId == other.dataId 

106 

107 def __hash__(self): 

108 # dict.__eq__ is order-insensitive 

109 return hash(tuple(sorted(kv for kv in self.dataId.items()))) 

110 

111 

112class QuantumIterDataMock: 

113 """Simple class to mock QuantumIterData.""" 

114 

115 def __init__(self, index, taskDef, **dataId): 

116 self.index = index 

117 self.taskDef = taskDef 

118 self.quantum = QuantumMock(dataId) 

119 self.dependencies = set() 

120 self.nodeId = NodeId(index, "DummyBuildString") 

121 

122 

123class QuantumGraphMock: 

124 """Mock for quantum graph.""" 

125 

126 def __init__(self, qdata): 

127 self._graph = nx.DiGraph() 

128 previous = qdata[0] 

129 for node in qdata[1:]: 

130 self._graph.add_edge(previous, node) 

131 previous = node 

132 

133 def __iter__(self): 

134 yield from nx.topological_sort(self._graph) 

135 

136 def __len__(self): 

137 return len(self._graph) 

138 

139 def findTaskDefByLabel(self, label): 

140 for q in self: 

141 if q.taskDef.label == label: 

142 return q.taskDef 

143 

144 def getQuantaForTask(self, taskDef): 

145 nodes = self.getNodesForTask(taskDef) 

146 return {q.quantum for q in nodes} 

147 

148 def getNodesForTask(self, taskDef): 

149 quanta = set() 

150 for q in self: 

151 if q.taskDef == taskDef: 

152 quanta.add(q) 

153 return quanta 

154 

155 @property 

156 def graph(self): 

157 return self._graph 

158 

159 def findCycle(self): 

160 return [] 

161 

162 def determineInputsToQuantumNode(self, node): 

163 result = set() 

164 for n in node.dependencies: 

165 for otherNode in self: 

166 if otherNode.index == n: 

167 result.add(otherNode) 

168 return result 

169 

170 

171class TaskMockMP: 

172 """Simple mock class for task supporting multiprocessing.""" 

173 

174 canMultiprocess = True 

175 

176 def runQuantum(self): 

177 _LOG.debug("TaskMockMP.runQuantum") 

178 pass 

179 

180 

181class TaskMockFail: 

182 """Simple mock class for task which fails.""" 

183 

184 canMultiprocess = True 

185 

186 def runQuantum(self): 

187 _LOG.debug("TaskMockFail.runQuantum") 

188 raise ValueError("expected failure") 

189 

190 

191class TaskMockCrash: 

192 """Simple mock class for task which crashes due to a signal."""

193 

194 canMultiprocess = True 

195 

196 def runQuantum(self): 

197 _LOG.debug("TaskMockCrash.runQuantum") 

198 # Disable fault handler to suppress long scary traceback. 

199 faulthandler.disable() 

200 signal.raise_signal(signal.SIGILL) 

201 

202 

203class TaskMockLongSleep: 

204 """Simple mock class for task which "runs" for very long time.""" 

205 

206 canMultiprocess = True 

207 

208 def runQuantum(self): 

209 _LOG.debug("TaskMockLongSleep.runQuantum") 

210 time.sleep(100.0) 

211 

212 

213class TaskMockNoMP: 

214 """Simple mock class for task not supporting multiprocessing.""" 

215 

216 canMultiprocess = False 

217 

218 

219class TaskDefMock: 

220 """Simple mock class for task definition in a pipeline.""" 

221 

222 def __init__(self, taskName="Task", config=None, taskClass=TaskMockMP, label="task1"): 

223 self.taskName = taskName 

224 self.config = config 

225 self.taskClass = taskClass 

226 self.label = label 

227 

228 def __str__(self): 

229 return f"TaskDefMock(taskName={self.taskName}, taskClass={self.taskClass.__name__})" 

230 

231 

232def _count_status(report, status): 

233 """Count number of quanta with a given status."""

234 return len([qrep for qrep in report.quantaReports if qrep.status is status]) 

235 

236 

237class MPGraphExecutorTestCase(unittest.TestCase): 

238 """A test case for MPGraphExecutor class""" 

239 

240 def test_mpexec_nomp(self): 

241 """Make simple graph and execute""" 

242 taskDef = TaskDefMock() 

243 qgraph = QuantumGraphMock( 

244 [QuantumIterDataMock(index=i, taskDef=taskDef, detector=i) for i in range(3)] 

245 ) 

246 

247 # run in single-process mode 

248 qexec = QuantumExecutorMock() 

249 mpexec = MPGraphExecutor(numProc=1, timeout=100, quantumExecutor=qexec) 

250 mpexec.execute(qgraph) 

251 self.assertEqual(qexec.getDataIds("detector"), [0, 1, 2]) 

252 report = mpexec.getReport() 

253 self.assertEqual(report.status, ExecutionStatus.SUCCESS) 

254 self.assertIsNone(report.exitCode) 

255 self.assertIsNone(report.exceptionInfo) 

256 self.assertEqual(len(report.quantaReports), 3) 

257 self.assertTrue(all(qrep.status == ExecutionStatus.SUCCESS for qrep in report.quantaReports)) 

258 self.assertTrue(all(qrep.exitCode is None for qrep in report.quantaReports)) 

259 self.assertTrue(all(qrep.exceptionInfo is None for qrep in report.quantaReports)) 

260 self.assertTrue(all(qrep.taskLabel == "task1" for qrep in report.quantaReports)) 

261 

262 def test_mpexec_mp(self): 

263 """Make simple graph and execute""" 

264 taskDef = TaskDefMock() 

265 qgraph = QuantumGraphMock( 

266 [QuantumIterDataMock(index=i, taskDef=taskDef, detector=i) for i in range(3)] 

267 ) 

268 

269 methods = ["spawn"] 

270 if sys.platform == "linux": 

271 methods.append("fork") 

272 methods.append("forkserver") 

273 

274 for method in methods: 

275 with self.subTest(startMethod=method): 

276 # Run in multi-process mode, the order of results is not 

277 # defined. 

278 qexec = QuantumExecutorMock(mp=True) 

279 mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec, startMethod=method) 

280 mpexec.execute(qgraph) 

281 self.assertCountEqual(qexec.getDataIds("detector"), [0, 1, 2]) 

282 report = mpexec.getReport() 

283 self.assertEqual(report.status, ExecutionStatus.SUCCESS) 

284 self.assertIsNone(report.exitCode) 

285 self.assertIsNone(report.exceptionInfo) 

286 self.assertEqual(len(report.quantaReports), 3) 

287 self.assertTrue(all(qrep.status == ExecutionStatus.SUCCESS for qrep in report.quantaReports)) 

288 self.assertTrue(all(qrep.exitCode == 0 for qrep in report.quantaReports)) 

289 self.assertTrue(all(qrep.exceptionInfo is None for qrep in report.quantaReports)) 

290 self.assertTrue(all(qrep.taskLabel == "task1" for qrep in report.quantaReports)) 

291 

292 def test_mpexec_nompsupport(self): 

293 """Try to run MP for task that has no MP support which should fail""" 

294 taskDef = TaskDefMock(taskClass=TaskMockNoMP) 

295 qgraph = QuantumGraphMock( 

296 [QuantumIterDataMock(index=i, taskDef=taskDef, detector=i) for i in range(3)] 

297 ) 

298 

299 # run in multi-process mode 

300 qexec = QuantumExecutorMock() 

301 mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec) 

302 with self.assertRaisesRegex(MPGraphExecutorError, "Task Task does not support multiprocessing"): 

303 mpexec.execute(qgraph) 

304 

305 def test_mpexec_fixup(self): 

306 """Make simple graph and execute, add dependencies by executing fixup 

307 code. 

308 """ 

309 taskDef = TaskDefMock() 

310 

311 for reverse in (False, True): 

312 qgraph = QuantumGraphMock( 

313 [QuantumIterDataMock(index=i, taskDef=taskDef, detector=i) for i in range(3)] 

314 ) 

315 

316 qexec = QuantumExecutorMock() 

317 fixup = ExecFixupDataId("task1", "detector", reverse=reverse) 

318 mpexec = MPGraphExecutor(numProc=1, timeout=100, quantumExecutor=qexec, executionGraphFixup=fixup) 

319 mpexec.execute(qgraph) 

320 

321 expected = [0, 1, 2] 

322 if reverse: 

323 expected = list(reversed(expected)) 

324 self.assertEqual(qexec.getDataIds("detector"), expected) 

325 

326 def test_mpexec_timeout(self): 

327 """Fail due to timeout""" 

328 taskDef = TaskDefMock() 

329 taskDefSleep = TaskDefMock(taskClass=TaskMockLongSleep) 

330 qgraph = QuantumGraphMock( 

331 [ 

332 QuantumIterDataMock(index=0, taskDef=taskDef, detector=0), 

333 QuantumIterDataMock(index=1, taskDef=taskDefSleep, detector=1), 

334 QuantumIterDataMock(index=2, taskDef=taskDef, detector=2), 

335 ] 

336 ) 

337 

338 # with failFast we'll get immediate MPTimeoutError 

339 qexec = QuantumExecutorMock(mp=True) 

340 mpexec = MPGraphExecutor(numProc=3, timeout=1, quantumExecutor=qexec, failFast=True) 

341 with self.assertRaises(MPTimeoutError): 

342 mpexec.execute(qgraph) 

343 report = mpexec.getReport() 

344 self.assertEqual(report.status, ExecutionStatus.TIMEOUT) 

345 self.assertEqual(report.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPTimeoutError") 

346 self.assertGreater(len(report.quantaReports), 0) 

347 self.assertEqual(_count_status(report, ExecutionStatus.TIMEOUT), 1) 

348 self.assertTrue(any(qrep.exitCode < 0 for qrep in report.quantaReports)) 

349 self.assertTrue(all(qrep.exceptionInfo is None for qrep in report.quantaReports)) 

350 

351 # with failFast=False exception happens after last task finishes 

352 qexec = QuantumExecutorMock(mp=True) 

353 mpexec = MPGraphExecutor(numProc=3, timeout=3, quantumExecutor=qexec, failFast=False) 

354 with self.assertRaises(MPTimeoutError): 

355 mpexec.execute(qgraph) 

356 # We expect two tasks (0 and 2) to finish successfully and one task to

357 # time out. Unfortunately, on a busy CPU there is no guarantee that tasks

358 # finish on time, so expect more timeouts and issue a warning.

359 detectorIds = set(qexec.getDataIds("detector")) 

360 self.assertLess(len(detectorIds), 3) 

361 if detectorIds != {0, 2}: 

362 warnings.warn(f"Possibly timed out tasks, expected [0, 2], received {detectorIds}") 

363 report = mpexec.getReport() 

364 self.assertEqual(report.status, ExecutionStatus.TIMEOUT) 

365 self.assertEqual(report.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPTimeoutError") 

366 self.assertGreater(len(report.quantaReports), 0) 

367 self.assertGreater(_count_status(report, ExecutionStatus.TIMEOUT), 0) 

368 self.assertTrue(any(qrep.exitCode < 0 for qrep in report.quantaReports)) 

369 self.assertTrue(all(qrep.exceptionInfo is None for qrep in report.quantaReports)) 

370 

371 def test_mpexec_failure(self): 

372 """Failure in one task should not stop other tasks""" 

373 taskDef = TaskDefMock() 

374 taskDefFail = TaskDefMock(taskClass=TaskMockFail) 

375 qgraph = QuantumGraphMock( 

376 [ 

377 QuantumIterDataMock(index=0, taskDef=taskDef, detector=0), 

378 QuantumIterDataMock(index=1, taskDef=taskDefFail, detector=1), 

379 QuantumIterDataMock(index=2, taskDef=taskDef, detector=2), 

380 ] 

381 ) 

382 

383 qexec = QuantumExecutorMock(mp=True) 

384 mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec) 

385 with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"): 

386 mpexec.execute(qgraph) 

387 self.assertCountEqual(qexec.getDataIds("detector"), [0, 2]) 

388 report = mpexec.getReport() 

389 self.assertEqual(report.status, ExecutionStatus.FAILURE) 

390 self.assertEqual( 

391 report.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError" 

392 ) 

393 self.assertGreater(len(report.quantaReports), 0) 

394 self.assertEqual(_count_status(report, ExecutionStatus.FAILURE), 1) 

395 self.assertEqual(_count_status(report, ExecutionStatus.SUCCESS), 2) 

396 self.assertTrue(any(qrep.exitCode > 0 for qrep in report.quantaReports)) 

397 self.assertTrue(any(qrep.exceptionInfo is not None for qrep in report.quantaReports)) 

398 

399 def test_mpexec_failure_dep(self): 

400 """Failure in one task should skip dependents""" 

401 taskDef = TaskDefMock() 

402 taskDefFail = TaskDefMock(taskClass=TaskMockFail) 

403 qdata = [ 

404 QuantumIterDataMock(index=0, taskDef=taskDef, detector=0), 

405 QuantumIterDataMock(index=1, taskDef=taskDefFail, detector=1), 

406 QuantumIterDataMock(index=2, taskDef=taskDef, detector=2), 

407 QuantumIterDataMock(index=3, taskDef=taskDef, detector=3), 

408 QuantumIterDataMock(index=4, taskDef=taskDef, detector=4), 

409 ] 

410 qdata[2].dependencies.add(1) 

411 qdata[4].dependencies.add(3) 

412 qdata[4].dependencies.add(2) 

413 

414 qgraph = QuantumGraphMock(qdata) 

415 

416 qexec = QuantumExecutorMock(mp=True) 

417 mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec) 

418 with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"): 

419 mpexec.execute(qgraph) 

420 self.assertCountEqual(qexec.getDataIds("detector"), [0, 3]) 

421 report = mpexec.getReport() 

422 self.assertEqual(report.status, ExecutionStatus.FAILURE) 

423 self.assertEqual( 

424 report.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError" 

425 ) 

426 # Dependents of the failed task appear in quantaReports as SKIPPED

427 self.assertGreater(len(report.quantaReports), 0) 

428 self.assertEqual(_count_status(report, ExecutionStatus.FAILURE), 1) 

429 self.assertEqual(_count_status(report, ExecutionStatus.SUCCESS), 2) 

430 self.assertEqual(_count_status(report, ExecutionStatus.SKIPPED), 2) 

431 self.assertTrue(any(qrep.exitCode > 0 for qrep in report.quantaReports)) 

432 self.assertTrue(any(qrep.exceptionInfo is not None for qrep in report.quantaReports)) 

433 

434 def test_mpexec_failure_dep_nomp(self): 

435 """Failure in one task should skip dependents, in-process version""" 

436 taskDef = TaskDefMock() 

437 taskDefFail = TaskDefMock(taskClass=TaskMockFail) 

438 qdata = [ 

439 QuantumIterDataMock(index=0, taskDef=taskDef, detector=0), 

440 QuantumIterDataMock(index=1, taskDef=taskDefFail, detector=1), 

441 QuantumIterDataMock(index=2, taskDef=taskDef, detector=2), 

442 QuantumIterDataMock(index=3, taskDef=taskDef, detector=3), 

443 QuantumIterDataMock(index=4, taskDef=taskDef, detector=4), 

444 ] 

445 qdata[2].dependencies.add(1) 

446 qdata[4].dependencies.add(3) 

447 qdata[4].dependencies.add(2) 

448 

449 qgraph = QuantumGraphMock(qdata) 

450 

451 qexec = QuantumExecutorMock() 

452 mpexec = MPGraphExecutor(numProc=1, timeout=100, quantumExecutor=qexec) 

453 with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"): 

454 mpexec.execute(qgraph) 

455 self.assertCountEqual(qexec.getDataIds("detector"), [0, 3]) 

456 report = mpexec.getReport() 

457 self.assertEqual(report.status, ExecutionStatus.FAILURE) 

458 self.assertEqual( 

459 report.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError" 

460 ) 

461 # Dependents of the failed task appear in quantaReports as SKIPPED

462 self.assertGreater(len(report.quantaReports), 0) 

463 self.assertEqual(_count_status(report, ExecutionStatus.FAILURE), 1) 

464 self.assertEqual(_count_status(report, ExecutionStatus.SUCCESS), 2) 

465 self.assertEqual(_count_status(report, ExecutionStatus.SKIPPED), 2) 

466 self.assertTrue(all(qrep.exitCode is None for qrep in report.quantaReports)) 

467 self.assertTrue(any(qrep.exceptionInfo is not None for qrep in report.quantaReports)) 

468 

469 def test_mpexec_failure_failfast(self): 

470 """Fast fail stops quickly. 

471 

472 Timing delay of task #3 should be sufficient to process 

473 failure and raise exception. 

474 """ 

475 taskDef = TaskDefMock() 

476 taskDefFail = TaskDefMock(taskClass=TaskMockFail) 

477 taskDefLongSleep = TaskDefMock(taskClass=TaskMockLongSleep) 

478 qdata = [ 

479 QuantumIterDataMock(index=0, taskDef=taskDef, detector=0), 

480 QuantumIterDataMock(index=1, taskDef=taskDefFail, detector=1), 

481 QuantumIterDataMock(index=2, taskDef=taskDef, detector=2), 

482 QuantumIterDataMock(index=3, taskDef=taskDefLongSleep, detector=3), 

483 QuantumIterDataMock(index=4, taskDef=taskDef, detector=4), 

484 ] 

485 qdata[1].dependencies.add(0) 

486 qdata[2].dependencies.add(1) 

487 qdata[4].dependencies.add(3) 

488 qdata[4].dependencies.add(2) 

489 

490 qgraph = QuantumGraphMock(qdata) 

491 

492 qexec = QuantumExecutorMock(mp=True) 

493 mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec, failFast=True) 

494 with self.assertRaisesRegex(MPGraphExecutorError, "failed, exit code=1"): 

495 mpexec.execute(qgraph) 

496 self.assertCountEqual(qexec.getDataIds("detector"), [0]) 

497 report = mpexec.getReport() 

498 self.assertEqual(report.status, ExecutionStatus.FAILURE) 

499 self.assertEqual( 

500 report.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError" 

501 ) 

502 # Dependencies of failed tasks do not appear in quantaReports 

503 self.assertGreater(len(report.quantaReports), 0) 

504 self.assertEqual(_count_status(report, ExecutionStatus.FAILURE), 1) 

505 self.assertTrue(any(qrep.exitCode > 0 for qrep in report.quantaReports)) 

506 self.assertTrue(any(qrep.exceptionInfo is not None for qrep in report.quantaReports)) 

507 

508 def test_mpexec_crash(self): 

509 """Check task crash due to signal""" 

510 taskDef = TaskDefMock() 

511 taskDefCrash = TaskDefMock(taskClass=TaskMockCrash) 

512 qgraph = QuantumGraphMock( 

513 [ 

514 QuantumIterDataMock(index=0, taskDef=taskDef, detector=0), 

515 QuantumIterDataMock(index=1, taskDef=taskDefCrash, detector=1), 

516 QuantumIterDataMock(index=2, taskDef=taskDef, detector=2), 

517 ] 

518 ) 

519 

520 qexec = QuantumExecutorMock(mp=True) 

521 mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec) 

522 with self.assertRaisesRegex(MPGraphExecutorError, "One or more tasks failed"): 

523 mpexec.execute(qgraph) 

524 report = mpexec.getReport() 

525 self.assertEqual(report.status, ExecutionStatus.FAILURE) 

526 self.assertEqual( 

527 report.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError" 

528 ) 

529 # Dependencies of failed tasks do not appear in quantaReports 

530 self.assertGreater(len(report.quantaReports), 0) 

531 self.assertEqual(_count_status(report, ExecutionStatus.FAILURE), 1) 

532 self.assertEqual(_count_status(report, ExecutionStatus.SUCCESS), 2) 

533 self.assertTrue(any(qrep.exitCode == -signal.SIGILL for qrep in report.quantaReports)) 

534 self.assertTrue(all(qrep.exceptionInfo is None for qrep in report.quantaReports)) 

535 

536 def test_mpexec_crash_failfast(self): 

537 """Check task crash due to signal with --fail-fast""" 

538 taskDef = TaskDefMock() 

539 taskDefCrash = TaskDefMock(taskClass=TaskMockCrash) 

540 qgraph = QuantumGraphMock( 

541 [ 

542 QuantumIterDataMock(index=0, taskDef=taskDef, detector=0), 

543 QuantumIterDataMock(index=1, taskDef=taskDefCrash, detector=1), 

544 QuantumIterDataMock(index=2, taskDef=taskDef, detector=2), 

545 ] 

546 ) 

547 

548 qexec = QuantumExecutorMock(mp=True) 

549 mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec, failFast=True) 

550 with self.assertRaisesRegex(MPGraphExecutorError, "failed, killed by signal 4 .Illegal instruction"): 

551 mpexec.execute(qgraph) 

552 report = mpexec.getReport() 

553 self.assertEqual(report.status, ExecutionStatus.FAILURE) 

554 self.assertEqual( 

555 report.exceptionInfo.className, "lsst.ctrl.mpexec.mpGraphExecutor.MPGraphExecutorError" 

556 ) 

557 self.assertEqual(_count_status(report, ExecutionStatus.FAILURE), 1) 

558 self.assertTrue(any(qrep.exitCode == -signal.SIGILL for qrep in report.quantaReports)) 

559 self.assertTrue(all(qrep.exceptionInfo is None for qrep in report.quantaReports)) 

560 

561 def test_mpexec_num_fd(self): 

562 """Check that number of open files stays reasonable""" 

563 taskDef = TaskDefMock() 

564 qgraph = QuantumGraphMock( 

565 [QuantumIterDataMock(index=i, taskDef=taskDef, detector=i) for i in range(20)] 

566 ) 

567 

568 this_proc = psutil.Process() 

569 num_fds_0 = this_proc.num_fds() 

570 

571 # run in multi-process mode, the order of results is not defined 

572 qexec = QuantumExecutorMock(mp=True) 

573 mpexec = MPGraphExecutor(numProc=3, timeout=100, quantumExecutor=qexec) 

574 mpexec.execute(qgraph) 

575 

576 num_fds_1 = this_proc.num_fds() 

577 # They should be the same but allow small growth just in case. 

578 # Without DM-26728 fix the difference would be equal to number of 

579 # quanta (20). 

580 self.assertLess(num_fds_1 - num_fds_0, 5) 

581 

582 

583class SingleQuantumExecutorTestCase(unittest.TestCase): 

584 """Tests for SingleQuantumExecutor implementation.""" 

585 

586 instrument = "lsst.pipe.base.tests.simpleQGraph.SimpleInstrument" 

587 

588 def setUp(self): 

589 self.root = makeTestTempDir(TESTDIR) 

590 

591 def tearDown(self): 

592 removeTestTempDir(self.root) 

593 

594 def test_simple_execute(self) -> None: 

595 """Run execute() method in simplest setup.""" 

596 nQuanta = 1 

597 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root, instrument=self.instrument) 

598 

599 nodes = list(qgraph) 

600 self.assertEqual(len(nodes), nQuanta) 

601 node = nodes[0] 

602 

603 taskFactory = AddTaskFactoryMock() 

604 executor = SingleQuantumExecutor(butler, taskFactory) 

605 executor.execute(node.taskDef, node.quantum) 

606 self.assertEqual(taskFactory.countExec, 1) 

607 

608 # There must be one dataset of task's output connection 

609 refs = list(butler.registry.queryDatasets("add_dataset1", collections=butler.run)) 

610 self.assertEqual(len(refs), 1) 

611 

612 def test_skip_existing_execute(self) -> None: 

613 """Run execute() method twice, with skip_existing_in.""" 

614 nQuanta = 1 

615 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root, instrument=self.instrument) 

616 

617 nodes = list(qgraph) 

618 self.assertEqual(len(nodes), nQuanta) 

619 node = nodes[0] 

620 

621 taskFactory = AddTaskFactoryMock() 

622 executor = SingleQuantumExecutor(butler, taskFactory) 

623 executor.execute(node.taskDef, node.quantum) 

624 self.assertEqual(taskFactory.countExec, 1) 

625 

626 refs = list(butler.registry.queryDatasets("add_dataset1", collections=butler.run)) 

627 self.assertEqual(len(refs), 1) 

628 dataset_id_1 = refs[0].id 

629 

630 # Re-run it with skipExistingIn, it should not run. 

631 assert butler.run is not None 

632 executor = SingleQuantumExecutor(butler, taskFactory, skipExistingIn=[butler.run]) 

633 executor.execute(node.taskDef, node.quantum) 

634 self.assertEqual(taskFactory.countExec, 1) 

635 

636 refs = list(butler.registry.queryDatasets("add_dataset1", collections=butler.run)) 

637 self.assertEqual(len(refs), 1) 

638 dataset_id_2 = refs[0].id 

639 self.assertEqual(dataset_id_1, dataset_id_2) 

640 

641 def test_clobber_outputs_execute(self) -> None: 

642 """Run execute() method twice, with clobber_outputs.""" 

643 nQuanta = 1 

644 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root, instrument=self.instrument) 

645 

646 nodes = list(qgraph) 

647 self.assertEqual(len(nodes), nQuanta) 

648 node = nodes[0] 

649 

650 taskFactory = AddTaskFactoryMock() 

651 executor = SingleQuantumExecutor(butler, taskFactory) 

652 executor.execute(node.taskDef, node.quantum) 

653 self.assertEqual(taskFactory.countExec, 1) 

654 

655 refs = list(butler.registry.queryDatasets("add_dataset1", collections=butler.run)) 

656 self.assertEqual(len(refs), 1) 

657 dataset_id_1 = refs[0].id 

658 

659 original_dataset = butler.get(refs[0]) 

660 

661 # Remove the dataset ourselves, and replace it with something

662 # different so we can check later whether it got replaced. 

663 butler.pruneDatasets([refs[0]], disassociate=False, unstore=True, purge=False) 

664 replacement = original_dataset + 10 

665 butler.put(replacement, refs[0]) 

666 

667 # Re-run it with clobberOutputs and skipExistingIn, it should not 

668 # clobber but should skip instead. 

669 assert butler.run is not None 

670 executor = SingleQuantumExecutor( 

671 butler, taskFactory, skipExistingIn=[butler.run], clobberOutputs=True 

672 ) 

673 executor.execute(node.taskDef, node.quantum) 

674 self.assertEqual(taskFactory.countExec, 1) 

675 

676 refs = list(butler.registry.queryDatasets("add_dataset1", collections=butler.run)) 

677 self.assertEqual(len(refs), 1) 

678 dataset_id_2 = refs[0].id 

679 self.assertEqual(dataset_id_1, dataset_id_2) 

680 

681 second_dataset = butler.get(refs[0]) 

682 self.assertEqual(list(second_dataset), list(replacement)) 

683 

684 # Re-run it with clobberOutputs but without skipExistingIn, it should 

685 # clobber. 

686 assert butler.run is not None 

687 executor = SingleQuantumExecutor(butler, taskFactory, clobberOutputs=True) 

688 executor.execute(node.taskDef, node.quantum) 

689 self.assertEqual(taskFactory.countExec, 2) 

690 

691 refs = list(butler.registry.queryDatasets("add_dataset1", collections=butler.run)) 

692 self.assertEqual(len(refs), 1) 

693 dataset_id_3 = refs[0].id 

694 

695 third_dataset = butler.get(refs[0]) 

696 self.assertEqual(list(third_dataset), list(original_dataset)) 

697 

698 # No change in UUID even after replacement 

699 self.assertEqual(dataset_id_1, dataset_id_3) 

700 

701 

702if __name__ == "__main__": 

703 unittest.main()