Coverage for tests/test_cmdLineFwk.py: 14%

496 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-04-14 02:17 -0700

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Simple unit test for cmdLineFwk module. 

23""" 

24 

25import contextlib 

26import logging 

27import os 

28import pickle 

29import re 

30import shutil 

31import tempfile 

32import unittest 

33from dataclasses import dataclass 

34from io import StringIO 

35from types import SimpleNamespace 

36from typing import NamedTuple 

37 

38import click 

39import lsst.pex.config as pexConfig 

40import lsst.pipe.base.connectionTypes as cT 

41import lsst.utils.tests 

42from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError 

43from lsst.ctrl.mpexec.cli.opt import run_options 

44from lsst.ctrl.mpexec.cli.utils import ( 

45 _ACTION_ADD_INSTRUMENT, 

46 _ACTION_ADD_TASK, 

47 _ACTION_CONFIG, 

48 _ACTION_CONFIG_FILE, 

49 PipetaskCommand, 

50) 

51from lsst.ctrl.mpexec.showInfo import ShowInfo 

52from lsst.daf.butler import ( 

53 Config, 

54 DataCoordinate, 

55 DatasetRef, 

56 DimensionConfig, 

57 DimensionUniverse, 

58 Quantum, 

59 Registry, 

60) 

61from lsst.daf.butler.core.datasets.type import DatasetType 

62from lsst.daf.butler.registry import RegistryConfig 

63from lsst.pipe.base import ( 

64 Instrument, 

65 Pipeline, 

66 PipelineTaskConfig, 

67 PipelineTaskConnections, 

68 QuantumGraph, 

69 TaskDef, 

70) 

71from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant 

72from lsst.pipe.base.tests.simpleQGraph import ( 

73 AddTask, 

74 AddTaskFactoryMock, 

75 makeSimpleButler, 

76 makeSimplePipeline, 

77 makeSimpleQGraph, 

78 populateButler, 

79) 

80from lsst.utils.tests import temporaryDirectory 

81 

# Configure test logging; UNIT_TEST_LOGGING_LEVEL names the level, with
# unset or unrecognized values falling back to INFO.
_log_level_name = os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO")
logging.basicConfig(level=getattr(logging, _log_level_name, logging.INFO))

# Have to monkey-patch Instrument.fromName() to not retrieve non-existing
# instrument from registry, these tests can run fine without actual instrument
# and implementing full mock for Instrument is too complicated.
Instrument.fromName = lambda name, reg: None

88 

89 

@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    Temporary file is deleted on exiting context, even if the managed block
    raises.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into the file.  If `None` or empty the file is left
        empty.
    suffix : `str`, optional
        File name suffix (e.g. ``".qgraph"``) forwarded to
        `tempfile.mkstemp`.

    Yields
    ------
    tmpname : `str`
        Path of the temporary file.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    try:
        # Close the descriptor even if the write fails, so we never leak it.
        if contents:
            os.write(fd, contents)
    finally:
        os.close(fd)
    try:
        yield tmpname
    finally:
        # Guarantee cleanup even when the managed block raises; ignore a
        # file that the block already removed.
        with contextlib.suppress(OSError):
            os.remove(tmpname)

108 

109 

@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create new empty registry database.

    Parameters
    ----------
    create : `bool`, optional
        If `True` (default) actually initialize the database; otherwise only
        yield a configuration pointing at the (nonexistent) file.
    universe : `DimensionUniverse`, optional
        Dimension universe supplying the dimension configuration; when `None`
        the module's simple test configuration is used.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    if universe is None:
        dimension_config = _makeDimensionConfig()
    else:
        dimension_config = universe.dimensionConfig
    with temporaryDirectory() as tmpdir:
        registry_config = RegistryConfig()
        registry_config["db"] = f"sqlite:///{tmpdir}/gen3.sqlite"
        if create:
            Registry.createFromConfig(registry_config, dimensionConfig=dimension_config)
        yield registry_config

127 

128 

class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Trivial connections class declaring a single templated init-input."""

    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")

131 

132 

class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Trivial task config with a single free-form string field."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        # Explicit base-class call; no additional defaults are set here.
        PipelineTaskConfig.setDefaults(self)

138 

139 

def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default butler_config is set to `Config` populated with some defaults,
    it can be overridden completely by keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.

    Returns
    -------
    args : `types.SimpleNamespace`
        Namespace mimicking the parsed "pipetask run" command line.

    Raises
    ------
    RuntimeError
        Raised if the fake click command fails to produce default arguments.
    """
    # Import submodules explicitly: ``import unittest`` / ``import click``
    # alone do not guarantee the ``mock`` / ``testing`` attributes exist;
    # previously this relied on some other module importing them first.
    import unittest.mock

    import click.testing

    # Use a mock to get the default value of arguments to 'run'.
    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    # Rename a couple of click options to the attribute names the framework
    # actually consumes.
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    if "mock_configs" not in args:
        args["mock_configs"] = []
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args

198 

199 

class FakeDSType(NamedTuple):
    """Minimal stand-in for a dataset type, carrying only its name."""

    name: str

202 

203 

@dataclass(frozen=True)
class FakeDSRef:
    """Minimal immutable stand-in for a `DatasetRef`."""

    datasetType: str
    dataId: tuple

    def isComponent(self):
        # Fake references never represent dataset components.
        return False

211 

212 

# Task class name used by tests, needs to be importable
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"

215 

216 

def _makeDimensionConfig():
    """Make a simple dimension universe configuration."""

    def element_spec():
        # Elements "A" and "B" are defined identically: one integer key
        # backed by table-based record storage.  Build a fresh dict per
        # element so the two entries share no mutable state.
        return {
            "keys": [
                {
                    "name": "id",
                    "type": "int",
                }
            ],
            "storage": {
                "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
            },
        }

    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": element_spec(),
                "B": element_spec(),
            },
            "packers": {},
        }
    )

257 

258 

def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it, the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    fakeDSType = DatasetType("A", tuple(), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    dataId = DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe)
    quantum = Quantum(
        taskName=_TASK_CLASS,
        inputs={fakeDSType: [DatasetRef(fakeDSType, dataId)]},
    )  # type: ignore
    return QuantumGraph({taskDef: {quantum}}, universe=universe)

284 

285 

class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk"""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method"""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        # same override but delivered via a config file instead of inline
        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only most trivial case is tested that does not do actual graph
        building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None.
            qgraph = QuantumGraph(dict(), universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        # unrecognized show command is rejected at construction time
        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # History output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        # "uri" is a graph-level command, so it is left unhandled here
        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        # "history" requires an explicit parameter name
        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

462 

463class CmdLineFwkTestCaseWithButler(unittest.TestCase): 

464 """A test case for CmdLineFwk""" 

465 

466 def setUp(self): 

467 super().setUpClass() 

468 self.root = tempfile.mkdtemp() 

469 self.nQuanta = 5 

470 self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta) 

471 

472 def tearDown(self): 

473 shutil.rmtree(self.root, ignore_errors=True) 

474 super().tearDownClass() 

475 

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        # one task node per pipeline task, one quantum per task
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

495 

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        # Two diagnostics expected: the empty-query warning and the
        # missing-datasets explanation naming the empty input collection.
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

514 

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for case when output data for one task already appears in
        _input_ collection, but no ``--extend-run`` or ``-skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the *input* collection with all outputs of task0.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

554 

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _input_ collection. No ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the *input* collection with all outputs of task0 so
        # that its quantum can be skipped.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # one quantum is dropped from the graph because its outputs exist
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

593 

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _output_ collection. The ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # task0 outputs live in the *output run* collection this time.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # Graph does not include quantum for first task
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

639 

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # configure the factory to fail at the third quantum
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Re-run: task1 now has only partial outputs, which is an error
        # without --clobber-outputs.
        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

679 

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # configure the factory to fail at the third quantum
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # With --clobber-outputs the partial-output quantum is re-executed
        # instead of raising.
        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

723 

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        #  - nQuanta of init_outputs
        #  - nQuanta of configs
        #  - packages (single dataset)
        #  - nQuanta * two output datasets
        #  - nQuanta of metadata
        #  - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there, and it has all datasets but
        # non-InitOutputs are not in datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                # InitOutputs are not pruned, so this get must succeed
                butler.get(ref, collections="output/run2")
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref, collections="output/run2")

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

848 

    def testMockTask(self):
        """Test --mock option."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", mock=True, register_dataset_types=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        # None of the actual tasks is executed
        self.assertEqual(taskFactory.countExec, 0)

        # check dataset types; mocked runs register "_mock_"-prefixed types
        butler.registry.refresh()
        datasetTypes = list(butler.registry.queryDatasetTypes(re.compile("^_mock_.*")))
        self.assertEqual(len(datasetTypes), self.nQuanta * 2)

873 

    def testMockTaskFailure(self):
        """Test --mock option and configure one of the tasks to fail."""
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            mock=True,
            register_dataset_types=True,
            # make the mocked task3 fail for detector 0
            mock_configs=[
                _ACTION_CONFIG("task3-mock:failCondition='detector = 0'"),
            ],
            fail_fast=True,
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with self.assertRaises(MPGraphExecutorError) as cm:
            fwk.runPipeline(qgraph, taskFactory, args)

        # the simulated failure must be chained as the cause
        self.assertIsNotNone(cm.exception.__cause__)
        self.assertRegex(str(cm.exception.__cause__), "Simulated failure: task=task3")

902 

903 def testSubgraph(self): 

904 """Test successful execution of trivial quantum graph.""" 

905 args = _makeArgs(butler_config=self.root, input="test", output="output") 

906 butler = makeSimpleButler(self.root, run=args.input, inMemory=False) 

907 populateButler(self.pipeline, butler) 

908 

909 fwk = CmdLineFwk() 

910 qgraph = fwk.makeGraph(self.pipeline, args) 

911 

912 # Select first two nodes for execution. This depends on node ordering 

913 # which I assume is the same as execution order. 

914 nNodes = 2 

915 nodeIds = [node.nodeId for node in qgraph] 

916 nodeIds = nodeIds[:nNodes] 

917 

918 self.assertEqual(len(qgraph.taskGraph), self.nQuanta) 

919 self.assertEqual(len(qgraph), self.nQuanta) 

920 

921 with ( 

922 makeTmpFile(suffix=".qgraph") as tmpname, 

923 makeSQLiteRegistry(universe=butler.registry.dimensions) as registryConfig, 

924 ): 

925 with open(tmpname, "wb") as saveFile: 

926 qgraph.save(saveFile) 

927 

928 args = _makeArgs( 

929 qgraph=tmpname, 

930 qgraph_node_id=nodeIds, 

931 registryConfig=registryConfig, 

932 execution_butler_location=None, 

933 ) 

934 fwk = CmdLineFwk() 

935 

936 # load graph, should only read a subset 

937 qgraph = fwk.makeGraph(pipeline=None, args=args) 

938 self.assertEqual(len(qgraph), nNodes) 

939 

940 def testShowGraph(self): 

941 """Test for --show options for quantum graph.""" 

942 nQuanta = 2 

943 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

944 

945 show = ShowInfo(["graph"]) 

946 show.show_graph_info(qgraph) 

947 self.assertEqual(show.handled, {"graph"}) 

948 

949 def testShowGraphWorkflow(self): 

950 nQuanta = 2 

951 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

952 

953 show = ShowInfo(["workflow"]) 

954 show.show_graph_info(qgraph) 

955 self.assertEqual(show.handled, {"workflow"}) 

956 

957 # TODO: cannot test "uri" option presently, it instantiates 

958 # butler from command line options and there is no way to pass butler 

959 # mock to that code. 

960 show = ShowInfo(["uri"]) 

961 with self.assertRaises(ValueError): # No args given 

962 show.show_graph_info(qgraph) 

963 

964 def testSimpleQGraphDatastoreRecords(self): 

965 """Test quantum graph generation with --qgraph-datastore-records.""" 

966 args = _makeArgs( 

967 butler_config=self.root, input="test", output="output", qgraph_datastore_records=True 

968 ) 

969 butler = makeSimpleButler(self.root, run=args.input, inMemory=False) 

970 populateButler(self.pipeline, butler) 

971 

972 fwk = CmdLineFwk() 

973 qgraph = fwk.makeGraph(self.pipeline, args) 

974 self.assertEqual(len(qgraph), self.nQuanta) 

975 for i, qnode in enumerate(qgraph): 

976 quantum = qnode.quantum 

977 self.assertIsNotNone(quantum.datastore_records) 

978 # only the first quantum has a pre-existing input 

979 if i == 0: 

980 datastore_name = "FileDatastore@<butlerRoot>" 

981 self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name}) 

982 records_data = quantum.datastore_records[datastore_name] 

983 records = dict(records_data.records) 

984 self.assertEqual(len(records), 1) 

985 _, records = records.popitem() 

986 records = records["file_datastore_records"] 

987 self.assertEqual( 

988 [record.path for record in records], 

989 ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"], 

990 ) 

991 else: 

992 self.assertEqual(quantum.datastore_records, {}) 

993 

994 

class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """Run the standard lsst.utils.tests MemoryTestCase checks for this module."""

997 

998 

def setup_module(module):
    """Initialize the LSST test machinery; invoked by pytest before any
    test in this module runs.
    """
    lsst.utils.tests.init()

1001 

1002 

if __name__ == "__main__":
    # Allow running this test file directly: initialize the LSST test
    # machinery, then hand off to the standard unittest runner.
    lsst.utils.tests.init()
    unittest.main()