Coverage for tests/test_cmdLineFwk.py: 14%

496 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-02 10:22 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Simple unit test for cmdLineFwk module. 

23""" 

24 

25import contextlib 

26import logging 

27import os 

28import pickle 

29import re 

30import shutil 

31import tempfile 

32import unittest 

33from dataclasses import dataclass 

34from io import StringIO 

35from types import SimpleNamespace 

36from typing import NamedTuple 

37 

38import click 

39import lsst.pex.config as pexConfig 

40import lsst.pipe.base.connectionTypes as cT 

41import lsst.utils.tests 

42from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError 

43from lsst.ctrl.mpexec.cli.opt import run_options 

44from lsst.ctrl.mpexec.cli.utils import ( 

45 _ACTION_ADD_INSTRUMENT, 

46 _ACTION_ADD_TASK, 

47 _ACTION_CONFIG, 

48 _ACTION_CONFIG_FILE, 

49 PipetaskCommand, 

50) 

51from lsst.ctrl.mpexec.showInfo import ShowInfo 

52from lsst.daf.butler import ( 

53 Config, 

54 DataCoordinate, 

55 DatasetRef, 

56 DimensionConfig, 

57 DimensionUniverse, 

58 Quantum, 

59 Registry, 

60) 

61from lsst.daf.butler.core.datasets.type import DatasetType 

62from lsst.daf.butler.registry import RegistryConfig 

63from lsst.pipe.base import ( 

64 Instrument, 

65 Pipeline, 

66 PipelineTaskConfig, 

67 PipelineTaskConnections, 

68 QuantumGraph, 

69 TaskDef, 

70) 

71from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant 

72from lsst.pipe.base.tests.simpleQGraph import ( 

73 AddTask, 

74 AddTaskFactoryMock, 

75 makeSimpleButler, 

76 makeSimplePipeline, 

77 makeSimpleQGraph, 

78 populateButler, 

79) 

80from lsst.utils.tests import temporaryDirectory 

81 

# Configure test logging; level can be raised/lowered via the
# UNIT_TEST_LOGGING_LEVEL environment variable (defaults to INFO).
logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() to not retrieve non-existing
# instrument from registry, these tests can run fine without actual instrument
# and implementing full mock for Instrument is too complicated.
Instrument.fromName = lambda name, reg: None

88 

89 

@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    The temporary file is deleted on exiting the context, even when the
    body of the ``with`` block raises an exception.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into a file; if `None` or empty the file is left empty.
    suffix : `str`, optional
        Suffix (e.g. file extension) for the temporary file name.

    Yields
    ------
    tmpname : `str`
        Path of the temporary file.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(fd, contents)
    os.close(fd)
    try:
        yield tmpname
    finally:
        # Without try/finally an exception raised in the with-body would be
        # thrown into the generator at the yield and skip this cleanup,
        # leaking the temporary file.
        with contextlib.suppress(OSError):
            os.remove(tmpname)

108 

109 

@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create new empty registry database.

    Parameters
    ----------
    create : `bool`, optional
        If `True` (default), initialize the registry schema in the new
        database.
    universe : `DimensionUniverse`, optional
        Dimension universe whose configuration seeds the registry; falls
        back to the simple test configuration when omitted.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    if universe is None:
        dimensionConfig = _makeDimensionConfig()
    else:
        dimensionConfig = universe.dimensionConfig
    with temporaryDirectory() as tmpdir:
        registryConfig = RegistryConfig()
        # SQLite file lives inside the temporary directory and disappears
        # together with it.
        registryConfig["db"] = f"sqlite:///{tmpdir}/gen3.sqlite"
        if create:
            Registry.createFromConfig(registryConfig, dimensionConfig=dimensionConfig)
        yield registryConfig

127 

128 

class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Minimal connections class declaring a single templated init-input."""

    # Init-input dataset whose name is expanded from the "template" default.
    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")

131 

132 

class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Trivial pipeline task config with one free-form string field."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        # Delegate to the base class; no extra defaults are needed here.
        PipelineTaskConfig.setDefaults(self)

138 

139 

def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default butler_config is set to `Config` populated with some defaults,
    it can be overridden completely by keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.

    Returns
    -------
    args : `types.SimpleNamespace`
        Namespace with the same attributes that the real "pipetask run"
        command would produce, with defaults filled in.
    """
    # Use a mock to get the default value of arguments to 'run'.
    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    # Rename a couple of options to the attribute names the framework expects.
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    # Ensure list-valued options are always present, even if the CLI layer
    # did not supply them.
    args.setdefault("pipeline_actions", [])
    args.setdefault("mock_configs", [])
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args

198 

199 

class FakeDSType(NamedTuple):
    """Minimal stand-in for a dataset type; only the name is needed."""

    # Dataset type name.
    name: str

202 

203 

@dataclass(frozen=True)
class FakeDSRef:
    """Minimal stand-in for a dataset reference (frozen, so hashable)."""

    # Name of the referenced dataset type.
    datasetType: str
    # Data ID coordinates as a plain tuple.
    dataId: tuple

    def isComponent(self):
        """Return `False`; fake refs never represent component datasets."""
        return False

211 

212 

# Task class name used by tests, needs to be importable at run time
# (it is resolved by the framework when quanta are constructed/executed).
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"

215 

216 

def _makeDimensionConfig():
    """Make a simple dimension universe configuration.

    Returns
    -------
    config : `DimensionConfig`
        Configuration with a private namespace, a single sky pixelization
        and two identical integer-keyed dimensions ("A" and "B").
    """
    return DimensionConfig(
        {
            "version": 1,
            # Private namespace keeps this universe separate from the default.
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            # Two independent dimensions, each keyed by a single integer "id".
            "elements": {
                "A": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
                "B": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
            },
            "packers": {},
        }
    )

257 

258 

def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it, the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    datasetType = DatasetType("A", tuple(), storageClass="ExposureF", universe=universe)
    # Single fake input ref; the data ID values are arbitrary.
    dataId = DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe)
    inputRef = DatasetRef(datasetType, dataId, run="fake_run")
    quantum = Quantum(taskName=_TASK_CLASS, inputs={datasetType: [inputRef]})
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    return QuantumGraph({taskDef: {quantum}}, universe=universe)

288 

289 

class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk that does not need a full butler."""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method"""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        # config override from a file takes effect the same way
        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only most trivial case is tested that does not do actual graph
        building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None.
            qgraph = QuantumGraph(dict(), universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        # unknown show keywords are rejected at construction time
        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # History output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        # "uri" is a graph-level option and is not handled at pipeline stage
        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        # "history" without a field name is an error
        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

465 

466 

467class CmdLineFwkTestCaseWithButler(unittest.TestCase): 

468 """A test case for CmdLineFwk""" 

469 

470 def setUp(self): 

471 super().setUpClass() 

472 self.root = tempfile.mkdtemp() 

473 self.nQuanta = 5 

474 self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta) 

475 

476 def tearDown(self): 

477 shutil.rmtree(self.root, ignore_errors=True) 

478 super().tearDownClass() 

479 

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        # one task per quantum in this simple linear pipeline
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

499 

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        # both diagnostic messages must be emitted, in this order
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

518 

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for case when output data for one task already appears in
        _input_ collection, but no ``--extend-run`` or ``-skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with the first task's outputs.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

558 

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _input_ collection. No ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with the first task's outputs.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # one quantum is dropped from the graph because its outputs exist
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

597 

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _output_ collection. The ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Inputs go to the input collection; the first task's outputs are
        # pre-populated into the output run collection.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # Graph does not include quantum for first task
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

643 

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # the mock factory makes the third task raise
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Resume without --clobber-outputs: the partial outputs make the
        # re-run fail again.
        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

683 

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # the mock factory makes the third task raise
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Resume with --clobber-outputs: partial outputs are clobbered and
        # execution completes.
        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

727 

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        # - nQuanta of init_outputs
        # - nQuanta of configs
        # - packages (single dataset)
        # - nQuanta * two output datasets
        # - nQuanta of metadata
        # - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there, and it has all datasets but
        # non-InitOutputs are not in datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                # InitOutputs are kept in datastore and can still be read
                butler.get(ref, collections="output/run2")
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref, collections="output/run2")

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

852 

    def testMockTask(self):
        """Test --mock option."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", mock=True, register_dataset_types=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        # None of the actual tasks is executed
        self.assertEqual(taskFactory.countExec, 0)

        # check dataset types; mock execution registers "_mock_"-prefixed
        # dataset types instead of the real outputs
        butler.registry.refresh()
        datasetTypes = list(butler.registry.queryDatasetTypes(re.compile("^_mock_.*")))
        self.assertEqual(len(datasetTypes), self.nQuanta * 2)

877 

    def testMockTaskFailure(self):
        """Test --mock option and configure one of the tasks to fail."""
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            mock=True,
            register_dataset_types=True,
            # configure the mock for task3 to fail on detector 0
            mock_configs=[
                _ACTION_CONFIG("task3-mock:failCondition='detector = 0'"),
            ],
            fail_fast=True,
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with self.assertRaises(MPGraphExecutorError) as cm:
            fwk.runPipeline(qgraph, taskFactory, args)

        # the simulated failure must be preserved as the exception cause
        self.assertIsNotNone(cm.exception.__cause__)
        self.assertRegex(str(cm.exception.__cause__), "Simulated failure: task=task3")

906 

907 def testSubgraph(self): 

908 """Test successful execution of trivial quantum graph.""" 

909 args = _makeArgs(butler_config=self.root, input="test", output="output") 

910 butler = makeSimpleButler(self.root, run=args.input, inMemory=False) 

911 populateButler(self.pipeline, butler) 

912 

913 fwk = CmdLineFwk() 

914 qgraph = fwk.makeGraph(self.pipeline, args) 

915 

916 # Select first two nodes for execution. This depends on node ordering 

917 # which I assume is the same as execution order. 

918 nNodes = 2 

919 nodeIds = [node.nodeId for node in qgraph] 

920 nodeIds = nodeIds[:nNodes] 

921 

922 self.assertEqual(len(qgraph.taskGraph), self.nQuanta) 

923 self.assertEqual(len(qgraph), self.nQuanta) 

924 

925 with ( 

926 makeTmpFile(suffix=".qgraph") as tmpname, 

927 makeSQLiteRegistry(universe=butler.registry.dimensions) as registryConfig, 

928 ): 

929 with open(tmpname, "wb") as saveFile: 

930 qgraph.save(saveFile) 

931 

932 args = _makeArgs( 

933 qgraph=tmpname, 

934 qgraph_node_id=nodeIds, 

935 registryConfig=registryConfig, 

936 execution_butler_location=None, 

937 ) 

938 fwk = CmdLineFwk() 

939 

940 # load graph, should only read a subset 

941 qgraph = fwk.makeGraph(pipeline=None, args=args) 

942 self.assertEqual(len(qgraph), nNodes) 

943 

944 def testShowGraph(self): 

945 """Test for --show options for quantum graph.""" 

946 nQuanta = 2 

947 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

948 

949 show = ShowInfo(["graph"]) 

950 show.show_graph_info(qgraph) 

951 self.assertEqual(show.handled, {"graph"}) 

952 

953 def testShowGraphWorkflow(self): 

954 nQuanta = 2 

955 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

956 

957 show = ShowInfo(["workflow"]) 

958 show.show_graph_info(qgraph) 

959 self.assertEqual(show.handled, {"workflow"}) 

960 

961 # TODO: cannot test "uri" option presently, it instantiates 

962 # butler from command line options and there is no way to pass butler 

963 # mock to that code. 

964 show = ShowInfo(["uri"]) 

965 with self.assertRaises(ValueError): # No args given 

966 show.show_graph_info(qgraph) 

967 

968 def testSimpleQGraphDatastoreRecords(self): 

969 """Test quantum graph generation with --qgraph-datastore-records.""" 

970 args = _makeArgs( 

971 butler_config=self.root, input="test", output="output", qgraph_datastore_records=True 

972 ) 

973 butler = makeSimpleButler(self.root, run=args.input, inMemory=False) 

974 populateButler(self.pipeline, butler) 

975 

976 fwk = CmdLineFwk() 

977 qgraph = fwk.makeGraph(self.pipeline, args) 

978 self.assertEqual(len(qgraph), self.nQuanta) 

979 for i, qnode in enumerate(qgraph): 

980 quantum = qnode.quantum 

981 self.assertIsNotNone(quantum.datastore_records) 

982 # only the first quantum has a pre-existing input 

983 if i == 0: 

984 datastore_name = "FileDatastore@<butlerRoot>" 

985 self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name}) 

986 records_data = quantum.datastore_records[datastore_name] 

987 records = dict(records_data.records) 

988 self.assertEqual(len(records), 1) 

989 _, records = records.popitem() 

990 records = records["file_datastore_records"] 

991 self.assertEqual( 

992 [record.path for record in records], 

993 ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"], 

994 ) 

995 else: 

996 self.assertEqual(quantum.datastore_records, {}) 

997 

998 

class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """Inherit the standard resource-leak checks from
    `lsst.utils.tests.MemoryTestCase`; no additional tests are added.
    """

1001 

1002 

def setup_module(module):
    """Initialize the LSST test framework for this module (pytest
    module-level setup hook).
    """
    lsst.utils.tests.init()

1005 

1006 

if __name__ == "__main__":
    # Initialize the LSST test framework, then hand off to unittest.
    lsst.utils.tests.init()
    unittest.main()