# NOTE(review): the lines below were residue of a coverage.py HTML report
# header (coverage.py v6.4.4, created 2022-09-15; 481 statements, 16%
# coverage for tests/test_cmdLineFwk.py). They are not part of the module
# source and have been converted to this inert comment.

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Simple unit test for cmdLineFwk module.
"""

24 

import contextlib
import copy
import logging
import os
import pickle
import re
import shutil
import tempfile
import unittest
import unittest.mock
from dataclasses import dataclass
from io import StringIO
from types import SimpleNamespace
from typing import NamedTuple

import click
import lsst.pex.config as pexConfig
import lsst.pipe.base.connectionTypes as cT
import lsst.utils.tests
from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError
from lsst.ctrl.mpexec.cli.opt import run_options
from lsst.ctrl.mpexec.cli.utils import (
    _ACTION_ADD_INSTRUMENT,
    _ACTION_ADD_TASK,
    _ACTION_CONFIG,
    _ACTION_CONFIG_FILE,
    PipetaskCommand,
)
from lsst.ctrl.mpexec.showInfo import ShowInfo
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DimensionConfig,
    DimensionUniverse,
    Quantum,
    Registry,
)
from lsst.daf.butler.core.datasets.type import DatasetType
from lsst.daf.butler.registry import RegistryConfig
from lsst.pipe.base import (
    Instrument,
    Pipeline,
    PipelineTaskConfig,
    PipelineTaskConnections,
    QuantumGraph,
    TaskDef,
)
from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant
from lsst.pipe.base.tests.simpleQGraph import (
    AddTask,
    AddTaskFactoryMock,
    makeSimpleButler,
    makeSimplePipeline,
    makeSimpleQGraph,
    populateButler,
)
from lsst.utils.tests import temporaryDirectory

82 

# Configure logging for the whole test module; the level can be overridden
# via the UNIT_TEST_LOGGING_LEVEL environment variable (defaults to INFO,
# and falls back to INFO when the variable names an unknown level).
logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() to not retrieve non-existing
# instrument from registry, these tests can run fine without actual instrument
# and implementing full mock for Instrument is too complicated.
# (A coverage-report annotation fused onto this line made it syntactically
# invalid; it has been removed.)
Instrument.fromName = lambda name, reg: None

89 

90 

@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    The temporary file is deleted on exiting the context, even when the
    body of the ``with`` statement raises an exception.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into the file; if `None` or empty the file is left
        empty.
    suffix : `str`, optional
        Suffix (e.g. file extension) for the temporary file name.

    Yields
    ------
    tmpname : `str`
        Path of the temporary file.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(fd, contents)
    os.close(fd)
    # The try/finally is essential: without it an exception raised in the
    # ``with`` body would skip the cleanup and leak the temporary file.
    try:
        yield tmpname
    finally:
        # The caller may have removed the file already, hence suppress.
        with contextlib.suppress(OSError):
            os.remove(tmpname)

109 

110 

@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager creating a fresh, empty SQLite registry database.

    Parameters
    ----------
    create : `bool`, optional
        If `True` (default) actually initialize the database schema.
    universe : `DimensionUniverse`, optional
        Universe whose dimension configuration should be used; when `None`
        the module's simple test configuration is used instead.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    if universe is None:
        dimensionConfig = _makeDimensionConfig()
    else:
        dimensionConfig = universe.dimensionConfig
    with temporaryDirectory() as tmpdir:
        registryConfig = RegistryConfig()
        registryConfig["db"] = f"sqlite:///{tmpdir}/gen3.sqlite"
        if create:
            Registry.createFromConfig(registryConfig, dimensionConfig=dimensionConfig)
        yield registryConfig

128 

129 

class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Trivial connections class declaring a single init-input dataset."""

    # The dataset type name is expanded from the "template" default above,
    # yielding "simpleschema" unless the template is overridden.
    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")

132 

133 

class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Trivial task config with one arbitrary string field."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        # No extra defaults needed; delegate to the base implementation.
        PipelineTaskConfig.setDefaults(self)

139 

140 

def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments for the "pipetask run" command.

    By default butler_config is set to `Config` populated with some defaults,
    it can be overridden completely by keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.

    Returns
    -------
    args : `types.SimpleNamespace`
        Namespace mimicking the parsed command-line options.
    """
    # Use a mock to capture the default values of all options to 'run'.
    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    # Rename a couple of options to the names the framework code expects.
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    # Some list-valued options may be absent entirely; default them to empty.
    args.setdefault("pipeline_actions", [])
    args.setdefault("mock_configs", [])
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args

199 

200 

class FakeDSType(NamedTuple):
    """Minimal stand-in for a dataset type: it only carries a name."""

    # The sole attribute that the code under test looks up.
    name: str

203 

204 

@dataclass(frozen=True)
class FakeDSRef:
    """Minimal stand-in for `DatasetRef`; frozen so instances are hashable."""

    # Name of the dataset type (a plain string, not a DatasetType object).
    datasetType: str
    # Data ID represented as a hashable tuple.
    dataId: tuple

    def isComponent(self):
        # The code under test only needs to know this is not a component ref.
        return False

212 

213 

# Fully qualified task class name used by tests; it must be importable
# because the pipeline machinery loads the task by name.
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"

216 

217 

def _makeDimensionConfig():
    """Make a simple dimension universe configuration.

    Returns
    -------
    config : `DimensionConfig`
        Configuration with two identical dimensions ("A" and "B") and a
        minimal skypix section.
    """
    # Both test dimensions use the same definition: one integer key backed
    # by table storage.  Build them with a comprehension so each element
    # gets its own fresh dictionary.
    elements = {
        name: {
            "keys": [
                {
                    "name": "id",
                    "type": "int",
                }
            ],
            "storage": {
                "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
            },
        }
        for name in ("A", "B")
    }
    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": elements,
            "packers": {},
        }
    )

258 

259 

def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it, the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    fakeDSType = DatasetType("A", tuple(), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    dataId = DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe)
    # A single quantum with one input ref is enough for pickling tests.
    quantum = Quantum(
        taskName=_TASK_CLASS,
        inputs={fakeDSType: [DatasetRef(fakeDSType, dataId)]},
    )  # type: ignore
    return QuantumGraph({taskDef: {quantum}}, universe=universe)

285 

286 

class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk covering pipeline construction, graph
    loading from saved files, and the --show option handling."""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method"""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        # a config override from a file takes effect the same way
        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only most trivial case is tested that does not do actual graph
        building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:

            # make non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None and make a warning
            qgraph = QuantumGraph(dict(), universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertWarnsRegex(UserWarning, "QuantumGraph is empty"):
                # this also tests that warning is generated for empty graph
                qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        # unrecognized show keyword must be rejected up front
        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # History output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        # "uri" is a graph-level option and must be left unhandled here
        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

465 

466 

class CmdLineFwkTestCaseWithButler(unittest.TestCase):
    """A test case for CmdLineFwk that builds and executes quantum graphs
    against a real (temporary, on-disk) butler repository."""

    def setUp(self):
        # Fixed: this previously called super().setUpClass(), which is the
        # class-level fixture hook; the correct per-test hook is setUp().
        super().setUp()
        self.root = tempfile.mkdtemp()
        self.nQuanta = 5
        self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta)

    def tearDown(self):
        shutil.rmtree(self.root, ignore_errors=True)
        # Fixed: this previously called super().tearDownClass(); see setUp().
        super().tearDown()

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for case when output data for one task already appears in
        _input_ collection, but no ``--extend-run`` or ``-skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _input_ collection. No ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _output_ collection. The ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # Graph does not include quantum for first task
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        # without --clobber-outputs the half-written quantum cannot be re-run
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        # - nQuanta of init_outputs
        # - nQuanta of configs
        # - packages (single dataset)
        # - nQuanta * two output datasets
        # - nQuanta of metadata
        # - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there, and it has all datasets but
        # non-InitOutputs are not in datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                butler.get(ref, collections="output/run2")
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref, collections="output/run2")

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

    def testMockTask(self):
        """Test --mock option."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", mock=True, register_dataset_types=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        # None of the actual tasks is executed
        self.assertEqual(taskFactory.countExec, 0)

        # check dataset types
        butler.registry.refresh()
        datasetTypes = list(butler.registry.queryDatasetTypes(re.compile("^_mock_.*")))
        self.assertEqual(len(datasetTypes), self.nQuanta * 2)

    def testMockTaskFailure(self):
        """Test --mock option and configure one of the tasks to fail."""
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            mock=True,
            register_dataset_types=True,
            mock_configs=[
                _ACTION_CONFIG("task3-mock:failCondition='detector = 0'"),
            ],
            fail_fast=True,
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with self.assertRaises(MPGraphExecutorError) as cm:
            fwk.runPipeline(qgraph, taskFactory, args)

        self.assertIsNotNone(cm.exception.__cause__)
        self.assertRegex(str(cm.exception.__cause__), "Simulated failure: task=task3")

    def testSubgraph(self):
        """Test loading a subset of nodes from a saved quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)

        # Select first two nodes for execution. This depends on node ordering
        # which I assume is the same as execution order.
        nNodes = 2
        nodeIds = [node.nodeId for node in qgraph]
        nodeIds = nodeIds[:nNodes]

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry(
            universe=butler.registry.dimensions
        ) as registryConfig:
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)

            args = _makeArgs(
                qgraph=tmpname,
                qgraph_node_id=nodeIds,
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            fwk = CmdLineFwk()

            # load graph, should only read a subset
            qgraph = fwk.makeGraph(pipeline=None, args=args)
            self.assertEqual(len(qgraph), nNodes)

    def testShowGraph(self):
        """Test for --show options for quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["graph"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"graph"})

    def testShowGraphWorkflow(self):
        """Test for --show workflow and uri options for quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["workflow"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"workflow"})

        # TODO: cannot test "uri" option presently, it instantiates
        # butler from command line options and there is no way to pass butler
        # mock to that code.
        show = ShowInfo(["uri"])
        with self.assertRaises(ValueError):  # No args given
            show.show_graph_info(qgraph)

    def testSimpleQGraphDatastoreRecords(self):
        """Test quantum graph generation with --qgraph-datastore-records."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)
        for i, qnode in enumerate(qgraph):
            quantum = qnode.quantum
            self.assertIsNotNone(quantum.datastore_records)
            # only the first quantum has a pre-existing input
            if i == 0:
                datastore_name = "FileDatastore@<butlerRoot>"
                self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name})
                records_data = quantum.datastore_records[datastore_name]
                records = dict(records_data.records)
                self.assertEqual(len(records), 1)
                _, records = records.popitem()
                records = records["file_datastore_records"]
                self.assertEqual(
                    [record.path for record in records],
                    ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"],
                )
            else:
                self.assertEqual(quantum.datastore_records, {})

970 

class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """Resource/file-descriptor leak check; all logic is in the base class."""

    pass

973 

974 

def setup_module(module):
    """Pytest hook: initialize LSST test utilities before this module runs."""
    lsst.utils.tests.init()

977 

978 

979if __name__ == "__main__": 979 ↛ 980line 979 didn't jump to line 980, because the condition on line 979 was never true

980 lsst.utils.tests.init() 

981 unittest.main()