Coverage for tests/test_cmdLineFwk.py: 14%

492 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-13 02:55 -0800

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Simple unit test for cmdLineFwk module. 

23""" 

24 

25import contextlib 

26import copy 

27import logging 

28import os 

29import pickle 

30import re 

31import shutil 

32import tempfile 

33import unittest 

34from dataclasses import dataclass 

35from io import StringIO 

36from types import SimpleNamespace 

37from typing import NamedTuple 

38 

39import click 

40import lsst.pex.config as pexConfig 

41import lsst.pipe.base.connectionTypes as cT 

42import lsst.utils.tests 

43from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError 

44from lsst.ctrl.mpexec.cli.opt import run_options 

45from lsst.ctrl.mpexec.cli.utils import ( 

46 _ACTION_ADD_INSTRUMENT, 

47 _ACTION_ADD_TASK, 

48 _ACTION_CONFIG, 

49 _ACTION_CONFIG_FILE, 

50 PipetaskCommand, 

51) 

52from lsst.ctrl.mpexec.showInfo import ShowInfo 

53from lsst.daf.butler import ( 

54 Config, 

55 DataCoordinate, 

56 DatasetRef, 

57 DimensionConfig, 

58 DimensionUniverse, 

59 Quantum, 

60 Registry, 

61) 

62from lsst.daf.butler.core.datasets.type import DatasetType 

63from lsst.daf.butler.registry import RegistryConfig 

64from lsst.pipe.base import ( 

65 Instrument, 

66 Pipeline, 

67 PipelineTaskConfig, 

68 PipelineTaskConnections, 

69 QuantumGraph, 

70 TaskDef, 

71) 

72from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant 

73from lsst.pipe.base.tests.simpleQGraph import ( 

74 AddTask, 

75 AddTaskFactoryMock, 

76 makeSimpleButler, 

77 makeSimplePipeline, 

78 makeSimpleQGraph, 

79 populateButler, 

80) 

81from lsst.utils.tests import temporaryDirectory 

82 

# Honor UNIT_TEST_LOGGING_LEVEL if it names a valid logging level; fall back
# to INFO for unset or unrecognized values.
logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() to not retrieve non-existing
# instrument from registry, these tests can run fine without actual instrument
# and implementing full mock for Instrument is too complicated.
Instrument.fromName = lambda name, reg: None

89 

90 

@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    Temporary file is deleted on exiting context.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into a file.
    suffix : `str`, optional
        Suffix for the temporary file name, e.g. ".qgraph".
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(fd, contents)
    os.close(fd)
    try:
        yield tmpname
    finally:
        # Remove the file even when the managed block raised; otherwise a
        # failing test would leak its temporary file.
        with contextlib.suppress(OSError):
            os.remove(tmpname)

109 

110 

@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create new empty registry database.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    # Take the dimension configuration from the given universe when one is
    # supplied; otherwise build the simple test configuration.
    if universe is not None:
        dimensionConfig = universe.dimensionConfig
    else:
        dimensionConfig = _makeDimensionConfig()
    with temporaryDirectory() as tmpdir:
        config = RegistryConfig()
        config["db"] = f"sqlite:///{tmpdir}/gen3.sqlite"
        if create:
            Registry.createFromConfig(config, dimensionConfig=dimensionConfig)
        yield config

128 

129 

class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Trivial connections class with a single init-input dataset."""

    # Dataset name is expanded from the "template" default, i.e. it is
    # "simpleschema" unless the template is overridden.
    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")

132 

133 

class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Trivial task config with a single string field."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        # Delegate to the base class; no additional defaults are set here.
        PipelineTaskConfig.setDefaults(self)

139 

140 

def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default butler_config is set to `Config` populated with some defaults,
    it can be overridden completely by keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Import submodules explicitly: plain "import unittest" / "import click"
    # (as done at module level) does not guarantee that the "mock" and
    # "testing" submodules are reachable as attributes of the parent package.
    from unittest.mock import Mock

    from click.testing import CliRunner

    # Use a mock to get the default value of arguments to 'run'.
    mock = Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    # Rename a couple of CLI options to the attribute names the framework
    # expects downstream.
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    if "mock_configs" not in args:
        args["mock_configs"] = []
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args

199 

200 

class FakeDSType(NamedTuple):
    """Minimal stand-in for a dataset type; only ``name`` is carried."""

    name: str

203 

204 

@dataclass(frozen=True)
class FakeDSRef:
    """Minimal immutable stand-in for a dataset reference."""

    # Dataset type name (a plain string rather than a DatasetType object).
    datasetType: str
    # Data ID coordinates; a tuple so the frozen dataclass stays hashable.
    dataId: tuple

    def isComponent(self) -> bool:
        # Mimic the DatasetRef API: fake refs never refer to a component.
        return False

212 

213 

# Fully-qualified task class name used by tests; it needs to be importable
# because the pipeline machinery loads tasks by name.
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"

216 

217 

def _makeDimensionConfig():
    """Make a simple dimension universe configuration."""

    def _element():
        # Both test dimensions ("A" and "B") share the same integer key and
        # the same table-backed record storage definition.
        return {
            "keys": [
                {
                    "name": "id",
                    "type": "int",
                }
            ],
            "storage": {
                "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
            },
        }

    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {name: _element() for name in ("A", "B")},
            "packers": {},
        }
    )

258 

259 

def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it, the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    dstype = DatasetType("A", tuple(), storageClass="ExposureF", universe=universe)
    dataId = DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe)
    quantum = Quantum(
        taskName=_TASK_CLASS,
        inputs={dstype: [DatasetRef(dstype, dataId)]},
    )  # type: ignore
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    return QuantumGraph({taskDef: {quantum}}, universe=universe)

285 

286 

class CmdLineFwkTestCase(unittest.TestCase):
    """Tests for CmdLineFwk that do not need a real butler repository."""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method"""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        # Config override read from a file instead of the command line.
        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only most trivial case is tested that does not do actual graph
        building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:

            # make non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None.
            qgraph = QuantumGraph(dict(), universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        # Unknown show keyword is rejected at construction time.
        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # History output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        # "uri" is a graph-level keyword so it stays unhandled here.
        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        # "history" requires a task::parameter value.
        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

463 

464 

465class CmdLineFwkTestCaseWithButler(unittest.TestCase): 

466 """A test case for CmdLineFwk""" 

467 

468 def setUp(self): 

469 super().setUpClass() 

470 self.root = tempfile.mkdtemp() 

471 self.nQuanta = 5 

472 self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta) 

473 

474 def tearDown(self): 

475 shutil.rmtree(self.root, ignore_errors=True) 

476 super().tearDownClass() 

477 

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # Graph should contain one task node and one quantum per task.
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

497 

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # The empty-graph diagnostics are emitted at CRITICAL level.
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

516 

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for case when output data for one task already appears in
        _input_ collection, but no ``--extend-run`` or ``-skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with outputs of the first task.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

556 

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _input_ collection. No ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with outputs of the first task.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # Unlike the no-skip case the quantum with existing outputs is
        # dropped from the graph itself.
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

595 

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _output_ collection. The ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # First task's outputs go into the output run, not the input
        # collection.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # Graph does not include quantum for first task
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

641 

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Task factory configured to raise on the third execution.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Re-run with skip-existing but without clobbering; partial outputs
        # should still make execution fail.
        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

681 

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Task factory configured to raise on the third execution.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # With --clobber-outputs the quantum with partial outputs is re-run
        # instead of failing.
        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

725 

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        # - nQuanta of init_outputs
        # - nQuanta of configs
        # - packages (single dataset)
        # - nQuanta * two output datasets
        # - nQuanta of metadata
        # - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there, and it has all datasets but
        # non-InitOutputs are not in datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                # InitOutputs are still readable.
                butler.get(ref, collections="output/run2")
            else:
                # Everything else was unstored by --prune-replaced=unstore.
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref, collections="output/run2")

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

845 

    def testMockTask(self):
        """Test --mock option."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", mock=True, register_dataset_types=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        # None of the actual tasks is executed
        self.assertEqual(taskFactory.countExec, 0)

        # check dataset types; mock execution registers "_mock_"-prefixed
        # dataset types instead of the real ones.
        butler.registry.refresh()
        datasetTypes = list(butler.registry.queryDatasetTypes(re.compile("^_mock_.*")))
        self.assertEqual(len(datasetTypes), self.nQuanta * 2)

870 

    def testMockTaskFailure(self):
        """Test --mock option and configure one of the tasks to fail."""
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            mock=True,
            register_dataset_types=True,
            # Configure the mock for task3 to fail on detector 0.
            mock_configs=[
                _ACTION_CONFIG("task3-mock:failCondition='detector = 0'"),
            ],
            fail_fast=True,
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with self.assertRaises(MPGraphExecutorError) as cm:
            fwk.runPipeline(qgraph, taskFactory, args)

        # The executor error should be chained to the simulated failure.
        self.assertIsNotNone(cm.exception.__cause__)
        self.assertRegex(str(cm.exception.__cause__), "Simulated failure: task=task3")

899 

900 def testSubgraph(self): 

901 """Test successful execution of trivial quantum graph.""" 

902 args = _makeArgs(butler_config=self.root, input="test", output="output") 

903 butler = makeSimpleButler(self.root, run=args.input, inMemory=False) 

904 populateButler(self.pipeline, butler) 

905 

906 fwk = CmdLineFwk() 

907 qgraph = fwk.makeGraph(self.pipeline, args) 

908 

909 # Select first two nodes for execution. This depends on node ordering 

910 # which I assume is the same as execution order. 

911 nNodes = 2 

912 nodeIds = [node.nodeId for node in qgraph] 

913 nodeIds = nodeIds[:nNodes] 

914 

915 self.assertEqual(len(qgraph.taskGraph), self.nQuanta) 

916 self.assertEqual(len(qgraph), self.nQuanta) 

917 

918 with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry( 

919 universe=butler.registry.dimensions 

920 ) as registryConfig: 

921 with open(tmpname, "wb") as saveFile: 

922 qgraph.save(saveFile) 

923 

924 args = _makeArgs( 

925 qgraph=tmpname, 

926 qgraph_node_id=nodeIds, 

927 registryConfig=registryConfig, 

928 execution_butler_location=None, 

929 ) 

930 fwk = CmdLineFwk() 

931 

932 # load graph, should only read a subset 

933 qgraph = fwk.makeGraph(pipeline=None, args=args) 

934 self.assertEqual(len(qgraph), nNodes) 

935 

936 def testShowGraph(self): 

937 """Test for --show options for quantum graph.""" 

938 nQuanta = 2 

939 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

940 

941 show = ShowInfo(["graph"]) 

942 show.show_graph_info(qgraph) 

943 self.assertEqual(show.handled, {"graph"}) 

944 

945 def testShowGraphWorkflow(self): 

946 nQuanta = 2 

947 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

948 

949 show = ShowInfo(["workflow"]) 

950 show.show_graph_info(qgraph) 

951 self.assertEqual(show.handled, {"workflow"}) 

952 

953 # TODO: cannot test "uri" option presently, it instantiates 

954 # butler from command line options and there is no way to pass butler 

955 # mock to that code. 

956 show = ShowInfo(["uri"]) 

957 with self.assertRaises(ValueError): # No args given 

958 show.show_graph_info(qgraph) 

959 

960 def testSimpleQGraphDatastoreRecords(self): 

961 """Test quantum graph generation with --qgraph-datastore-records.""" 

962 args = _makeArgs( 

963 butler_config=self.root, input="test", output="output", qgraph_datastore_records=True 

964 ) 

965 butler = makeSimpleButler(self.root, run=args.input, inMemory=False) 

966 populateButler(self.pipeline, butler) 

967 

968 fwk = CmdLineFwk() 

969 qgraph = fwk.makeGraph(self.pipeline, args) 

970 self.assertEqual(len(qgraph), self.nQuanta) 

971 for i, qnode in enumerate(qgraph): 

972 quantum = qnode.quantum 

973 self.assertIsNotNone(quantum.datastore_records) 

974 # only the first quantum has a pre-existing input 

975 if i == 0: 

976 datastore_name = "FileDatastore@<butlerRoot>" 

977 self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name}) 

978 records_data = quantum.datastore_records[datastore_name] 

979 records = dict(records_data.records) 

980 self.assertEqual(len(records), 1) 

981 _, records = records.popitem() 

982 records = records["file_datastore_records"] 

983 self.assertEqual( 

984 [record.path for record in records], 

985 ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"], 

986 ) 

987 else: 

988 self.assertEqual(quantum.datastore_records, {}) 

989 

990 

class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """Hook in the standard ``lsst.utils.tests`` memory/file-leak checks."""

    pass

993 

994 

def setup_module(module):
    """Initialize the LSST test framework (pytest ``setup_module`` hook).

    Parameters
    ----------
    module : `module`
        Unused; required by the pytest ``setup_module`` interface.
    """
    lsst.utils.tests.init()

997 

998 

if __name__ == "__main__":
    # Support running this test file directly (outside pytest).
    lsst.utils.tests.init()
    unittest.main()