Coverage for tests/test_cmdLineFwk.py: 14%

492 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-07 02:42 -0800

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Simple unit test for cmdLineFwk module. 

23""" 

24 

25import contextlib 

26import copy 

27import logging 

28import os 

29import pickle 

30import re 

31import shutil 

32import tempfile 

33import unittest 

34from dataclasses import dataclass 

35from io import StringIO 

36from types import SimpleNamespace 

37from typing import NamedTuple 

38 

39import click 

40import lsst.pex.config as pexConfig 

41import lsst.pipe.base.connectionTypes as cT 

42import lsst.utils.tests 

43from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError 

44from lsst.ctrl.mpexec.cli.opt import run_options 

45from lsst.ctrl.mpexec.cli.utils import ( 

46 _ACTION_ADD_INSTRUMENT, 

47 _ACTION_ADD_TASK, 

48 _ACTION_CONFIG, 

49 _ACTION_CONFIG_FILE, 

50 PipetaskCommand, 

51) 

52from lsst.ctrl.mpexec.showInfo import ShowInfo 

53from lsst.daf.butler import ( 

54 Config, 

55 DataCoordinate, 

56 DatasetRef, 

57 DimensionConfig, 

58 DimensionUniverse, 

59 Quantum, 

60 Registry, 

61) 

62from lsst.daf.butler.core.datasets.type import DatasetType 

63from lsst.daf.butler.registry import RegistryConfig 

64from lsst.pipe.base import ( 

65 Instrument, 

66 Pipeline, 

67 PipelineTaskConfig, 

68 PipelineTaskConnections, 

69 QuantumGraph, 

70 TaskDef, 

71) 

72from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant 

73from lsst.pipe.base.tests.simpleQGraph import ( 

74 AddTask, 

75 AddTaskFactoryMock, 

76 makeSimpleButler, 

77 makeSimplePipeline, 

78 makeSimpleQGraph, 

79 populateButler, 

80) 

81from lsst.utils.tests import temporaryDirectory 

82 

# Log level is taken from the environment so CI can turn up verbosity without
# editing the test file; falls back to INFO for unknown level names.
logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() to not retrieve non-existing
# instrument from registry, these tests can run fine without actual instrument
# and implementing full mock for Instrument is too complicated.
Instrument.fromName = lambda name, reg: None

89 

90 

@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    Temporary file is deleted on exiting context, even when the context
    body raises.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into a file; nothing is written when `None` or empty.
    suffix : `str`, optional
        Suffix (e.g. file extension) for the temporary file name.

    Yields
    ------
    tmpname : `str`
        Path of the temporary file.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(fd, contents)
    os.close(fd)
    try:
        yield tmpname
    finally:
        # Previously cleanup was skipped when the body raised, leaking the
        # temporary file; try/finally guarantees removal on any exit path.
        with contextlib.suppress(OSError):
            os.remove(tmpname)

109 

110 

@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create new empty registry database.

    Parameters
    ----------
    create : `bool`, optional
        If `True` (default) the registry schema is created in the database.
    universe : `DimensionUniverse`, optional
        Dimension universe whose configuration seeds the registry; a simple
        test configuration is used when not given.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    if universe is None:
        dimensionConfig = _makeDimensionConfig()
    else:
        dimensionConfig = universe.dimensionConfig
    with temporaryDirectory() as tmpdir:
        # Database lives inside the temporary directory so it disappears
        # together with it when the context exits.
        config = RegistryConfig()
        config["db"] = f"sqlite:///{tmpdir}/gen3.sqlite"
        if create:
            Registry.createFromConfig(config, dimensionConfig=dimensionConfig)
        yield config

128 

129 

class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Trivial connections class declaring a single init-input dataset whose
    name is built from the ``template`` default template.
    """

    schema = cT.InitInput(
        doc="Schema",
        name="{template}schema",
        storageClass="SourceCatalog",
    )

132 

133 

class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Trivial task config with one arbitrary string field, used to exercise
    config display/override machinery in the tests.
    """

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        # Explicit base-class call (rather than super()) mirrors the style
        # used elsewhere in this package.
        PipelineTaskConfig.setDefaults(self)

139 

140 

def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default butler_config is set to `Config` populated with some defaults,
    it can be overridden completely by keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to get the default value of arguments to 'run'.

    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    # Rename a couple of options to the attribute names the framework expects.
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    # Ensure list-valued options exist even if the CLI did not supply them.
    args.setdefault("pipeline_actions", [])
    args.setdefault("mock_configs", [])
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    # Normalize the constraint expression into its variant object form.
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args

199 

200 

# Lightweight stand-in for a dataset type; only the ``name`` attribute is
# ever inspected by the tests, so a one-field named tuple suffices.
FakeDSType = NamedTuple("FakeDSType", [("name", str)])

203 

204 

@dataclass(frozen=True)
class FakeDSRef:
    """Hashable stand-in for a dataset reference.

    Carries only the dataset type name and a data ID tuple; frozen so
    instances can be used as dictionary keys / set members.
    """

    datasetType: str
    dataId: tuple

    def isComponent(self):
        """Fake references never represent dataset components."""
        return False

212 

213 

# Task class name used by tests, needs to be importable because the
# framework resolves it by dotted path at graph-build/run time.
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"

216 

217 

def _makeDimensionConfig():
    """Make a simple dimension universe configuration.

    The universe has two identically-shaped integer-keyed elements, "A" and
    "B", plus the minimal skypix setup required by the butler.
    """
    # Both elements share the same shape; build the template once and deep
    # copy it so each element owns an independent mapping.
    element = {
        "keys": [
            {
                "name": "id",
                "type": "int",
            }
        ],
        "storage": {
            "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
        },
    }
    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": copy.deepcopy(element),
                "B": copy.deepcopy(element),
            },
            "packers": {},
        }
    )

258 

259 

def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it, the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    dsType = DatasetType("A", tuple(), storageClass="ExposureF", universe=universe)
    dataId = DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe)
    # A single quantum with a single input ref is enough for serialization
    # round-trip tests.
    quantum = Quantum(
        taskName=_TASK_CLASS,
        inputs={dsType: [DatasetRef(dsType, dataId)]},
    )
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    return QuantumGraph({taskDef: {quantum}}, universe=universe)

285 

286 

class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk covering pipeline construction, graph
    loading from saved files, and the ``--show`` reporting options.
    """

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method"""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        # Same override but supplied via a config file instead of inline.
        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only most trivial case is tested that does not do actual graph
        building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None.
            qgraph = QuantumGraph(dict(), universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        # Unknown show keyword should be rejected at construction time.
        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # History output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        # "uri" is a graph-level option, so it stays unhandled here.
        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        # "history" without a value is an error.
        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

463 

464class CmdLineFwkTestCaseWithButler(unittest.TestCase): 

465 """A test case for CmdLineFwk""" 

466 

467 def setUp(self): 

468 super().setUpClass() 

469 self.root = tempfile.mkdtemp() 

470 self.nQuanta = 5 

471 self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta) 

472 

473 def tearDown(self): 

474 shutil.rmtree(self.root, ignore_errors=True) 

475 super().tearDownClass() 

476 

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # Graph should contain one task node and one quantum per task.
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

496 

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Expect two CRITICAL-level diagnostics explaining the empty graph.
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

515 

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for case when output data for one task already appears in
        _input_ collection, but no ``--extend-run`` or ``-skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the *input* collection with the first task's outputs.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

555 

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _input_ collection. No ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the *input* collection with the first task's outputs.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # The quantum whose outputs already exist is dropped from the graph.
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

594 

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _output_ collection. The ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # First task's outputs go into the *output run* collection this time.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # Graph does not include quantum for first task
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

640 

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Mock factory configured to raise on the third quantum.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Re-run: without --clobber-outputs the partial outputs make the
        # execution fail again.
        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

680 

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Mock factory configured to raise on the third quantum.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # With --clobber-outputs the partial quantum is re-executed instead
        # of failing.
        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

724 

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        #  - nQuanta of init_outputs
        #  - nQuanta of configs
        #  - packages (single dataset)
        #  - nQuanta * two output datasets
        #  - nQuanta of metadata
        #  - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there, and it has all datasets but
        # non-InitOutputs are not in datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                # InitOutputs are not unstored by prune-replaced.
                butler.get(ref, collections="output/run2")
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref, collections="output/run2")

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

844 

    def testMockTask(self):
        """Test --mock option."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", mock=True, register_dataset_types=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        # None of the actual tasks is executed
        self.assertEqual(taskFactory.countExec, 0)

        # check dataset types; mock execution writes "_mock_"-prefixed
        # dataset types instead of the real outputs.
        butler.registry.refresh()
        datasetTypes = list(butler.registry.queryDatasetTypes(re.compile("^_mock_.*")))
        self.assertEqual(len(datasetTypes), self.nQuanta * 2)

869 

    def testMockTaskFailure(self):
        """Test --mock option and configure one of the tasks to fail."""
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            mock=True,
            register_dataset_types=True,
            # Make the mock for task3 raise when its detector data ID is 0.
            mock_configs=[
                _ACTION_CONFIG("task3-mock:failCondition='detector = 0'"),
            ],
            fail_fast=True,
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with self.assertRaises(MPGraphExecutorError) as cm:
            fwk.runPipeline(qgraph, taskFactory, args)

        # The simulated failure should be chained as the cause.
        self.assertIsNotNone(cm.exception.__cause__)
        self.assertRegex(str(cm.exception.__cause__), "Simulated failure: task=task3")

898 

    def testSubgraph(self):
        """Test that loading a saved quantum graph with explicit node IDs
        reads back only the requested subset of nodes.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)

        # Select first two nodes for execution. This depends on node ordering
        # which I assume is the same as execution order.
        nNodes = 2
        nodeIds = [node.nodeId for node in qgraph]
        nodeIds = nodeIds[:nNodes]

        # The freshly-built graph contains every quantum.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with (
            makeTmpFile(suffix=".qgraph") as tmpname,
            makeSQLiteRegistry(universe=butler.registry.dimensions) as registryConfig,
        ):
            # Persist the full graph to disk.
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)

            # Arguments requesting only the selected node IDs from the
            # saved graph file.
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_node_id=nodeIds,
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            fwk = CmdLineFwk()

            # load graph, should only read a subset
            qgraph = fwk.makeGraph(pipeline=None, args=args)
            self.assertEqual(len(qgraph), nNodes)

936 def testShowGraph(self): 

937 """Test for --show options for quantum graph.""" 

938 nQuanta = 2 

939 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

940 

941 show = ShowInfo(["graph"]) 

942 show.show_graph_info(qgraph) 

943 self.assertEqual(show.handled, {"graph"}) 

944 

945 def testShowGraphWorkflow(self): 

946 nQuanta = 2 

947 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

948 

949 show = ShowInfo(["workflow"]) 

950 show.show_graph_info(qgraph) 

951 self.assertEqual(show.handled, {"workflow"}) 

952 

953 # TODO: cannot test "uri" option presently, it instantiates 

954 # butler from command line options and there is no way to pass butler 

955 # mock to that code. 

956 show = ShowInfo(["uri"]) 

957 with self.assertRaises(ValueError): # No args given 

958 show.show_graph_info(qgraph) 

959 

960 def testSimpleQGraphDatastoreRecords(self): 

961 """Test quantum graph generation with --qgraph-datastore-records.""" 

962 args = _makeArgs( 

963 butler_config=self.root, input="test", output="output", qgraph_datastore_records=True 

964 ) 

965 butler = makeSimpleButler(self.root, run=args.input, inMemory=False) 

966 populateButler(self.pipeline, butler) 

967 

968 fwk = CmdLineFwk() 

969 qgraph = fwk.makeGraph(self.pipeline, args) 

970 self.assertEqual(len(qgraph), self.nQuanta) 

971 for i, qnode in enumerate(qgraph): 

972 quantum = qnode.quantum 

973 self.assertIsNotNone(quantum.datastore_records) 

974 # only the first quantum has a pre-existing input 

975 if i == 0: 

976 datastore_name = "FileDatastore@<butlerRoot>" 

977 self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name}) 

978 records_data = quantum.datastore_records[datastore_name] 

979 records = dict(records_data.records) 

980 self.assertEqual(len(records), 1) 

981 _, records = records.popitem() 

982 records = records["file_datastore_records"] 

983 self.assertEqual( 

984 [record.path for record in records], 

985 ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"], 

986 ) 

987 else: 

988 self.assertEqual(quantum.datastore_records, {}) 

989 

990 

# Inherits the standard checks from lsst.utils.tests.MemoryTestCase
# unchanged; no extra behavior is needed for this module.
class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    pass

993 

994 

def setup_module(module):
    """Module-level test fixture: initialize lsst.utils test support.

    Parameters
    ----------
    module : `module`
        Module under test (unused here).
    """
    lsst.utils.tests.init()

997 

998 

if __name__ == "__main__":
    # When run directly as a script, initialize lsst.utils test support
    # and hand control to the unittest runner.
    lsst.utils.tests.init()
    unittest.main()