# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Simple unit test for cmdLineFwk module.
"""

import contextlib
import copy
import logging
import os
import pickle
import re
import shutil
import tempfile
import unittest
import unittest.mock
from dataclasses import dataclass
from types import SimpleNamespace
from typing import NamedTuple

import click
import click.testing
import lsst.pex.config as pexConfig
import lsst.pipe.base.connectionTypes as cT
import lsst.utils.tests
from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError
from lsst.ctrl.mpexec.cli.opt import run_options
from lsst.ctrl.mpexec.cli.utils import (
    _ACTION_ADD_INSTRUMENT,
    _ACTION_ADD_TASK,
    _ACTION_CONFIG,
    _ACTION_CONFIG_FILE,
    PipetaskCommand,
)
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DimensionConfig,
    DimensionUniverse,
    Quantum,
    Registry,
)
from lsst.daf.butler.core.datasets.type import DatasetType
from lsst.daf.butler.registry import RegistryConfig
from lsst.pipe.base import (
    Instrument,
    Pipeline,
    PipelineTaskConfig,
    PipelineTaskConnections,
    QuantumGraph,
    TaskDef,
)
from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant
from lsst.pipe.base.tests.simpleQGraph import (
    AddTask,
    AddTaskFactoryMock,
    makeSimpleButler,
    makeSimplePipeline,
    makeSimpleQGraph,
    populateButler,
)
from lsst.utils.tests import temporaryDirectory

# Logging level is taken from the UNIT_TEST_LOGGING_LEVEL environment
# variable; an unrecognized level name falls back to INFO.
logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() to not retrieve non-existing
# instrument from registry, these tests can run fine without actual instrument
# and implementing full mock for Instrument is too complicated.
Instrument.fromName = lambda name, reg: None

@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Yield the name of a freshly created temporary file.

    The temporary file is deleted on a normal exit from the context;
    failures to remove it are silently ignored.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into the file before yielding its name.
    suffix : `str`, optional
        Suffix for the generated file name.
    """
    handle, path = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(handle, contents)
    os.close(handle)
    yield path
    try:
        os.remove(path)
    except OSError:
        # Best-effort cleanup only.
        pass

@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager yielding configuration for a new SQLite registry.

    Parameters
    ----------
    create : `bool`, optional
        If `True` (default), initialize the registry database.
    universe : `DimensionUniverse`, optional
        Source of the dimension configuration; when `None` a minimal
        test configuration is used.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    if universe is not None:
        dimensions = universe.dimensionConfig
    else:
        dimensions = _makeDimensionConfig()
    with temporaryDirectory() as workdir:
        registryConfig = RegistryConfig()
        registryConfig["db"] = f"sqlite:///{workdir}/gen3.sqlite"
        if create:
            Registry.createFromConfig(registryConfig, dimensionConfig=dimensions)
        yield registryConfig

class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    # Minimal connections class: a single templated init-input and no
    # dimensions, just enough to build a pipeline in tests.
    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")

class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    # Trivial task configuration used when constructing test pipelines.
    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        # No extra defaults; chain to the base class explicitly.
        PipelineTaskConfig.setDefaults(self)

def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default butler_config is set to `Config` populated with some defaults,
    it can be overridden completely by keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to get the default value of arguments to 'run'.

    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    # Keyword arguments of the single call hold the parsed defaults.
    args = mock.call_args[1]
    # Rename a couple of CLI option names to the attribute names that the
    # framework code expects.
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    if "mock_configs" not in args:
        args["mock_configs"] = []
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    # Convert the string/None constraint expression into its variant object.
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args

class FakeDSType(NamedTuple):
    # Stand-in for a dataset type; only the ``name`` attribute is needed.
    name: str


@dataclass(frozen=True)
class FakeDSRef:
    # Stand-in for a dataset reference carrying just a type name and data ID.
    datasetType: str
    dataId: tuple

    def isComponent(self):
        # These tests never use component datasets.
        return False


# Task class name used by tests, needs to be importable
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"

def _makeDimensionConfig():
    """Make a simple dimension universe configuration.

    The universe contains two trivial integer-keyed elements ("A" and "B")
    plus the standard HTM skypix system.
    """

    def _intKeyedElement():
        # Build a fresh spec per element so nothing is shared between entries.
        return {
            "keys": [
                {
                    "name": "id",
                    "type": "int",
                }
            ],
            "storage": {
                "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
            },
        }

    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": _intKeyedElement(),
                "B": _intKeyedElement(),
            },
            "packers": {},
        }
    )

def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it, the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    fakeDSType = DatasetType("A", tuple(), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    # One quantum with a single input ref; no outputs are needed for
    # pickling/saving round-trips.
    quanta = [
        Quantum(
            taskName=_TASK_CLASS,
            inputs={
                fakeDSType: [
                    DatasetRef(fakeDSType, DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe))
                ]
            },
        )
    ]  # type: ignore
    qgraph = QuantumGraph({taskDef: set(quanta)}, universe=universe)
    return qgraph

class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk that does not need a butler repository."""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method."""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        # same override but read from a config file
        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only most trivial case is tested that does not do actual graph
        building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:

            # make non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None and make a warning
            qgraph = QuantumGraph(dict(), universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertWarnsRegex(UserWarning, "QuantumGraph is empty"):
                # this also tests that warning is generated for empty graph
                qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        # Each supported --show keyword should print without raising.
        args.show = ["pipeline"]
        fwk.showInfo(args, pipeline)
        args.show = ["config"]
        fwk.showInfo(args, pipeline)
        args.show = ["history=task::addend"]
        fwk.showInfo(args, pipeline)
        args.show = ["tasks"]
        fwk.showInfo(args, pipeline)

class CmdLineFwkTestCaseWithButler(unittest.TestCase):
    """A test case for CmdLineFwk that runs against a butler repository."""

420 def setUp(self): 

421 super().setUpClass() 

422 self.root = tempfile.mkdtemp() 

423 self.nQuanta = 5 

424 self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta) 

425 

426 def tearDown(self): 

427 shutil.rmtree(self.root, ignore_errors=True) 

428 super().tearDownClass() 

429 

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for case when output data for one task already appears in
        _input_ collection, but no ``--extend-run`` or ``--skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with task0's outputs.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _input_ collection. No ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with task0's outputs.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # One quantum is dropped from the graph because its outputs exist.
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _output_ collection. The ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the output run with task0's outputs.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # Graph does not include quantum for first task
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Configure the mock factory to fail at the third quantum.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Resuming without --clobber-outputs must fail on the partial outputs.
        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Configure the mock factory to fail at the third quantum.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Resume with clobbering enabled; the partially-complete quantum is
        # re-run in addition to the remaining ones.
        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        # - nQuanta of init_outputs
        # - nQuanta of configs
        # - packages (single dataset)
        # - nQuanta * two output datasets
        # - nQuanta of metadata
        # - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there, and it has all datasets but
        # non-InitOutputs are not in datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                # InitOutputs are kept in the datastore.
                butler.get(ref, collections="output/run2")
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref, collections="output/run2")

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

    def testMockTask(self):
        """Test --mock option."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", mock=True, register_dataset_types=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        # None of the actual tasks is executed
        self.assertEqual(taskFactory.countExec, 0)

        # check dataset types; mock execution registers "_mock_"-prefixed
        # dataset types instead of the real ones
        butler.registry.refresh()
        datasetTypes = list(butler.registry.queryDatasetTypes(re.compile("^_mock_.*")))
        self.assertEqual(len(datasetTypes), self.nQuanta * 2)

    def testMockTaskFailure(self):
        """Test --mock option and configure one of the tasks to fail."""
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            mock=True,
            register_dataset_types=True,
            mock_configs=[
                _ACTION_CONFIG("task3-mock:failCondition='detector = 0'"),
            ],
            fail_fast=True,
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # The configured failure should surface as the cause of the
        # executor error.
        with self.assertRaises(MPGraphExecutorError) as cm:
            fwk.runPipeline(qgraph, taskFactory, args)

        self.assertIsNotNone(cm.exception.__cause__)
        self.assertRegex(str(cm.exception.__cause__), "Simulated failure: task=task3")

    def testSubgraph(self):
        """Test loading a subset of nodes from a saved quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)

        # Select first two nodes for execution. This depends on node ordering
        # which I assume is the same as execution order.
        nNodes = 2
        nodeIds = [node.nodeId for node in qgraph]
        nodeIds = nodeIds[:nNodes]

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry(
            universe=butler.registry.dimensions
        ) as registryConfig:
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)

            args = _makeArgs(
                qgraph=tmpname,
                qgraph_node_id=nodeIds,
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            fwk = CmdLineFwk()

            # load graph, should only read a subset
            qgraph = fwk.makeGraph(pipeline=None, args=args)
            self.assertEqual(len(qgraph), nNodes)

    def testShowGraph(self):
        """Test for --show options for quantum graph."""
        fwk = CmdLineFwk()

        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        # only checks that showInfo() does not raise
        args = _makeArgs(show=["graph"])
        fwk.showInfo(args, pipeline=None, graph=qgraph)

    def testShowGraphWorkflow(self):
        """Test for --show workflow option for quantum graph."""
        fwk = CmdLineFwk()

        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        # only checks that showInfo() does not raise
        args = _makeArgs(show=["workflow"])
        fwk.showInfo(args, pipeline=None, graph=qgraph)

        # TODO: cannot test "uri" option presently, it instanciates
        # butler from command line options and there is no way to pass butler
        # mock to that code.

    def testSimpleQGraphDatastoreRecords(self):
        """Test quantum graph generation with --qgraph-datastore-records."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)
        for i, qnode in enumerate(qgraph):
            quantum = qnode.quantum
            self.assertIsNotNone(quantum.datastore_records)
            # only the first quantum has a pre-existing input
            if i == 0:
                datastore_name = "FileDatastore@<butlerRoot>"
                self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name})
                records_data = quantum.datastore_records[datastore_name]
                records = dict(records_data.records)
                self.assertEqual(len(records), 1)
                _, records = records.popitem()
                records = records["file_datastore_records"]
                self.assertEqual(
                    [record.path for record in records],
                    ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"],
                )
            else:
                self.assertEqual(quantum.datastore_records, {})

class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    # Standard LSST leak-check test case; all behavior comes from the
    # MemoryTestCase base class.
    pass

def setup_module(module):
    """Module-level initialization hook run by the test framework."""
    lsst.utils.tests.init()

# Allow running this test module directly as a script.
if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()