Coverage for tests/test_cmdLineFwk.py: 20%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

389 statements  

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Simple unit test for cmdLineFwk module. 

23""" 

24 

25import contextlib 

26import copy 

27import logging 

28import os 

29import pickle 

30import re 

31import shutil 

32import tempfile 

33import unittest 

34from dataclasses import dataclass 

35from types import SimpleNamespace 

36from typing import NamedTuple 

37 

38import click 

39import lsst.pex.config as pexConfig 

40import lsst.pipe.base.connectionTypes as cT 

41import lsst.utils.tests 

42from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError 

43from lsst.ctrl.mpexec.cli.opt import run_options 

44from lsst.ctrl.mpexec.cli.utils import ( 

45 _ACTION_ADD_INSTRUMENT, 

46 _ACTION_ADD_TASK, 

47 _ACTION_CONFIG, 

48 _ACTION_CONFIG_FILE, 

49 PipetaskCommand, 

50) 

51from lsst.daf.butler import Config, DataCoordinate, DatasetRef, DimensionUniverse, Quantum, Registry 

52from lsst.daf.butler.core.datasets.type import DatasetType 

53from lsst.daf.butler.registry import RegistryConfig 

54from lsst.obs.base import Instrument 

55from lsst.pipe.base import Pipeline, PipelineTaskConfig, PipelineTaskConnections, QuantumGraph, TaskDef 

56from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant 

57from lsst.pipe.base.tests.simpleQGraph import ( 

58 AddTask, 

59 AddTaskFactoryMock, 

60 makeSimpleButler, 

61 makeSimplePipeline, 

62 makeSimpleQGraph, 

63 populateButler, 

64) 

65from lsst.utils.tests import temporaryDirectory 

66 

# Pick the unit-test log level from the environment; unknown or missing
# level names fall back to INFO.
_level_name = os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO")
logging.basicConfig(level=getattr(logging, _level_name, logging.INFO))

# Have to monkey-patch Instrument.fromName() to not retrieve non-existing
# instrument from registry, these tests can run fine without actual instrument
# and implementing full mock for Instrument is too complicated.
Instrument.fromName = lambda name, reg: None

73 

74 

@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    Temporary file is deleted on exiting context.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into a file.
    suffix : `str`, optional
        Suffix for the temporary file name (e.g. ``".qgraph"``).

    Yields
    ------
    tmpname : `str`
        Path to the temporary file.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    try:
        if contents:
            os.write(fd, contents)
        os.close(fd)
        yield tmpname
    finally:
        # Clean up even when the caller's block raises; previously the
        # removal was skipped on exceptions, leaking the temporary file.
        with contextlib.suppress(OSError):
            os.remove(tmpname)

93 

94 

@contextlib.contextmanager
def makeSQLiteRegistry(create=True):
    """Context manager providing a registry configuration backed by an
    SQLite database located in a temporary directory.

    Both the database and its directory are removed on exiting the context.

    Parameters
    ----------
    create : `bool`, optional
        If `True` (default), initialize the registry database schema.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    with temporaryDirectory() as tmpdir:
        config = RegistryConfig()
        config["db"] = f"sqlite:///{tmpdir}/gen3.sqlite"
        if create:
            Registry.createFromConfig(config)
        yield config

111 

112 

# Minimal connections class with a single init-input connection; used only
# as the pipelineConnections for SimpleConfig below.
class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    # "{template}" expands via defaultTemplates, giving name "simpleschema".
    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")

115 

116 

# Trivial task config used by pipeline-construction tests.
class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        # Chain explicitly to the base implementation; no extra defaults.
        PipelineTaskConfig.setDefaults(self)

122 

123 

def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments for the "pipetask run" command.

    By default butler_config is set to `Config` populated with some defaults,
    it can be overridden completely by keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.

    Returns
    -------
    args : `types.SimpleNamespace`
        Namespace mimicking the parsed command-line arguments.

    Raises
    ------
    RuntimeError
        Raised if invoking the fake command fails.
    """
    # Explicitly import the submodules we rely on; ``import unittest`` and
    # ``import click`` at file scope do not guarantee that ``unittest.mock``
    # and ``click.testing`` are loaded.
    import unittest.mock

    import click.testing

    # Use a mock to get the default value of arguments to 'run'.
    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    # Rename a couple of options to the attribute names the framework uses.
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args

180 

181 

class FakeDSType(NamedTuple):
    """Minimal stand-in for a dataset type; tests only need the name."""

    name: str

184 

185 

@dataclass(frozen=True)
class FakeDSRef:
    """Minimal stand-in for a dataset reference used in tests."""

    # Dataset type name (a plain string, unlike the real DatasetRef).
    datasetType: str
    # Data ID values as a plain tuple.
    dataId: tuple

    def isComponent(self):
        # Mirror the real API; fakes never represent dataset components.
        return False

193 

194 

# Task class name used by tests, needs to be importable because pipeline
# construction resolves it to the actual AddTask class.
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"

197 

198 

def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it, the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    # Minimal dimension universe: the mandatory skypix section plus two
    # unrelated int-keyed elements "A" and "B" used in the fake data ID.
    config = Config(
        {
            "version": 1,
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
                "B": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
            },
            "packers": {},
        }
    )
    universe = DimensionUniverse(config=config)
    fakeDSType = DatasetType("A", tuple(), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    # Single quantum with one input ref; only needs to survive pickling.
    quanta = [
        Quantum(
            taskName=_TASK_CLASS,
            inputs={
                fakeDSType: [
                    DatasetRef(fakeDSType, DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe))
                ]
            },
        )
    ]  # type: ignore
    qgraph = QuantumGraph({taskDef: set(quanta)})
    return qgraph

261 

262 

class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk"""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method"""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        # config override read from a file
        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only most trivial case is tested that does not do actual graph
        building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:

            # make non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None and make a warning
            qgraph = QuantumGraph(dict())
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertWarnsRegex(UserWarning, "QuantumGraph is empty"):
                # this also tests that warning is generated for empty graph
                qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        # Exercise each supported --show variant; these only need to not
        # raise, output itself is not checked.
        args.show = ["pipeline"]
        fwk.showInfo(args, pipeline)
        args.show = ["config"]
        fwk.showInfo(args, pipeline)
        args.show = ["history=task::addend"]
        fwk.showInfo(args, pipeline)
        args.show = ["tasks"]
        fwk.showInfo(args, pipeline)

393 

394 

class CmdLineFwkTestCaseWithButler(unittest.TestCase):
    """A test case for CmdLineFwk that executes pipelines against an
    on-disk butler repository.
    """

    def setUp(self):
        # Fixed: previously called super().setUpClass() here (and
        # tearDownClass() in tearDown), invoking class-level hooks from the
        # per-test instance hooks; use the matching instance-level methods.
        super().setUp()
        self.root = tempfile.mkdtemp()
        self.nQuanta = 5
        self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta)

    def tearDown(self):
        shutil.rmtree(self.root, ignore_errors=True)
        super().tearDown()

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for case when output data for one task already appears in
        _input_ collection, but no ``--extend-run`` or ``--skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _input_ collection. No ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # First quantum is dropped from the graph because its outputs exist.
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _output_ collection. The ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # Graph does not include quantum for first task
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Factory configured to raise from the third task execution.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Without --clobber-outputs the partial outputs make the rerun fail.
        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        # - nQuanta of init_outputs
        # - nQuanta of configs
        # - packages (single dataset)
        # - nQuanta * two output datasets
        # - nQuanta of metadata
        # - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there, and it has all datasets but
        # non-InitOutputs are not in datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                butler.get(ref, collections="output/run2")
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref, collections="output/run2")

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

    def testSubgraph(self):
        """Test successful execution of a subset of a quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)

        # Select first two nodes for execution. This depends on node ordering
        # which I assume is the same as execution order.
        nNodes = 2
        nodeIds = [node.nodeId for node in qgraph]
        nodeIds = nodeIds[:nNodes]

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)

            args = _makeArgs(
                qgraph=tmpname,
                qgraph_node_id=nodeIds,
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            fwk = CmdLineFwk()

            # load graph, should only read a subset
            qgraph = fwk.makeGraph(pipeline=None, args=args)
            self.assertEqual(len(qgraph), nNodes)

    def testShowGraph(self):
        """Test for --show options for quantum graph."""
        fwk = CmdLineFwk()

        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        args = _makeArgs(show=["graph"])
        fwk.showInfo(args, pipeline=None, graph=qgraph)

    def testShowGraphWorkflow(self):
        """Test for --show=workflow option for quantum graph."""
        fwk = CmdLineFwk()

        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        args = _makeArgs(show=["workflow"])
        fwk.showInfo(args, pipeline=None, graph=qgraph)

        # TODO: cannot test "uri" option presently, it instanciates
        # butler from command line options and there is no way to pass butler
        # mock to that code.

811 

# Standard LSST leak check; runs last and verifies no file descriptors or
# memory are leaked by the tests above.
class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    pass

814 

815 

def setup_module(module):
    """Initialize LSST test utilities when the file is run via pytest.

    Parameters
    ----------
    module : `types.ModuleType`
        Module object (unused); required by the pytest hook signature.
    """
    lsst.utils.tests.init()

818 

819 

if __name__ == "__main__":
    # Direct invocation: initialize LSST test utilities then run unittest.
    lsst.utils.tests.init()
    unittest.main()