# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

22"""Simple unit test for cmdLineFwk module. 

23""" 

24 

import contextlib
import logging
import os
import pickle
import re
import shutil
import tempfile
import unittest
import unittest.mock
from dataclasses import dataclass
from io import StringIO
from types import SimpleNamespace
from typing import NamedTuple

import click
import click.testing
import lsst.pex.config as pexConfig
import lsst.pipe.base.connectionTypes as cT
import lsst.utils.tests
from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError
from lsst.ctrl.mpexec.cli.opt import run_options
from lsst.ctrl.mpexec.cli.utils import (
    _ACTION_ADD_INSTRUMENT,
    _ACTION_ADD_TASK,
    _ACTION_CONFIG,
    _ACTION_CONFIG_FILE,
    PipetaskCommand,
)
from lsst.ctrl.mpexec.showInfo import ShowInfo
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DimensionConfig,
    DimensionUniverse,
    Quantum,
    Registry,
)
from lsst.daf.butler.core.datasets.type import DatasetType
from lsst.daf.butler.registry import RegistryConfig
from lsst.pipe.base import (
    Instrument,
    Pipeline,
    PipelineTaskConfig,
    PipelineTaskConnections,
    QuantumGraph,
    TaskDef,
)
from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant
from lsst.pipe.base.tests.simpleQGraph import (
    AddTask,
    AddTaskFactoryMock,
    makeSimpleButler,
    makeSimplePipeline,
    makeSimpleQGraph,
    populateButler,
)
from lsst.utils.tests import temporaryDirectory

logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() so that it does not try to
# retrieve a non-existing instrument from the registry; these tests run fine
# without an actual instrument, and implementing a full mock for Instrument
# is too complicated.
Instrument.fromName = lambda name, reg: None


@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating a temporary file name.

    The temporary file is deleted on exiting the context.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into the file.
    suffix : `str`, optional
        Suffix to use for the temporary file name.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(fd, contents)
    os.close(fd)
    yield tmpname
    with contextlib.suppress(OSError):
        os.remove(tmpname)

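# Illustrative usage of makeTmpFile (a sketch; the tests below do the real
# calls): write config overrides to a temporary file and hand the name to
# code that expects a file path.
#
#     with makeTmpFile(b"config.addend = 1000\n") as tmpname:
#         ...  # use tmpname; the file is removed on exiting the context
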

@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create a new empty registry database.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for the initialized registry database.
    """
    dimensionConfig = universe.dimensionConfig if universe is not None else _makeDimensionConfig()
    with temporaryDirectory() as tmpdir:
        uri = f"sqlite:///{tmpdir}/gen3.sqlite"
        config = RegistryConfig()
        config["db"] = uri
        if create:
            Registry.createFromConfig(config, dimensionConfig=dimensionConfig)
        yield config

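# Illustrative usage (a sketch): pass the yielded RegistryConfig to
# _makeArgs(registryConfig=...) so that the framework talks to a throwaway
# SQLite registry, as testMakeGraphFromSave() does below.
#
#     with makeSQLiteRegistry() as registryConfig:
#         args = _makeArgs(qgraph="some.qgraph", registryConfig=registryConfig)
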

class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")


class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        PipelineTaskConfig.setDefaults(self)


def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default ``butler_config`` is set to a `Config` populated with some
    defaults; it can be overridden completely via a keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to get the default value of arguments to 'run'.

    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options
        in the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option.
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    args = SimpleNamespace(**args)

    # Override butler_config with our defaults.
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to
        # specify some root here for it to use.
        args.butler_config.configFile = "."

    # Override arguments from keyword parameters.
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args

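# Illustrative example (a sketch): build "pipetask run"-like arguments with a
# few overrides; everything else keeps the CLI defaults gathered above.
#
#     args = _makeArgs(butler_config=".", input="test", output="output")
#     assert args.output == "output"
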

class FakeDSType(NamedTuple):
    name: str


@dataclass(frozen=True)
class FakeDSRef:
    datasetType: str
    dataId: tuple

    def isComponent(self):
        return False


# Task class name used by tests; it needs to be importable.
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"


def _makeDimensionConfig():
    """Make a simple dimension universe configuration."""
    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
                "B": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
            },
            "packers": {},
        }
    )

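# A DimensionUniverse for these tests can be constructed directly from this
# configuration, e.g. DimensionUniverse(config=_makeDimensionConfig()), as
# _makeQGraph() below does.
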

def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it; the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    fakeDSType = DatasetType("A", tuple(), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    quanta = [
        Quantum(
            taskName=_TASK_CLASS,
            inputs={
                fakeDSType: [
                    DatasetRef(
                        fakeDSType,
                        DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe),
                        run="fake_run",
                    )
                ]
            },
        )
    ]  # type: ignore
    qgraph = QuantumGraph({taskDef: set(quanta)}, universe=universe)
    return qgraph


class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk."""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method."""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only the most trivial case is tested here, one that does not do
        actual graph building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None.
            qgraph = QuantumGraph(dict(), universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # History output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))


class CmdLineFwkTestCaseWithButler(unittest.TestCase):
    """A test case for CmdLineFwk."""

    def setUp(self):
        super().setUp()
        self.root = tempfile.mkdtemp()
        self.nQuanta = 5
        self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta)

    def tearDown(self):
        shutil.rmtree(self.root, ignore_errors=True)
        super().tearDown()

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for the case when output data for one task already appears
        in the _input_ collection, but no ``--extend-run`` or
        ``--skip-existing`` option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test ``--skip-existing`` when output data for one task already
        appears in the _input_ collection. No ``--extend-run`` option is
        needed for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test ``--skip-existing`` when output data for one task already
        appears in the _output_ collection. The ``--extend-run`` option is
        needed for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # Graph does not include quantum for first task
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        # - nQuanta of init_outputs
        # - nQuanta of configs
        # - packages (single dataset)
        # - nQuanta * two output datasets
        # - nQuanta of metadata
        # - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there, and it has all datasets but
        # non-InitOutputs are not in datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                butler.get(ref)
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

    def testSubgraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)

        # Select first two nodes for execution. This depends on node ordering
        # which I assume is the same as execution order.
        nNodes = 2
        nodeIds = [node.nodeId for node in qgraph]
        nodeIds = nodeIds[:nNodes]

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with (
            makeTmpFile(suffix=".qgraph") as tmpname,
            makeSQLiteRegistry(universe=butler.dimensions) as registryConfig,
        ):
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)

            args = _makeArgs(
                qgraph=tmpname,
                qgraph_node_id=nodeIds,
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            fwk = CmdLineFwk()

            # load graph, should only read a subset
            qgraph = fwk.makeGraph(pipeline=None, args=args)
            self.assertEqual(len(qgraph), nNodes)

    def testShowGraph(self):
        """Test for --show options for quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["graph"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"graph"})

    def testShowGraphWorkflow(self):
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["workflow"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"workflow"})

        # TODO: cannot test the "uri" option presently; it instantiates a
        # butler from command line options and there is no way to pass a
        # butler mock to that code.
        show = ShowInfo(["uri"])
        with self.assertRaises(ValueError):  # No args given
            show.show_graph_info(qgraph)

    def testSimpleQGraphDatastoreRecords(self):
        """Test quantum graph generation with --qgraph-datastore-records."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)
        for i, qnode in enumerate(qgraph):
            quantum = qnode.quantum
            self.assertIsNotNone(quantum.datastore_records)
            # only the first quantum has a pre-existing input
            if i == 0:
                datastore_name = "FileDatastore@<butlerRoot>"
                self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name})
                records_data = quantum.datastore_records[datastore_name]
                records = dict(records_data.records)
                self.assertEqual(len(records), 1)
                _, records = records.popitem()
                records = records["file_datastore_records"]
                self.assertEqual(
                    [record.path for record in records],
                    ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"],
                )
            else:
                self.assertEqual(quantum.datastore_records, {})


class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    pass


def setup_module(module):
    lsst.utils.tests.init()


if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()