Coverage for tests/test_cmdLineFwk.py: 13% (504 statements; coverage.py v7.2.5, created at 2023-05-18 09:18 +0000)

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Simple unit test for the cmdLineFwk module."""

import contextlib
import logging
import os
import pickle
import re
import shutil
import tempfile
import unittest
import unittest.mock
from dataclasses import dataclass
from io import StringIO
from types import SimpleNamespace
from typing import NamedTuple

import click
import click.testing
import lsst.pex.config as pexConfig
import lsst.pipe.base.connectionTypes as cT
import lsst.utils.tests
from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError
from lsst.ctrl.mpexec.cli.opt import run_options
from lsst.ctrl.mpexec.cli.utils import (
    _ACTION_ADD_INSTRUMENT,
    _ACTION_ADD_TASK,
    _ACTION_CONFIG,
    _ACTION_CONFIG_FILE,
    PipetaskCommand,
)
from lsst.ctrl.mpexec.showInfo import ShowInfo
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DimensionConfig,
    DimensionUniverse,
    Quantum,
    Registry,
)
from lsst.daf.butler.core.datasets.type import DatasetType
from lsst.daf.butler.registry import RegistryConfig
from lsst.pipe.base import (
    Instrument,
    Pipeline,
    PipelineTaskConfig,
    PipelineTaskConnections,
    QuantumGraph,
    TaskDef,
)
from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant
from lsst.pipe.base.tests.simpleQGraph import (
    AddTask,
    AddTaskFactoryMock,
    makeSimpleButler,
    makeSimplePipeline,
    makeSimpleQGraph,
    populateButler,
)
from lsst.utils.tests import temporaryDirectory

logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() so that it does not try to
# retrieve a non-existing instrument from the registry; these tests run fine
# without an actual instrument, and implementing a full mock for Instrument
# is too complicated.
Instrument.fromName = lambda name, reg: None


@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating a temporary file name.

    The temporary file is deleted on exiting the context.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into the file.
    suffix : `str`, optional
        Suffix (e.g. file extension) for the temporary file name.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(fd, contents)
    os.close(fd)
    yield tmpname
    with contextlib.suppress(OSError):
        os.remove(tmpname)
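

# Example usage of makeTmpFile (illustrative only; it mirrors the calls made
# in the tests below):
#
#     with makeTmpFile(b"config.addend = 1000\n", suffix=".py") as tmpname:
#         ...  # the named file exists inside the block and is removed on exit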

@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create a new empty registry database.

    Parameters
    ----------
    create : `bool`, optional
        If `True` (default), initialize the registry database from the
        configuration.
    universe : `DimensionUniverse`, optional
        Dimension universe whose configuration is used for the new registry;
        a simple test configuration is used when not given.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    dimensionConfig = universe.dimensionConfig if universe is not None else _makeDimensionConfig()
    with temporaryDirectory() as tmpdir:
        uri = f"sqlite:///{tmpdir}/gen3.sqlite"
        config = RegistryConfig()
        config["db"] = uri
        if create:
            Registry.createFromConfig(config, dimensionConfig=dimensionConfig)
        yield config
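

# Typical use of makeSQLiteRegistry (illustrative only; see
# testMakeGraphFromSave below for a real call):
#
#     with makeSQLiteRegistry() as registryConfig:
#         args = _makeArgs(registryConfig=registryConfig, ...)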

class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")


class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        PipelineTaskConfig.setDefaults(self)


def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default ``butler_config`` is set to a `Config` instance populated with
    some defaults; it can be overridden completely by a keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to get the default value of arguments to 'run'.
    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option.
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    if "mock_configs" not in args:
        args["mock_configs"] = []
    args = SimpleNamespace(**args)

    # Override butler_config with our defaults.
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use.
        args.butler_config.configFile = "."

    # Override arguments from keyword parameters.
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args
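

# Example call (illustrative only; this pattern appears throughout the test
# cases below):
#
#     args = _makeArgs(butler_config="/repo", input="test", output="output")
#     qgraph = CmdLineFwk().makeGraph(pipeline, args)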

class FakeDSType(NamedTuple):
    name: str


@dataclass(frozen=True)
class FakeDSRef:
    datasetType: str
    dataId: tuple

    def isComponent(self):
        return False
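

# Note: FakeDSType and FakeDSRef are minimal, pickle-friendly stand-ins for
# the butler's DatasetType and DatasetRef classes, for tests that only need a
# dataset type name, a data ID, and isComponent().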

# Task class name used by tests, needs to be importable.
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"


def _makeDimensionConfig():
    """Make a simple dimension universe configuration."""
    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
                "B": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
            },
            "packers": {},
        }
    )
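

# The configuration above defines two integer-keyed dimensions, "A" and "B";
# the fake quanta built by _makeQGraph() below use data IDs such as
# {"A": 1, "B": 2} against this universe.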

def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it; the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    fakeDSType = DatasetType("A", tuple(), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    quanta = [
        Quantum(
            taskName=_TASK_CLASS,
            inputs={
                fakeDSType: [
                    DatasetRef(
                        fakeDSType,
                        DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe),
                        run="fake_run",
                    )
                ]
            },
        )
    ]  # type: ignore
    qgraph = QuantumGraph({taskDef: set(quanta)}, universe=universe)
    return qgraph


class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk."""

    def testMakePipeline(self):
        """Tests for the CmdLineFwk.makePipeline method."""
        fwk = CmdLineFwk()

        # make an empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # a few tests with serialization
        with makeTmpFile() as tmpname:
            # make an empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read the pipeline from the file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single-task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # multi-task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single-task pipeline with config overrides, needs a real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option; for now this only checks that it does
        # not crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for the CmdLineFwk.makeGraph method.

        Only the most trivial case is tested, one that does not do actual
        graph building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make a non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if the graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading an empty graph from pickle should work, but makeGraph()
            # will return None
            qgraph = QuantumGraph(dict(), universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # history output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))


class CmdLineFwkTestCaseWithButler(unittest.TestCase):
    """A test case for CmdLineFwk that executes against a butler repository."""

    def setUp(self):
        super().setUp()
        self.root = tempfile.mkdtemp()
        self.nQuanta = 5
        self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta)

    def tearDown(self):
        shutil.rmtree(self.root, ignore_errors=True)
        super().tearDown()

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run the whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for the case when output data for one task already appears in
        the _input_ collection, but no ``--extend-run`` or ``--skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With the current implementation the graph has all nQuanta quanta,
        # but one quantum is skipped when executing.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run the whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` when output data for one task already
        appears in the _input_ collection. No ``--extend-run`` option is
        needed for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run the whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` when output data for one task already
        appears in the _output_ collection. The ``--extend-run`` option is
        needed for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # the graph does not include the quantum for the first task
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run the whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # the graph should have the expected number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # the number of executed quanta is incremented because one quantum
        # was re-executed
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # the graph should have the expected number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make a new
        # butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by the pipeline:
        # - nQuanta of init_outputs
        # - nQuanta of configs
        # - packages (single dataset)
        # - nQuanta * two output datasets
        # - nQuanta of metadata
        # - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
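        # For the default nQuanta = 5 set in setUp, this is 5 * 6 + 1 = 31.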

        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # The old output collection is still there, and it has all datasets,
        # but non-InitOutputs are not in the datastore.
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                butler.get(ref, collections="output/run2")
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref, collections="output/run2")

        # Re-run with --replace-run and --prune-replaced=purge.
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

    def testMockTask(self):
        """Test --mock option."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", mock=True, register_dataset_types=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run the whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        # none of the actual tasks is executed
        self.assertEqual(taskFactory.countExec, 0)

        # check dataset types
        butler.registry.refresh()
        datasetTypes = list(butler.registry.queryDatasetTypes(re.compile("^_mock_.*")))
        self.assertEqual(len(datasetTypes), self.nQuanta * 2)

    def testMockTaskFailure(self):
        """Test --mock option and configure one of the tasks to fail."""
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            mock=True,
            register_dataset_types=True,
            mock_configs=[
                _ACTION_CONFIG("task3-mock:failCondition='detector = 0'"),
            ],
            fail_fast=True,
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        with self.assertRaises(MPGraphExecutorError) as cm:
            fwk.runPipeline(qgraph, taskFactory, args)

        self.assertIsNotNone(cm.exception.__cause__)
        self.assertRegex(str(cm.exception.__cause__), "Simulated failure: task=task3")

    def testSubgraph(self):
        """Test loading a subset of nodes from a saved quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)

        # Select first two nodes for execution. This depends on node ordering,
        # which I assume is the same as execution order.
        nNodes = 2
        nodeIds = [node.nodeId for node in qgraph]
        nodeIds = nodeIds[:nNodes]

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with (
            makeTmpFile(suffix=".qgraph") as tmpname,
            makeSQLiteRegistry(universe=butler.registry.dimensions) as registryConfig,
        ):
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)

            args = _makeArgs(
                qgraph=tmpname,
                qgraph_node_id=nodeIds,
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            fwk = CmdLineFwk()

            # load the graph; this should only read a subset
            qgraph = fwk.makeGraph(pipeline=None, args=args)
            self.assertEqual(len(qgraph), nNodes)

    def testShowGraph(self):
        """Test for --show options for quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["graph"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"graph"})

    def testShowGraphWorkflow(self):
        """Test for the "workflow" and "uri" --show options for quantum
        graph.
        """
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["workflow"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"workflow"})

        # TODO: cannot test the "uri" option presently, it instantiates a
        # butler from command line options and there is no way to pass a
        # butler mock to that code.
        show = ShowInfo(["uri"])
        with self.assertRaises(ValueError):  # No args given
            show.show_graph_info(qgraph)

    def testSimpleQGraphDatastoreRecords(self):
        """Test quantum graph generation with --qgraph-datastore-records."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)
        for i, qnode in enumerate(qgraph):
            quantum = qnode.quantum
            self.assertIsNotNone(quantum.datastore_records)
            # only the first quantum has a pre-existing input
            if i == 0:
                datastore_name = "FileDatastore@<butlerRoot>"
                self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name})
                records_data = quantum.datastore_records[datastore_name]
                records = dict(records_data.records)
                self.assertEqual(len(records), 1)
                _, records = records.popitem()
                records = records["file_datastore_records"]
                self.assertEqual(
                    [record.path for record in records],
                    ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"],
                )
            else:
                self.assertEqual(quantum.datastore_records, {})


class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """Standard LSST resource/file-leak test case."""


def setup_module(module):
    lsst.utils.tests.init()


if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()