Coverage report for tests/test_cmdLineFwk.py: 13% of 485 statements covered
(generated by coverage.py v7.2.7 on 2023-07-05 09:15 +0000).

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Simple unit test for cmdLineFwk module. 

23""" 

24 

25import contextlib 

26import logging 

27import os 

28import pickle 

29import re 

30import shutil 

31import tempfile 

32import unittest 

33from dataclasses import dataclass 

34from io import StringIO 

35from types import SimpleNamespace 

36from typing import NamedTuple 

37 

38import astropy.units as u 

39import click 

40import lsst.pex.config as pexConfig 

41import lsst.pipe.base.connectionTypes as cT 

42import lsst.utils.tests 

43from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError 

44from lsst.ctrl.mpexec.cli.opt import run_options 

45from lsst.ctrl.mpexec.cli.utils import ( 

46 _ACTION_ADD_INSTRUMENT, 

47 _ACTION_ADD_TASK, 

48 _ACTION_CONFIG, 

49 _ACTION_CONFIG_FILE, 

50 PipetaskCommand, 

51) 

52from lsst.ctrl.mpexec.showInfo import ShowInfo 

53from lsst.daf.butler import ( 

54 Config, 

55 DataCoordinate, 

56 DatasetRef, 

57 DimensionConfig, 

58 DimensionUniverse, 

59 Quantum, 

60 Registry, 

61) 

62from lsst.daf.butler.core.datasets.type import DatasetType 

63from lsst.daf.butler.registry import RegistryConfig 

64from lsst.pipe.base import ( 

65 Instrument, 

66 Pipeline, 

67 PipelineTaskConfig, 

68 PipelineTaskConnections, 

69 QuantumGraph, 

70 TaskDef, 

71) 

72from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant 

73from lsst.pipe.base.tests.simpleQGraph import ( 

74 AddTask, 

75 AddTaskFactoryMock, 

76 makeSimpleButler, 

77 makeSimplePipeline, 

78 makeSimpleQGraph, 

79 populateButler, 

80) 

81from lsst.utils.tests import temporaryDirectory 

82 

# Honor UNIT_TEST_LOGGING_LEVEL if it names a valid logging level; fall back
# to INFO for unset or unrecognized values.
logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() to not retrieve non-existing
# instrument from registry; these tests can run fine without an actual
# instrument, and implementing a full mock for Instrument is too complicated.
Instrument.fromName = lambda name, reg: None

89 

90 

@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    Temporary file is deleted on exiting context, even if the caller's
    block raises an exception.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into a file.
    suffix : `str`, optional
        Suffix for the temporary file name (e.g. ``".qgraph"``).
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(fd, contents)
    os.close(fd)
    try:
        yield tmpname
    finally:
        # Cleanup must run even when the with-body raises; the original code
        # placed it after a bare ``yield`` and leaked the file on exceptions.
        # OSError is suppressed in case the caller already removed the file.
        with contextlib.suppress(OSError):
            os.remove(tmpname)

109 

110 

@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager creating a new empty registry database.

    Parameters
    ----------
    create : `bool`, optional
        If `True` (default), initialize the registry schema in the new
        database.
    universe : `DimensionUniverse`, optional
        Dimension universe whose configuration seeds the registry; a simple
        test configuration is used when omitted.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    if universe is None:
        dimensionConfig = _makeDimensionConfig()
    else:
        dimensionConfig = universe.dimensionConfig
    with temporaryDirectory() as tmpdir:
        config = RegistryConfig()
        # SQLite file lives inside the temporary directory, so it disappears
        # together with it on exit.
        config["db"] = f"sqlite:///{tmpdir}/gen3.sqlite"
        if create:
            Registry.createFromConfig(config, dimensionConfig=dimensionConfig)
        yield config

128 

129 

class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Trivial connections class with a single templated init-input."""

    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")

132 

133 

class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Trivial task config with one free-form string field."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        # No extra defaults needed; delegate to the base class.
        PipelineTaskConfig.setDefaults(self)

139 

140 

def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default butler_config is set to `Config` populated with some defaults,
    it can be overridden completely by keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to get the default value of arguments to 'run'.
    # NOTE(review): relies on ``unittest.mock`` being importable through the
    # plain ``import unittest`` at the top of the file — confirm it is pulled
    # in transitively by another import.
    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    # call_args[1] is the keyword-argument dict the fake command received.
    args = mock.call_args[1]
    # Rename CLI option names to the attribute names the framework expects.
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    # Normalize the constraint string into its enum-like variant object.
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args

197 

198 

# Minimal stand-in for a dataset-type object: only a ``name`` attribute is
# needed by the code under test. Functional NamedTuple form is equivalent to
# the class form.
FakeDSType = NamedTuple("FakeDSType", [("name", str)])

201 

202 

@dataclass(frozen=True)
class FakeDSRef:
    """Minimal hashable stand-in for a dataset reference."""

    # Dataset type name (a plain string, unlike the real DatasetRef).
    datasetType: str
    # Data ID key/value pairs as a tuple so the instance stays hashable.
    dataId: tuple

    def isComponent(self):
        # Tests never use component datasets, so this is always False.
        return False

210 

211 

# Task class name used by tests; must be an importable fully-qualified name
# because quantum graphs reference tasks by name.
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"

214 

215 

def _makeDimensionConfig():
    """Make a simple dimension universe configuration.

    The universe contains two identical integer-keyed elements, ``A`` and
    ``B``, plus a minimal sky-pixelization setup.
    """
    # Both elements share the same key and storage specification; build them
    # with a comprehension instead of spelling out two identical literals.
    element_spec = {
        "keys": [
            {
                "name": "id",
                "type": "int",
            }
        ],
        "storage": {
            "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
        },
    }
    elements = {name: copy.deepcopy(element_spec) for name in ("A", "B")}
    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": elements,
            "packers": {},
        }
    )

256 

257 

def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it, the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    # Private test universe so the graph does not depend on the default
    # butler dimension configuration.
    universe = DimensionUniverse(config=_makeDimensionConfig())
    fakeDSType = DatasetType("A", tuple(), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    # Single quantum with one input dataset and no outputs — enough for
    # save/load round-trip tests.
    quanta = [
        Quantum(
            taskName=_TASK_CLASS,
            inputs={
                fakeDSType: [
                    DatasetRef(
                        fakeDSType,
                        DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe),
                        run="fake_run",
                    )
                ]
            },
        )
    ]  # type: ignore
    qgraph = QuantumGraph({taskDef: set(quanta)}, universe=universe)
    return qgraph

287 

288 

class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk that does not need a butler repository."""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method."""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        # config override supplied through a file instead of inline
        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only most trivial case is tested that does not do actual graph
        building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None.
            qgraph = QuantumGraph(dict(), universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        # unknown show keyword is rejected at construction time
        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # History output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        # "uri" requires graph context so it is left unhandled here
        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

    def test_execution_resources_parameters(self) -> None:
        """Test creation of the ExecutionResources from command line."""
        fwk = CmdLineFwk()

        # (cores_per_quantum, memory_per_quantum) -> expected resources;
        # None means "option not given on the command line".
        for params, num_cores, max_mem in (
            ((None, None), 1, None),
            ((5, ""), 5, None),
            ((None, "50"), 1, 50 * u.MB),
            ((5, "50 GB"), 5, 50 * u.GB),
        ):
            kwargs = {}
            for k, v in zip(("cores_per_quantum", "memory_per_quantum"), params):
                if v is not None:
                    kwargs[k] = v
            args = _makeArgs(**kwargs)
            res = fwk._make_execution_resources(args)
            self.assertEqual(res.num_cores, num_cores)
            self.assertEqual(res.max_mem, max_mem)

        # "50m" is minutes, not a memory unit — must fail conversion.
        args = _makeArgs(memory_per_quantum="50m")
        with self.assertRaises(u.UnitConversionError):
            fwk._make_execution_resources(args)

487 

488 

489class CmdLineFwkTestCaseWithButler(unittest.TestCase): 

490 """A test case for CmdLineFwk""" 

491 

492 def setUp(self): 

493 super().setUpClass() 

494 self.root = tempfile.mkdtemp() 

495 self.nQuanta = 5 

496 self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta) 

497 

498 def tearDown(self): 

499 shutil.rmtree(self.root, ignore_errors=True) 

500 super().tearDownClass() 

501 

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

526 

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Empty-graph diagnostics are emitted at CRITICAL level.
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

545 

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for case when output data for one task already appears in
        _input_ collection, but no ``--extend-run`` or ``--skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with the outputs of task0, as if
        # that task had already been run.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

590 

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _input_ collection. No ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with the outputs of task0, as if
        # that task had already been run.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With --skip-existing the pre-existing quantum is dropped from the
        # graph itself.
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

634 

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _output_ collection. The ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Outputs of task0 already live in the output run collection.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # Graph does not include quantum for first task
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

680 

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Mock factory raises on the third quantum, simulating a mid-run
        # failure that leaves partial outputs behind.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        # (NOTE(review): this repeats the assignment above — likely redundant.)
        args.output_run = qgraph.metadata["output_run"]

        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        # Without --clobber-outputs the partially-written quantum cannot be
        # re-executed, so the run still fails.
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

730 

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Mock factory raises on the third quantum, simulating a mid-run
        # failure that leaves partial outputs behind.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        # With --clobber-outputs the partially-complete quantum is re-run.
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

779 

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        #  - nQuanta of init_outputs
        #  - nQuanta of configs
        #  - packages (single dataset)
        #  - nQuanta * two output datasets
        #  - nQuanta of metadata
        #  - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there, and it has all datasets but
        # non-InitOutputs are not in datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        # InitOutputs (packages, configs, init outputs) survive unstore.
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                butler.get(ref)
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

904 

905 def testSubgraph(self): 

906 """Test successful execution of trivial quantum graph.""" 

907 args = _makeArgs(butler_config=self.root, input="test", output="output") 

908 butler = makeSimpleButler(self.root, run=args.input, inMemory=False) 

909 populateButler(self.pipeline, butler) 

910 

911 fwk = CmdLineFwk() 

912 qgraph = fwk.makeGraph(self.pipeline, args) 

913 

914 # Select first two nodes for execution. This depends on node ordering 

915 # which I assume is the same as execution order. 

916 nNodes = 2 

917 nodeIds = [node.nodeId for node in qgraph] 

918 nodeIds = nodeIds[:nNodes] 

919 

920 self.assertEqual(len(qgraph.taskGraph), self.nQuanta) 

921 self.assertEqual(len(qgraph), self.nQuanta) 

922 

923 with ( 

924 makeTmpFile(suffix=".qgraph") as tmpname, 

925 makeSQLiteRegistry(universe=butler.dimensions) as registryConfig, 

926 ): 

927 with open(tmpname, "wb") as saveFile: 

928 qgraph.save(saveFile) 

929 

930 args = _makeArgs( 

931 qgraph=tmpname, 

932 qgraph_node_id=nodeIds, 

933 registryConfig=registryConfig, 

934 execution_butler_location=None, 

935 ) 

936 fwk = CmdLineFwk() 

937 

938 # load graph, should only read a subset 

939 qgraph = fwk.makeGraph(pipeline=None, args=args) 

940 self.assertEqual(len(qgraph), nNodes) 

941 

942 def testShowGraph(self): 

943 """Test for --show options for quantum graph.""" 

944 nQuanta = 2 

945 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

946 

947 show = ShowInfo(["graph"]) 

948 show.show_graph_info(qgraph) 

949 self.assertEqual(show.handled, {"graph"}) 

950 

951 def testShowGraphWorkflow(self): 

952 nQuanta = 2 

953 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

954 

955 show = ShowInfo(["workflow"]) 

956 show.show_graph_info(qgraph) 

957 self.assertEqual(show.handled, {"workflow"}) 

958 

959 # TODO: cannot test "uri" option presently, it instantiates 

960 # butler from command line options and there is no way to pass butler 

961 # mock to that code. 

962 show = ShowInfo(["uri"]) 

963 with self.assertRaises(ValueError): # No args given 

964 show.show_graph_info(qgraph) 

965 

966 def testSimpleQGraphDatastoreRecords(self): 

967 """Test quantum graph generation with --qgraph-datastore-records.""" 

968 args = _makeArgs( 

969 butler_config=self.root, input="test", output="output", qgraph_datastore_records=True 

970 ) 

971 butler = makeSimpleButler(self.root, run=args.input, inMemory=False) 

972 populateButler(self.pipeline, butler) 

973 

974 fwk = CmdLineFwk() 

975 qgraph = fwk.makeGraph(self.pipeline, args) 

976 self.assertEqual(len(qgraph), self.nQuanta) 

977 for i, qnode in enumerate(qgraph): 

978 quantum = qnode.quantum 

979 self.assertIsNotNone(quantum.datastore_records) 

980 # only the first quantum has a pre-existing input 

981 if i == 0: 

982 datastore_name = "FileDatastore@<butlerRoot>" 

983 self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name}) 

984 records_data = quantum.datastore_records[datastore_name] 

985 records = dict(records_data.records) 

986 self.assertEqual(len(records), 1) 

987 _, records = records.popitem() 

988 records = records["file_datastore_records"] 

989 self.assertEqual( 

990 [record.path for record in records], 

991 ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"], 

992 ) 

993 else: 

994 self.assertEqual(quantum.datastore_records, {}) 

995 

996 

class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """Resource/leak checks; all behavior is inherited unchanged from
    `lsst.utils.tests.MemoryTestCase`.
    """

    pass

999 

1000 

def setup_module(module):
    """Initialize the LSST test framework (module-level test setup hook).

    Parameters
    ----------
    module : `module`
        The module being set up; required by the hook signature but unused.
    """
    lsst.utils.tests.init()

1003 

1004 

if __name__ == "__main__":
    # Support running this test file directly (outside a test runner):
    # initialize the LSST test framework, then hand off to unittest.
    lsst.utils.tests.init()
    unittest.main()