
# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Simple unit test for cmdLineFwk module."""

import contextlib
import logging
import os
import pickle
import re
import shutil
import tempfile
import unittest
from dataclasses import dataclass
from io import StringIO
from types import SimpleNamespace
from typing import NamedTuple

import astropy.units as u
import click
import lsst.pex.config as pexConfig
import lsst.pipe.base.connectionTypes as cT
import lsst.utils.tests
from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError
from lsst.ctrl.mpexec.cli.opt import run_options
from lsst.ctrl.mpexec.cli.utils import (
    _ACTION_ADD_INSTRUMENT,
    _ACTION_ADD_TASK,
    _ACTION_CONFIG,
    _ACTION_CONFIG_FILE,
    PipetaskCommand,
)
from lsst.ctrl.mpexec.showInfo import ShowInfo
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DimensionConfig,
    DimensionUniverse,
    Quantum,
    Registry,
)
from lsst.daf.butler.core.datasets.type import DatasetType
from lsst.daf.butler.registry import RegistryConfig
from lsst.pipe.base import (
    Instrument,
    Pipeline,
    PipelineTaskConfig,
    PipelineTaskConnections,
    QuantumGraph,
    TaskDef,
)
from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant
from lsst.pipe.base.script import transfer_from_graph
from lsst.pipe.base.tests.simpleQGraph import (
    AddTask,
    AddTaskFactoryMock,
    makeSimpleButler,
    makeSimplePipeline,
    makeSimpleQGraph,
    populateButler,
)
from lsst.utils.tests import temporaryDirectory

logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() so that it does not try to
# retrieve a non-existent instrument from the registry. These tests run fine
# without an actual instrument, and implementing a full mock for Instrument
# is too complicated.
Instrument.fromName = lambda name, reg: None


@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    Temporary file is deleted on exiting context.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into the file.
    suffix : `str`, optional
        Suffix for the temporary file name, passed to `tempfile.mkstemp`.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(fd, contents)
    os.close(fd)
    yield tmpname
    with contextlib.suppress(OSError):
        os.remove(tmpname)
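
# Illustrative use of makeTmpFile (a sketch; the tests below use the same
# pattern): the temporary file exists inside the ``with`` block and is
# removed on exit.
#
#     with makeTmpFile(b"config.addend = 1\n", suffix=".py") as tmpname:
#         assert os.path.exists(tmpname)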


@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create new empty registry database.

    Parameters
    ----------
    create : `bool`, optional
        If `True`, create the registry tables in the new database.
    universe : `DimensionUniverse`, optional
        Dimension universe whose configuration is used for the new registry;
        a default test configuration is used if not given.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    dimensionConfig = universe.dimensionConfig if universe is not None else _makeDimensionConfig()
    with temporaryDirectory() as tmpdir:
        uri = f"sqlite:///{tmpdir}/gen3.sqlite"
        config = RegistryConfig()
        config["db"] = uri
        if create:
            Registry.createFromConfig(config, dimensionConfig=dimensionConfig)
        yield config
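
# A sketch of how makeSQLiteRegistry combines with makeTmpFile in the tests
# below (e.g. testMakeGraphFromSave):
#
#     with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
#         args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig)
#         ...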


class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Test connection class."""

    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")


class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Test pipeline config."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        PipelineTaskConfig.setDefaults(self)


def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default ``butler_config`` is set to a `Config` populated with some
    defaults; it can be overridden completely by a keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to get the default value of arguments to 'run'.

    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args
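
# Illustrative _makeArgs usage (a sketch; the keyword names correspond to the
# "pipetask run" options gathered by fake_run above):
#
#     args = _makeArgs(butler_config="/repo", input="test", output="output")
#     args.extend_run = True  # attributes can also be adjusted afterwards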


class FakeDSType(NamedTuple):
    """A fake `~lsst.daf.butler.DatasetType` class used for testing."""

    name: str


@dataclass(frozen=True)
class FakeDSRef:
    """A fake `~lsst.daf.butler.DatasetRef` class used for testing."""

    datasetType: str
    dataId: tuple

    def isComponent(self):
        return False


# Task class name used by tests, needs to be importable
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"


def _makeDimensionConfig():
    """Make a simple dimension universe configuration."""
    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
                "B": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
            },
            "packers": {},
        }
    )
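
# The configuration above is consumed like this elsewhere in this module
# (see makeSQLiteRegistry above and _makeQGraph below):
#
#     universe = DimensionUniverse(config=_makeDimensionConfig())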


def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it; the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
        Quantum graph with a single quantum.
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    fakeDSType = DatasetType("A", (), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    quanta = [
        Quantum(
            taskName=_TASK_CLASS,
            inputs={
                fakeDSType: [
                    DatasetRef(
                        fakeDSType,
                        DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe),
                        run="fake_run",
                    )
                ]
            },
        )
    ]  # type: ignore
    qgraph = QuantumGraph({taskDef: set(quanta)}, universe=universe)
    return qgraph
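
# A sketch of the save/load round trip the tests below perform with this
# graph (QuantumGraph.save is the API they actually call):
#
#     qgraph = _makeQGraph()
#     with makeTmpFile(suffix=".qgraph") as tmpname:
#         with open(tmpname, "wb") as saveFile:
#             qgraph.save(saveFile)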


class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk."""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method."""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # a few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many-task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, needs a real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check the --instrument option; for now this only checks that it
        # does not crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only the most trivial case, which does not do actual graph building,
        is tested.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make a non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading an empty graph from pickle should work, but makeGraph()
            # will return None
            qgraph = QuantumGraph({}, universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # History output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

    def test_execution_resources_parameters(self) -> None:
        """Test creation of the ExecutionResources from command line."""
        fwk = CmdLineFwk()

        for params, num_cores, max_mem in (
            ((None, None), 1, None),
            ((5, ""), 5, None),
            ((None, "50"), 1, 50 * u.MB),
            ((5, "50 GB"), 5, 50 * u.GB),
        ):
            kwargs = {}
            for k, v in zip(("cores_per_quantum", "memory_per_quantum"), params, strict=True):
                if v is not None:
                    kwargs[k] = v
            args = _makeArgs(**kwargs)
            res = fwk._make_execution_resources(args)
            self.assertEqual(res.num_cores, num_cores)
            self.assertEqual(res.max_mem, max_mem)

        args = _makeArgs(memory_per_quantum="50m")
        with self.assertRaises(u.UnitConversionError):
            fwk._make_execution_resources(args)


class CmdLineFwkTestCaseWithButler(unittest.TestCase):
    """A test case for CmdLineFwk that requires a Butler repository."""

    def setUp(self):
        super().setUp()
        self.root = tempfile.mkdtemp()
        self.nQuanta = 5
        self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta)

    def tearDown(self):
        shutil.rmtree(self.root, ignore_errors=True)
        super().tearDown()

    def testSimpleQGraph(self):
        """Test successful execution of a trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

    def test_simple_qgraph_qbb(self):
        """Test successful execution of trivial quantum graph in QBB mode."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        output_run = qgraph.metadata["output_run"]
        args.output_run = output_run

        # QBB must run from serialized graph.
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            n1 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, 31)

        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # Update the output run and try again.
        new_output_run = output_run + "_new"
        qgraph.updateRun(new_output_run, metadata_key="output_run", update_graph_id=True)
        self.assertEqual(qgraph.metadata["output_run"], new_output_run)

        taskFactory = AddTaskFactoryMock()
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            n2 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, n2)

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for the case when output data for one task already appears in
        the _input_ collection, but no ``--extend-run`` or ``--skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With the current implementation the graph has all nQuanta quanta,
        # but when executing, one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` when output data for one task already
        appears in the _input_ collection. No ``--extend-run`` option is
        needed for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        # If all quanta are skipped, the task is not included in the graph.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` when output data for one task already
        appears in the _output_ collection. The ``--extend-run`` option is
        needed for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                    "task0_config",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        # First task has no remaining quanta, so is left out completely.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        # Graph does not include quantum for the first task.
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        #  - nQuanta of init_outputs
        #  - nQuanta of configs
        #  - packages (single dataset)
        #  - nQuanta * two output datasets
        #  - nQuanta of metadata
        #  - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there, and it has all datasets but
        # non-InitOutputs are not in datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                butler.get(ref)
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

    def testSubgraph(self):
        """Test loading a subset of nodes from a saved quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)

        # Select first two nodes for execution. This depends on node ordering
        # which I assume is the same as execution order.
        nNodes = 2
        nodeIds = [node.nodeId for node in qgraph]
        nodeIds = nodeIds[:nNodes]

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with (
            makeTmpFile(suffix=".qgraph") as tmpname,
            makeSQLiteRegistry(universe=butler.dimensions) as registryConfig,
        ):
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)

            args = _makeArgs(
                qgraph=tmpname,
                qgraph_node_id=nodeIds,
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            fwk = CmdLineFwk()

            # load graph, should only read a subset
            qgraph = fwk.makeGraph(pipeline=None, args=args)
            self.assertEqual(len(qgraph), nNodes)

    def testShowGraph(self):
        """Test for --show options for quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["graph"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"graph"})

    def testShowGraphWorkflow(self):
        """Test for the --show workflow option for a quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["workflow"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"workflow"})

        # TODO: cannot test the "uri" option presently; it instantiates a
        # butler from command-line options and there is no way to pass a
        # butler mock to that code.
        show = ShowInfo(["uri"])
        with self.assertRaises(ValueError):  # No args given
            show.show_graph_info(qgraph)

    def testSimpleQGraphDatastoreRecords(self):
        """Test quantum graph generation with --qgraph-datastore-records."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)
        for i, qnode in enumerate(qgraph):
            quantum = qnode.quantum
            self.assertIsNotNone(quantum.datastore_records)
            # only the first quantum has a pre-existing input
            if i == 0:
                datastore_name = "FileDatastore@<butlerRoot>"
                self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name})
                records_data = quantum.datastore_records[datastore_name]
                records = dict(records_data.records)
                self.assertEqual(len(records), 1)
                _, records = records.popitem()
                records = records["file_datastore_records"]
                self.assertEqual(
                    [record.path for record in records],
                    ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"],
                )
            else:
                self.assertEqual(quantum.datastore_records, {})


class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """General file leak detection."""


def setup_module(module):
    """Initialize pytest module."""
    lsst.utils.tests.init()


if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()