# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

22"""Simple unit test for cmdLineFwk module. 

23""" 

24 

import contextlib
import logging
import os
import pickle
import re
import shutil
import tempfile
import unittest
import unittest.mock  # not auto-imported by "import unittest"; needed for Mock below
from dataclasses import dataclass
from io import StringIO
from types import SimpleNamespace
from typing import NamedTuple

import astropy.units as u
import click
import click.testing  # CliRunner lives in click.testing, which "import click" does not pull in
import lsst.pex.config as pexConfig
import lsst.pipe.base.connectionTypes as cT
import lsst.utils.tests
from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError
from lsst.ctrl.mpexec.cli.opt import run_options
from lsst.ctrl.mpexec.cli.utils import (
    _ACTION_ADD_INSTRUMENT,
    _ACTION_ADD_TASK,
    _ACTION_CONFIG,
    _ACTION_CONFIG_FILE,
    PipetaskCommand,
)
from lsst.ctrl.mpexec.showInfo import ShowInfo
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DimensionConfig,
    DimensionUniverse,
    Quantum,
    Registry,
)
from lsst.daf.butler.core.datasets.type import DatasetType
from lsst.daf.butler.registry import RegistryConfig
from lsst.pipe.base import (
    Instrument,
    Pipeline,
    PipelineTaskConfig,
    PipelineTaskConnections,
    QuantumGraph,
    TaskDef,
)
from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant
from lsst.pipe.base.tests.simpleQGraph import (
    AddTask,
    AddTaskFactoryMock,
    makeSimpleButler,
    makeSimplePipeline,
    makeSimpleQGraph,
    populateButler,
)
from lsst.utils.tests import temporaryDirectory

logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Monkey-patch Instrument.fromName() so that it does not try to retrieve a
# non-existent instrument from the registry. These tests run fine without an
# actual instrument, and implementing a full mock for Instrument is too
# complicated.
Instrument.fromName = lambda name, reg: None


@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    Temporary file is deleted on exiting context.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into the file.
    suffix : `str`, optional
        Suffix for the temporary file name.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(fd, contents)
    os.close(fd)
    yield tmpname
    with contextlib.suppress(OSError):
        os.remove(tmpname)
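
# Illustrative use of makeTmpFile (this mirrors the calls in the tests below):
#
#     with makeTmpFile(b"config.addend = 1000\n") as tmpname:
#         ...  # pass tmpname to code that expects a file path
#
# The file is removed automatically when the context exits.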


@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create new empty registry database.

    Parameters
    ----------
    create : `bool`, optional
        If `True`, create the registry database.
    universe : `DimensionUniverse`, optional
        Dimension universe to use; a minimal test universe is made if `None`.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    dimensionConfig = universe.dimensionConfig if universe is not None else _makeDimensionConfig()
    with temporaryDirectory() as tmpdir:
        uri = f"sqlite:///{tmpdir}/gen3.sqlite"
        config = RegistryConfig()
        config["db"] = uri
        if create:
            Registry.createFromConfig(config, dimensionConfig=dimensionConfig)
        yield config
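
# Callers below use this helper either with the default test universe
# (testMakeGraphFromSave) or with the dimension universe of an existing
# butler (testSubgraph, via the ``universe`` argument).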


class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Test connection class."""

    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")


class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Test pipeline config."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        PipelineTaskConfig.setDefaults(self)


def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default ``butler_config`` is set to a `Config` populated with some
    defaults; it can be overridden completely via a keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to get the default value of arguments to 'run'.

    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option.
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    args = SimpleNamespace(**args)

    # Override butler_config with our defaults.
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use.
        args.butler_config.configFile = "."

    # Override arguments from keyword parameters.
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args
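
# Note: _makeArgs works by invoking a throwaway Click command decorated with
# the same @run_options() as the real "pipetask run" command, capturing the
# parsed defaults with a Mock, and then layering test-specific overrides on
# top. This keeps the test arguments in sync with the CLI definition.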


class FakeDSType(NamedTuple):
    """A fake `~lsst.daf.butler.DatasetType` class used for testing."""

    name: str


@dataclass(frozen=True)
class FakeDSRef:
    """A fake `~lsst.daf.butler.DatasetRef` class used for testing."""

    datasetType: str
    dataId: tuple

    def isComponent(self):
        return False


# Task class name used by tests; it needs to be importable.
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"


def _makeDimensionConfig():
    """Make a simple dimension universe configuration."""
    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
                "B": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
            },
            "packers": {},
        }
    )
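
# The universe above defines just two int-keyed dimensions, "A" and "B",
# which is enough to build data IDs like {"A": 1, "B": 2} for the fake
# quanta below without depending on the default science universe.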


def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it; the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    fakeDSType = DatasetType("A", (), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    quanta = [
        Quantum(
            taskName=_TASK_CLASS,
            inputs={
                fakeDSType: [
                    DatasetRef(
                        fakeDSType,
                        DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe),
                        run="fake_run",
                    )
                ]
            },
        )
    ]  # type: ignore
    qgraph = QuantumGraph({taskDef: set(quanta)}, universe=universe)
    return qgraph
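
# The graph built here is only good enough to be saved and read back: its
# single quantum references a dataset in a "fake_run" collection that never
# exists in any butler.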


class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk."""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method."""
        fwk = CmdLineFwk()

        # Make an empty pipeline.
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # A few tests with serialization.
        with makeTmpFile() as tmpname:
            # Make an empty pipeline and store it in a file.
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # Read the pipeline back from the file.
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # Single-task pipeline; the task name can be anything here.
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # Multi-task pipeline.
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # Single-task pipeline with config overrides; this needs a real task
        # class.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check the --instrument option; for now this only checks that it
        # does not crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
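
    # The _ACTION_* helpers used above are the parsed forms of the pipetask
    # options that add tasks, config overrides, config-override files and
    # instruments to a pipeline, so these tests exercise the same code path
    # as a real command line would.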

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only the most trivial case, which does not do actual graph building,
        is tested.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # Make a non-empty graph and store it in a file.
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # This will fail if the graph ID does not match.
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # Save with the wrong object type.
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # Reading an empty graph from pickle should work, but makeGraph()
            # will return None.
            qgraph = QuantumGraph({}, universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # History output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))
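
    # The "config=..." show values above use the <task>::<pattern> form; the
    # tests cover a glob that matches, a pattern with no match, and the
    # NOIGNORECASE modifier that turns off case-insensitive matching.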

    def test_execution_resources_parameters(self) -> None:
        """Test creation of the ExecutionResources from command line."""
        fwk = CmdLineFwk()

        for params, num_cores, max_mem in (
            ((None, None), 1, None),
            ((5, ""), 5, None),
            ((None, "50"), 1, 50 * u.MB),
            ((5, "50 GB"), 5, 50 * u.GB),
        ):
            kwargs = {}
            for k, v in zip(("cores_per_quantum", "memory_per_quantum"), params, strict=True):
                if v is not None:
                    kwargs[k] = v
            args = _makeArgs(**kwargs)
            res = fwk._make_execution_resources(args)
            self.assertEqual(res.num_cores, num_cores)
            self.assertEqual(res.max_mem, max_mem)

        args = _makeArgs(memory_per_quantum="50m")
        with self.assertRaises(u.UnitConversionError):
            fwk._make_execution_resources(args)
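
    # A bare number for the memory option is interpreted as megabytes ("50"
    # becomes 50 MB above), while "50m" fails because astropy parses "m" as
    # metres, which cannot be converted to a memory size.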


class CmdLineFwkTestCaseWithButler(unittest.TestCase):
    """A test case for CmdLineFwk that runs against a Butler repository."""

    def setUp(self):
        super().setUp()
        self.root = tempfile.mkdtemp()
        self.nQuanta = 5
        self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta)

    def tearDown(self):
        shutil.rmtree(self.root, ignore_errors=True)
        super().tearDown()

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # Run the whole thing.
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # Test that we have disabled implicit threading.
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")
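
    # Each of the Butler-backed tests below follows the same pattern: build a
    # small repository with makeSimpleButler/populateButler, generate a
    # QuantumGraph with makeGraph, then execute it with runPipeline using
    # AddTaskFactoryMock to count (and optionally fail) quantum executions.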

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for the case when output data for one task already appears in
        the _input_ collection, but no ``--extend-run`` or ``--skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # Run the whole thing.
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` when output data for one task already
        appears in the _input_ collection. No ``--extend-run`` option is
        needed for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # Run the whole thing.
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` when output data for one task already
        appears in the _output_ collection. The ``--extend-run`` option is
        needed for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # This fails without --extend-run.
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # Retry with --extend-run.
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # The graph does not include the quantum for the first task.
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Run the whole thing.
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)
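
    # Note the asymmetry between the two skip-existing tests above: skipping
    # based on outputs already present in the _output_ run requires
    # --extend-run, while skipping based on the _input_ collection does not.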

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # Run the first three quanta.
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # Drop one of the two outputs from one task.
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # Also drop the metadata output.
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
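
    # Even with --skip-existing and --extend-run the re-run above still
    # fails: without --clobber-outputs the quantum with partial outputs
    # cannot be retried. The next test covers the clobbering variant.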

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # Should have one task and the expected number of quanta.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # Run the first three quanta.
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # Drop one of the two outputs from one task.
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # Also drop the metadata output.
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # The number of executed quanta is incremented.
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # Should have one task and the expected number of quanta.
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # Need to refresh collections explicitly (or make new butler/registry).
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # Number of datasets written by the pipeline:
        #  - nQuanta of init_outputs
        #  - nQuanta of configs
        #  - packages (single dataset)
        #  - nQuanta * two output datasets
        #  - nQuanta of metadata
        #  - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # Re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed).
        args.replace_run = True
        args.output_run = "output/run2"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # New output collection.
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # Old output collection is still there.
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # Re-run with --replace-run and --prune-replaced=unstore.
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # New output collection.
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # Old output collection is still there, and it has all datasets, but
        # non-InitOutputs are not in the datastore.
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                butler.get(ref)
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)

        # Re-run with --replace-run and --prune-replaced=purge.
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now.
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # New output collection.
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
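
    # Summary of the --replace-run behavior exercised above: each re-run
    # chains a new RUN collection into "output"; --prune-replaced=unstore
    # keeps the replaced run registered but removes its non-InitOutput
    # datasets from the datastore, and --prune-replaced=purge removes the
    # replaced run entirely.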

    def testSubgraph(self):
        """Test reading back a subset of nodes from a saved quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)

        # Select first two nodes for execution. This depends on node ordering,
        # which I assume is the same as execution order.
        nNodes = 2
        nodeIds = [node.nodeId for node in qgraph]
        nodeIds = nodeIds[:nNodes]

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with (
            makeTmpFile(suffix=".qgraph") as tmpname,
            makeSQLiteRegistry(universe=butler.dimensions) as registryConfig,
        ):
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)

            args = _makeArgs(
                qgraph=tmpname,
                qgraph_node_id=nodeIds,
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            fwk = CmdLineFwk()

            # Load the graph; this should only read a subset of the nodes.
            qgraph = fwk.makeGraph(pipeline=None, args=args)
            self.assertEqual(len(qgraph), nNodes)

    def testShowGraph(self):
        """Test for --show options for quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["graph"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"graph"})

    def testShowGraphWorkflow(self):
        """Test for the --show workflow option for quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["workflow"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"workflow"})

        # TODO: cannot test the "uri" option presently; it instantiates a
        # butler from command line options and there is no way to pass a
        # butler mock to that code.
        show = ShowInfo(["uri"])
        with self.assertRaises(ValueError):  # No args given
            show.show_graph_info(qgraph)

    def testSimpleQGraphDatastoreRecords(self):
        """Test quantum graph generation with --qgraph-datastore-records."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)
        for i, qnode in enumerate(qgraph):
            quantum = qnode.quantum
            self.assertIsNotNone(quantum.datastore_records)
            # Only the first quantum has a pre-existing input.
            if i == 0:
                datastore_name = "FileDatastore@<butlerRoot>"
                self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name})
                records_data = quantum.datastore_records[datastore_name]
                records = dict(records_data.records)
                self.assertEqual(len(records), 1)
                _, records = records.popitem()
                records = records["file_datastore_records"]
                self.assertEqual(
                    [record.path for record in records],
                    ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"],
                )
            else:
                self.assertEqual(quantum.datastore_records, {})
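
    # Only the first quantum consumes a dataset that pre-exists in the input
    # collection; intermediate datasets are produced within the graph itself,
    # so only that quantum is expected to carry datastore records.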


class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """General file leak detection."""


def setup_module(module):
    """Initialize pytest module."""
    lsst.utils.tests.init()


if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()