
# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Simple unit test for cmdLineFwk module."""

import contextlib
import logging
import os
import pickle
import re
import shutil
import tempfile
import unittest
import unittest.mock
from dataclasses import dataclass
from io import StringIO
from types import SimpleNamespace
from typing import NamedTuple

import astropy.units as u
import click
import click.testing
import lsst.pex.config as pexConfig
import lsst.pipe.base.connectionTypes as cT
import lsst.utils.tests
from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError
from lsst.ctrl.mpexec.cli.opt import run_options
from lsst.ctrl.mpexec.cli.utils import (
    _ACTION_ADD_INSTRUMENT,
    _ACTION_ADD_TASK,
    _ACTION_CONFIG,
    _ACTION_CONFIG_FILE,
    PipetaskCommand,
)
from lsst.ctrl.mpexec.showInfo import ShowInfo
from lsst.daf.butler import (
    CollectionType,
    Config,
    DataCoordinate,
    DatasetRef,
    DimensionConfig,
    DimensionUniverse,
    Quantum,
    Registry,
)
from lsst.daf.butler.core.datasets.type import DatasetType
from lsst.daf.butler.registry import RegistryConfig
from lsst.pipe.base import (
    Instrument,
    Pipeline,
    PipelineTaskConfig,
    PipelineTaskConnections,
    QuantumGraph,
    TaskDef,
)
from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant
from lsst.pipe.base.script import transfer_from_graph
from lsst.pipe.base.tests.simpleQGraph import (
    AddTask,
    AddTaskFactoryMock,
    makeSimpleButler,
    makeSimplePipeline,
    makeSimpleQGraph,
    populateButler,
)
from lsst.utils.tests import temporaryDirectory

logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() so that it does not retrieve a
# non-existing instrument from the registry; these tests can run fine without
# an actual instrument, and implementing a full mock for Instrument is too
# complicated.
Instrument.fromName = lambda name, reg: None

@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    Temporary file is deleted on exiting context.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into the file.
    suffix : `str`, optional
        Suffix for the temporary file name.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(fd, contents)
    os.close(fd)
    yield tmpname
    with contextlib.suppress(OSError):
        os.remove(tmpname)
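# A minimal usage sketch for makeTmpFile (illustrative only; the contents and
# suffix below are arbitrary examples):
#
#     with makeTmpFile(b"config.addend = 1000\n", suffix=".py") as tmpname:
#         ...  # use tmpname as the path to a config-override file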

@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create new empty registry database.

    Parameters
    ----------
    create : `bool`, optional
        If `True`, initialize the registry tables from the configuration.
    universe : `DimensionUniverse`, optional
        Dimension universe for the registry; a default test dimension
        configuration is used when not provided.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    dimensionConfig = universe.dimensionConfig if universe is not None else _makeDimensionConfig()
    with temporaryDirectory() as tmpdir:
        uri = f"sqlite:///{tmpdir}/gen3.sqlite"
        config = RegistryConfig()
        config["db"] = uri
        if create:
            Registry.createFromConfig(config, dimensionConfig=dimensionConfig)
        yield config
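# A minimal usage sketch for makeSQLiteRegistry (illustrative only; the graph
# file name is an arbitrary example). The yielded config can seed the
# "registry" section of the butler config built by _makeArgs below:
#
#     with makeSQLiteRegistry() as registryConfig:
#         args = _makeArgs(qgraph="graph.qgraph", registryConfig=registryConfig)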

class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Test connection class."""

    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")


class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Test pipeline config."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        PipelineTaskConfig.setDefaults(self)

def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default ``butler_config`` is set to `Config` populated with some
    defaults; it can be overridden completely by a keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to get the default value of arguments to 'run'.
    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
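    # The mock captured the click option values; rename a couple of keys to
    # the attribute names that CmdLineFwk expects, and make sure
    # pipeline_actions is always defined.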

    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args
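# A typical call (values are arbitrary examples): build an argument namespace
# resembling "pipetask run --butler-config <repo> -i test -o output":
#
#     args = _makeArgs(butler_config="/path/to/repo", input="test", output="output")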

class FakeDSType(NamedTuple):
    """A fake `~lsst.daf.butler.DatasetType` class used for testing."""

    name: str


@dataclass(frozen=True)
class FakeDSRef:
    """A fake `~lsst.daf.butler.DatasetRef` class used for testing."""

    datasetType: str
    dataId: tuple

    def isComponent(self):
        return False

# Task class name used by tests, needs to be importable
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"

def _makeDimensionConfig():
    """Make a simple dimension universe configuration."""
    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
                "B": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
            },
            "packers": {},
        }
    )
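# Note: the "A" and "B" dimensions defined above are what _makeQGraph below
# uses when it standardizes its fake data ID ({"A": 1, "B": 2}).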

def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it; the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    fakeDSType = DatasetType("A", (), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    quanta = [
        Quantum(
            taskName=_TASK_CLASS,
            inputs={
                fakeDSType: [
                    DatasetRef(
                        fakeDSType,
                        DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe),
                        run="fake_run",
                    )
                ]
            },
        )
    ]  # type: ignore
    qgraph = QuantumGraph({taskDef: set(quanta)}, universe=universe)
    return qgraph
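# A minimal usage sketch for _makeQGraph, mirroring testMakeGraphFromSave
# below (the file name is an arbitrary example):
#
#     qgraph = _makeQGraph()
#     with open("trivial.qgraph", "wb") as saveFile:
#         qgraph.save(saveFile)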

class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk."""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method."""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # a few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # multi-task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only the most trivial case is tested, one that does not do actual
        graph building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make a non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None.
            qgraph = QuantumGraph({}, universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # history output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

    def test_execution_resources_parameters(self) -> None:
        """Test creation of the ExecutionResources from command line."""
        fwk = CmdLineFwk()
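        # Each case gives the (cores_per_quantum, memory_per_quantum) option
        # values followed by the expected ExecutionResources num_cores and
        # max_mem; None means the option is not passed.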

        for params, num_cores, max_mem in (
            ((None, None), 1, None),
            ((5, ""), 5, None),
            ((None, "50"), 1, 50 * u.MB),
            ((5, "50 GB"), 5, 50 * u.GB),
        ):
            kwargs = {}
            for k, v in zip(("cores_per_quantum", "memory_per_quantum"), params, strict=True):
                if v is not None:
                    kwargs[k] = v
            args = _makeArgs(**kwargs)
            res = fwk._make_execution_resources(args)
            self.assertEqual(res.num_cores, num_cores)
            self.assertEqual(res.max_mem, max_mem)

        args = _makeArgs(memory_per_quantum="50m")
        with self.assertRaises(u.UnitConversionError):
            fwk._make_execution_resources(args)
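# The tests below exercise CmdLineFwk against a real SQLite-backed butler
# repository created in a temporary directory for each test, with
# AddTaskFactoryMock standing in for actual task execution.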

class CmdLineFwkTestCaseWithButler(unittest.TestCase):
    """A test case for CmdLineFwk that uses a real butler repository."""

    def setUp(self):
        super().setUp()
        self.root = tempfile.mkdtemp()
        self.nQuanta = 5
        self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta)

    def tearDown(self):
        shutil.rmtree(self.root, ignore_errors=True)
        super().tearDown()

    def testSimpleQGraph(self):
        """Test successful execution of a trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

    def testSimpleQGraph_rebase(self):
        """Test successful execution of trivial quantum graph, with --rebase
        used to force redefinition of the output collection.
        """
        # Pass one input collection here for the usual test setup; we'll
        # override it later.
        args = _makeArgs(butler_config=self.root, input="test1", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # We'll actually pass two input collections in. One is empty, but
        # the stuff we're testing here doesn't care.
        args.input = ["test2", "test1"]
        butler.registry.registerCollection("test2", CollectionType.RUN)

        # Set up the output collection with a sequence that doesn't end the
        # same way as the input collection. This is normally an error.
        butler.registry.registerCollection("output", CollectionType.CHAINED)
        butler.registry.registerCollection("unexpected_input", CollectionType.RUN)
        butler.registry.registerCollection("output/run0", CollectionType.RUN)
        butler.registry.setCollectionChain("output", ["test2", "unexpected_input", "test1", "output/run0"])

        # Without --rebase, the inconsistent input and output collections are
        # an error.
        with self.assertRaises(ValueError):
            fwk.makeGraph(self.pipeline, args)

        # With --rebase, the output collection gets redefined.
        args.rebase = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        butler.registry.refresh()
        self.assertEqual(
            list(butler.registry.getCollectionChain("output")),
            [args.output_run, "output/run0", "test2", "test1", "unexpected_input"],
        )

    def test_simple_qgraph_qbb(self):
        """Test successful execution of a trivial quantum graph in QBB
        (quantum-backed butler) mode.
        """
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        output_run = qgraph.metadata["output_run"]
        args.output_run = output_run

        # QBB must run from a serialized graph.
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            n1 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, 31)

        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # Update the output run and try again.
        new_output_run = output_run + "_new"
        qgraph.updateRun(new_output_run, metadata_key="output_run", update_graph_id=True)
        self.assertEqual(qgraph.metadata["output_run"], new_output_run)

        taskFactory = AddTaskFactoryMock()
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            n2 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, n2)

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for the case when output data for one task already appears in
        the _input_ collection, but no ``--extend-run`` or ``--skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With the current implementation the graph has all nQuanta quanta,
        # but one quantum is skipped when executing.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` when output data for one task already
        appears in the _input_ collection. No ``--extend-run`` option is
        needed for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        # If all quanta are skipped, the task is not included in the graph.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` when output data for one task already
        appears in the _output_ collection. The ``--extend-run`` option is
        needed for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                    "task0_config",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        # First task has no remaining quanta, so is left out completely.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        # Graph does not include quantum for first task.
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        # - nQuanta of init_outputs
        # - nQuanta of configs
        # - packages (single dataset)
        # - nQuanta * two output datasets
        # - nQuanta of metadata
        # - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there, and it has all datasets but
        # non-InitOutputs are not in datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                butler.get(ref)
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

    def testSubgraph(self):
        """Test loading a subset of nodes from a saved quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)

        # Select first two nodes for execution. This depends on node ordering
        # which I assume is the same as execution order.
        nNodes = 2
        nodeIds = [node.nodeId for node in qgraph]
        nodeIds = nodeIds[:nNodes]

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with (
            makeTmpFile(suffix=".qgraph") as tmpname,
            makeSQLiteRegistry(universe=butler.dimensions) as registryConfig,
        ):
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)

            args = _makeArgs(
                qgraph=tmpname,
                qgraph_node_id=nodeIds,
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            fwk = CmdLineFwk()

            # load graph, should only read a subset
            qgraph = fwk.makeGraph(pipeline=None, args=args)
            self.assertEqual(len(qgraph), nNodes)

    def testShowGraph(self):
        """Test for --show options for quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["graph"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"graph"})

    def testShowGraphWorkflow(self):
        """Test for the --show workflow option for a quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["workflow"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"workflow"})

        # TODO: cannot test "uri" option presently, it instantiates
        # butler from command line options and there is no way to pass butler
        # mock to that code.
        show = ShowInfo(["uri"])
        with self.assertRaises(ValueError):  # No args given
            show.show_graph_info(qgraph)

    def testSimpleQGraphDatastoreRecords(self):
        """Test quantum graph generation with --qgraph-datastore-records."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)
        for i, qnode in enumerate(qgraph):
            quantum = qnode.quantum
            self.assertIsNotNone(quantum.datastore_records)
            # only the first quantum has a pre-existing input
            if i == 0:
                datastore_name = "FileDatastore@<butlerRoot>"
                self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name})
                records_data = quantum.datastore_records[datastore_name]
                records = dict(records_data.records)
                self.assertEqual(len(records), 1)
                _, records = records.popitem()
                records = records["file_datastore_records"]
                self.assertEqual(
                    [record.path for record in records],
                    ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"],
                )
            else:
                self.assertEqual(quantum.datastore_records, {})

class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """General file leak detection."""


def setup_module(module):
    """Initialize pytest module."""
    lsst.utils.tests.init()


if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()