Coverage for tests/test_cmdLineFwk.py: 13%

538 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-09-01 09:30 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Simple unit test for cmdLineFwk module. 

23""" 

24 

25import contextlib 

26import logging 

27import os 

28import pickle 

29import re 

30import shutil 

31import tempfile 

32import unittest 

33from dataclasses import dataclass 

34from io import StringIO 

35from types import SimpleNamespace 

36from typing import NamedTuple 

37 

38import astropy.units as u 

39import click 

40import lsst.pex.config as pexConfig 

41import lsst.pipe.base.connectionTypes as cT 

42import lsst.utils.tests 

43from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError 

44from lsst.ctrl.mpexec.cli.opt import run_options 

45from lsst.ctrl.mpexec.cli.utils import ( 

46 _ACTION_ADD_INSTRUMENT, 

47 _ACTION_ADD_TASK, 

48 _ACTION_CONFIG, 

49 _ACTION_CONFIG_FILE, 

50 PipetaskCommand, 

51) 

52from lsst.ctrl.mpexec.showInfo import ShowInfo 

53from lsst.daf.butler import ( 

54 CollectionType, 

55 Config, 

56 DataCoordinate, 

57 DatasetRef, 

58 DimensionConfig, 

59 DimensionUniverse, 

60 Quantum, 

61 Registry, 

62) 

63from lsst.daf.butler.core.datasets.type import DatasetType 

64from lsst.daf.butler.registry import RegistryConfig 

65from lsst.pipe.base import ( 

66 Instrument, 

67 Pipeline, 

68 PipelineTaskConfig, 

69 PipelineTaskConnections, 

70 QuantumGraph, 

71 TaskDef, 

72) 

73from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant 

74from lsst.pipe.base.script import transfer_from_graph 

75from lsst.pipe.base.tests.simpleQGraph import ( 

76 AddTask, 

77 AddTaskFactoryMock, 

78 makeSimpleButler, 

79 makeSimplePipeline, 

80 makeSimpleQGraph, 

81 populateButler, 

82) 

83from lsst.utils.tests import temporaryDirectory 

84 

# Configure test logging; the level can be tuned via the
# UNIT_TEST_LOGGING_LEVEL environment variable (defaults to INFO).
logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

86 

# Have to monkey-patch Instrument.fromName() to not retrieve non-existing
# instrument from registry, these tests can run fine without actual instrument
# and implementing full mock for Instrument is too complicated.
Instrument.fromName = lambda name, reg: None

91 

92 

@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    Temporary file is deleted on exiting context.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into a file; nothing is written when `None` or empty.
    suffix : `str`, optional
        File name suffix (e.g. ``".qgraph"``) forwarded to
        `tempfile.mkstemp`.

    Yields
    ------
    tmpname : `str`
        Path of the temporary file.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(fd, contents)
    # Close our descriptor so the code under test can reopen (and on some
    # platforms remove) the file.
    os.close(fd)
    yield tmpname
    # Best-effort cleanup; the file may have been removed by the test body.
    with contextlib.suppress(OSError):
        os.remove(tmpname)

111 

112 

@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create new empty registry database.

    Parameters
    ----------
    create : `bool`, optional
        If `True` (default), initialize the registry schema in the new
        database.
    universe : `DimensionUniverse`, optional
        Source of the dimension configuration; when `None` the minimal
        test configuration from `_makeDimensionConfig` is used.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    dimensionConfig = universe.dimensionConfig if universe is not None else _makeDimensionConfig()
    # The database lives in a temporary directory that is removed (together
    # with the SQLite file) when the context exits.
    with temporaryDirectory() as tmpdir:
        uri = f"sqlite:///{tmpdir}/gen3.sqlite"
        config = RegistryConfig()
        config["db"] = uri
        if create:
            Registry.createFromConfig(config, dimensionConfig=dimensionConfig)
        yield config

130 

131 

class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Test connection class."""

    # Single init-input connection; its dataset type name is expanded from
    # the "template" default declared above (i.e. "simpleschema").
    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")

136 

137 

class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Test pipeline config."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        # No defaults of our own; delegate to the base class explicitly.
        PipelineTaskConfig.setDefaults(self)

145 

146 

def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default butler_config is set to `Config` populated with some defaults,
    it can be overridden completely by keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.

    Returns
    -------
    args : `types.SimpleNamespace`
        Namespace mimicking the parsed "pipetask run" command line.
    """
    # Import the submodules explicitly: "import unittest" / "import click"
    # alone do not guarantee that the "mock" and "testing" submodules are
    # bound as attributes of their parent packages.
    import unittest.mock

    import click.testing

    # Use a mock to get the default value of arguments to 'run'.
    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    # Rename a couple of click option names to the attribute names that the
    # framework code expects.
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args

203 

204 

class FakeDSType(NamedTuple):
    """A fake `~lsst.daf.butler.DatasetType` class used for testing."""

    # Dataset type name; the only attribute the tests need.
    name: str

209 

210 

@dataclass(frozen=True)
class FakeDSRef:
    """A fake `~lsst.daf.butler.DatasetRef` class used for testing."""

    # Dataset type name (a plain string, unlike the real DatasetRef).
    datasetType: str
    # Data ID values stored as a hashable tuple.
    dataId: tuple

    def isComponent(self) -> bool:
        """Return `False`; fake refs never represent dataset components."""
        return False

220 

221 

# Task class name used by tests, needs to be importable at runtime by the
# pipeline framework.
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"

224 

225 

def _makeDimensionConfig():
    """Make a simple dimension universe configuration.

    Returns
    -------
    config : `DimensionConfig`
        Configuration with a private namespace, a minimal skypix system and
        two trivial dimensions ("A" and "B") keyed by a single integer.
    """
    return DimensionConfig(
        {
            "version": 1,
            # Private namespace keeps this test universe distinct from the
            # default daf_butler universe.
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
                "B": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
            },
            "packers": {},
        }
    )

266 

267 

def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it, the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    # A single dimensionless dataset type feeding a single quantum.
    dataset_type = DatasetType("A", (), storageClass="ExposureF", universe=universe)
    data_id = DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe)
    ref = DatasetRef(dataset_type, data_id, run="fake_run")
    quantum = Quantum(taskName=_TASK_CLASS, inputs={dataset_type: [ref]})  # type: ignore
    task_def = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    return QuantumGraph({task_def: {quantum}}, universe=universe)

297 

298 

class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk that does not require a butler."""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method"""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        # same override but read from a config file instead of the CLI
        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only most trivial case is tested that does not do actual graph
        building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None.
            qgraph = QuantumGraph({}, universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        # an unknown show keyword is rejected up front
        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # History output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        # "uri" is a graph-level keyword, so it is left unhandled here
        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

    def test_execution_resources_parameters(self) -> None:
        """Test creation of the ExecutionResources from command line."""
        fwk = CmdLineFwk()

        # (cores_per_quantum, memory_per_quantum) -> expected resources;
        # None means "option not given on the command line".
        for params, num_cores, max_mem in (
            ((None, None), 1, None),
            ((5, ""), 5, None),
            ((None, "50"), 1, 50 * u.MB),
            ((5, "50 GB"), 5, 50 * u.GB),
        ):
            kwargs = {}
            for k, v in zip(("cores_per_quantum", "memory_per_quantum"), params, strict=True):
                if v is not None:
                    kwargs[k] = v
            args = _makeArgs(**kwargs)
            res = fwk._make_execution_resources(args)
            self.assertEqual(res.num_cores, num_cores)
            self.assertEqual(res.max_mem, max_mem)

        # a non-memory unit must be rejected
        args = _makeArgs(memory_per_quantum="50m")
        with self.assertRaises(u.UnitConversionError):
            fwk._make_execution_resources(args)

497 

498 

499class CmdLineFwkTestCaseWithButler(unittest.TestCase): 

500 """A test case for CmdLineFwk""" 

501 

502 def setUp(self): 

503 super().setUpClass() 

504 self.root = tempfile.mkdtemp() 

505 self.nQuanta = 5 

506 self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta) 

507 

508 def tearDown(self): 

509 shutil.rmtree(self.root, ignore_errors=True) 

510 super().tearDownClass() 

511 

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

536 

    def testSimpleQGraph_rebase(self):
        """Test successful execution of trivial quantum graph, with --rebase
        used to force redefinition of the output collection.
        """
        # Pass one input collection here for the usual test setup; we'll
        # override it later.
        args = _makeArgs(butler_config=self.root, input="test1", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # We'll actually pass two input collections in. One is empty, but
        # the stuff we're testing here doesn't care.
        args.input = ["test2", "test1"]
        butler.registry.registerCollection("test2", CollectionType.RUN)

        # Set up the output collection with a sequence that doesn't end the
        # same way as the input collection. This is normally an error.
        butler.registry.registerCollection("output", CollectionType.CHAINED)
        butler.registry.registerCollection("unexpected_input", CollectionType.RUN)
        butler.registry.registerCollection("output/run0", CollectionType.RUN)
        butler.registry.setCollectionChain("output", ["test2", "unexpected_input", "test1", "output/run0"])

        # Without --rebase, the inconsistent input and output collections are
        # an error.
        with self.assertRaises(ValueError):
            fwk.makeGraph(self.pipeline, args)

        # With --rebase, the output collection gets redefined.
        args.rebase = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # The rebased chain should start with the new output run, followed
        # by the remaining members in the expected order.
        butler.registry.refresh()
        self.assertEqual(
            list(butler.registry.getCollectionChain("output")),
            [args.output_run, "output/run0", "test2", "test1", "unexpected_input"],
        )

587 

    def test_simple_qgraph_qbb(self):
        """Test successful execution of trivial quantum graph in QBB
        (quantum-backed butler) mode.
        """
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        output_run = qgraph.metadata["output_run"]
        args.output_run = output_run

        # QBB must run from serialized graph.
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            n1 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, 31)

        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # Update the output run and try again.
        new_output_run = output_run + "_new"
        qgraph.updateRun(new_output_run, metadata_key="output_run", update_graph_id=True)
        self.assertEqual(qgraph.metadata["output_run"], new_output_run)

        taskFactory = AddTaskFactoryMock()
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler. The second run should
            # transfer the same number of datasets as the first.
            n2 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, n2)

647 

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Expect a CRITICAL log explaining why the graph is empty, and a
        # None result instead of a QuantumGraph.
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

666 

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for case when output data for one task already appears in
        _input_ collection, but no ``--extend-run`` or ``--skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with all outputs of task0.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

711 

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _input_ collection. No ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with all outputs of task0.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        # If all quanta are skipped, the task is not included in the graph.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

756 

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _output_ collection. The ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Put task0's outputs directly into the output run rather than the
        # input collection.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                    "task0_config",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        # First task has no remaining quanta, so is left out completely.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        # Graph does not include quantum for first task.
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

804 

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Mock factory configured to fail at the third quantum.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # Resume execution; without --clobber-outputs the task with partial
        # outputs cannot be skipped or re-run, so execution fails again.
        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

854 

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Mock factory configured to fail at the third quantum.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Resume; with --clobber-outputs the partial-output quantum is re-run
        # instead of failing.
        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

903 

904 def testSimpleQGraphReplaceRun(self): 

905 """Test repeated execution of trivial quantum graph with 

906 --replace-run. 

907 """ 

908 args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1") 

909 butler = makeSimpleButler(self.root, run=args.input, inMemory=False) 

910 populateButler(self.pipeline, butler) 

911 

912 fwk = CmdLineFwk() 

913 taskFactory = AddTaskFactoryMock() 

914 

915 qgraph = fwk.makeGraph(self.pipeline, args) 

916 

917 # should have one task and number of quanta 

918 self.assertEqual(len(qgraph), self.nQuanta) 

919 

920 # deep copy is needed because quanta are updated in place 

921 fwk.runPipeline(qgraph, taskFactory, args) 

922 self.assertEqual(taskFactory.countExec, self.nQuanta) 

923 

924 # need to refresh collections explicitly (or make new butler/registry) 

925 butler.registry.refresh() 

926 collections = set(butler.registry.queryCollections(...)) 

927 self.assertEqual(collections, {"test", "output", "output/run1"}) 

928 

929 # number of datasets written by pipeline: 

930 # - nQuanta of init_outputs 

931 # - nQuanta of configs 

932 # - packages (single dataset) 

933 # - nQuanta * two output datasets 

934 # - nQuanta of metadata 

935 # - nQuanta of log output 

936 n_outputs = self.nQuanta * 6 + 1 

937 refs = butler.registry.queryDatasets(..., collections="output/run1") 

938 self.assertEqual(len(list(refs)), n_outputs) 

939 

940 # re-run with --replace-run (--inputs is ignored, as long as it hasn't 

941 # changed) 

942 args.replace_run = True 

943 args.output_run = "output/run2" 

944 qgraph = fwk.makeGraph(self.pipeline, args) 

945 fwk.runPipeline(qgraph, taskFactory, args) 

946 

947 butler.registry.refresh() 

948 collections = set(butler.registry.queryCollections(...)) 

949 self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"}) 

950 

951 # new output collection 

952 refs = butler.registry.queryDatasets(..., collections="output/run2") 

953 self.assertEqual(len(list(refs)), n_outputs) 

954 

955 # old output collection is still there 

956 refs = butler.registry.queryDatasets(..., collections="output/run1") 

957 self.assertEqual(len(list(refs)), n_outputs) 

958 

959 # re-run with --replace-run and --prune-replaced=unstore 

960 args.replace_run = True 

961 args.prune_replaced = "unstore" 

962 args.output_run = "output/run3" 

963 qgraph = fwk.makeGraph(self.pipeline, args) 

964 fwk.runPipeline(qgraph, taskFactory, args) 

965 

966 butler.registry.refresh() 

967 collections = set(butler.registry.queryCollections(...)) 

968 self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"}) 

969 

970 # new output collection 

971 refs = butler.registry.queryDatasets(..., collections="output/run3") 

972 self.assertEqual(len(list(refs)), n_outputs) 

973 

974 # old output collection is still there, and it has all datasets but 

975 # non-InitOutputs are not in datastore 

976 refs = butler.registry.queryDatasets(..., collections="output/run2") 

977 refs = list(refs) 

978 self.assertEqual(len(refs), n_outputs) 

979 initOutNameRe = re.compile("packages|task.*_config|add_init_output.*") 

980 for ref in refs: 

981 if initOutNameRe.fullmatch(ref.datasetType.name): 

982 butler.get(ref) 

983 else: 

984 with self.assertRaises(FileNotFoundError): 

985 butler.get(ref) 

986 

987 # re-run with --replace-run and --prune-replaced=purge 

988 # This time also remove --input; passing the same inputs that we 

989 # started with and not passing inputs at all should be equivalent. 

990 args.input = None 

991 args.replace_run = True 

992 args.prune_replaced = "purge" 

993 args.output_run = "output/run4" 

994 qgraph = fwk.makeGraph(self.pipeline, args) 

995 fwk.runPipeline(qgraph, taskFactory, args) 

996 

997 butler.registry.refresh() 

998 collections = set(butler.registry.queryCollections(...)) 

999 # output/run3 should disappear now 

1000 self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"}) 

1001 

1002 # new output collection 

1003 refs = butler.registry.queryDatasets(..., collections="output/run4") 

1004 self.assertEqual(len(list(refs)), n_outputs) 

1005 

1006 # Trying to run again with inputs that aren't exactly what we started 

1007 # with is an error, and the kind that should not modify the data repo. 

1008 with self.assertRaises(ValueError): 

1009 args.input = ["test", "output/run2"] 

1010 args.prune_replaced = None 

1011 args.replace_run = True 

1012 args.output_run = "output/run5" 

1013 qgraph = fwk.makeGraph(self.pipeline, args) 

1014 fwk.runPipeline(qgraph, taskFactory, args) 

1015 butler.registry.refresh() 

1016 collections = set(butler.registry.queryCollections(...)) 

1017 self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"}) 

1018 with self.assertRaises(ValueError): 

1019 args.input = ["output/run2", "test"] 

1020 args.prune_replaced = None 

1021 args.replace_run = True 

1022 args.output_run = "output/run6" 

1023 qgraph = fwk.makeGraph(self.pipeline, args) 

1024 fwk.runPipeline(qgraph, taskFactory, args) 

1025 butler.registry.refresh() 

1026 collections = set(butler.registry.queryCollections(...)) 

1027 self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"}) 

1028 

1029 def testSubgraph(self): 

1030 """Test successful execution of trivial quantum graph.""" 

1031 args = _makeArgs(butler_config=self.root, input="test", output="output") 

1032 butler = makeSimpleButler(self.root, run=args.input, inMemory=False) 

1033 populateButler(self.pipeline, butler) 

1034 

1035 fwk = CmdLineFwk() 

1036 qgraph = fwk.makeGraph(self.pipeline, args) 

1037 

1038 # Select first two nodes for execution. This depends on node ordering 

1039 # which I assume is the same as execution order. 

1040 nNodes = 2 

1041 nodeIds = [node.nodeId for node in qgraph] 

1042 nodeIds = nodeIds[:nNodes] 

1043 

1044 self.assertEqual(len(qgraph.taskGraph), self.nQuanta) 

1045 self.assertEqual(len(qgraph), self.nQuanta) 

1046 

1047 with ( 

1048 makeTmpFile(suffix=".qgraph") as tmpname, 

1049 makeSQLiteRegistry(universe=butler.dimensions) as registryConfig, 

1050 ): 

1051 with open(tmpname, "wb") as saveFile: 

1052 qgraph.save(saveFile) 

1053 

1054 args = _makeArgs( 

1055 qgraph=tmpname, 

1056 qgraph_node_id=nodeIds, 

1057 registryConfig=registryConfig, 

1058 execution_butler_location=None, 

1059 ) 

1060 fwk = CmdLineFwk() 

1061 

1062 # load graph, should only read a subset 

1063 qgraph = fwk.makeGraph(pipeline=None, args=args) 

1064 self.assertEqual(len(qgraph), nNodes) 

1065 

1066 def testShowGraph(self): 

1067 """Test for --show options for quantum graph.""" 

1068 nQuanta = 2 

1069 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

1070 

1071 show = ShowInfo(["graph"]) 

1072 show.show_graph_info(qgraph) 

1073 self.assertEqual(show.handled, {"graph"}) 

1074 

1075 def testShowGraphWorkflow(self): 

1076 nQuanta = 2 

1077 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

1078 

1079 show = ShowInfo(["workflow"]) 

1080 show.show_graph_info(qgraph) 

1081 self.assertEqual(show.handled, {"workflow"}) 

1082 

1083 # TODO: cannot test "uri" option presently, it instantiates 

1084 # butler from command line options and there is no way to pass butler 

1085 # mock to that code. 

1086 show = ShowInfo(["uri"]) 

1087 with self.assertRaises(ValueError): # No args given 

1088 show.show_graph_info(qgraph) 

1089 

1090 def testSimpleQGraphDatastoreRecords(self): 

1091 """Test quantum graph generation with --qgraph-datastore-records.""" 

1092 args = _makeArgs( 

1093 butler_config=self.root, input="test", output="output", qgraph_datastore_records=True 

1094 ) 

1095 butler = makeSimpleButler(self.root, run=args.input, inMemory=False) 

1096 populateButler(self.pipeline, butler) 

1097 

1098 fwk = CmdLineFwk() 

1099 qgraph = fwk.makeGraph(self.pipeline, args) 

1100 self.assertEqual(len(qgraph), self.nQuanta) 

1101 for i, qnode in enumerate(qgraph): 

1102 quantum = qnode.quantum 

1103 self.assertIsNotNone(quantum.datastore_records) 

1104 # only the first quantum has a pre-existing input 

1105 if i == 0: 

1106 datastore_name = "FileDatastore@<butlerRoot>" 

1107 self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name}) 

1108 records_data = quantum.datastore_records[datastore_name] 

1109 records = dict(records_data.records) 

1110 self.assertEqual(len(records), 1) 

1111 _, records = records.popitem() 

1112 records = records["file_datastore_records"] 

1113 self.assertEqual( 

1114 [record.path for record in records], 

1115 ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"], 

1116 ) 

1117 else: 

1118 self.assertEqual(quantum.datastore_records, {}) 

1119 

1120 

class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """Detect file-descriptor and general resource leaks."""

1123 

1124 

def setup_module(module):
    """Initialize the LSST test framework for this pytest module."""
    lsst.utils.tests.init()

1128 

1129 

if __name__ == "__main__":
    # Support running this test module directly as a script.
    lsst.utils.tests.init()
    unittest.main()