Coverage for tests/test_cmdLineFwk.py: 13%

549 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-18 09:41 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <https://www.gnu.org/licenses/>. 

27 

28"""Simple unit test for cmdLineFwk module. 

29""" 

30 

31import contextlib 

32import logging 

33import os 

34import pickle 

35import re 

36import shutil 

37import tempfile 

38import unittest 

39from dataclasses import dataclass 

40from io import StringIO 

41from types import SimpleNamespace 

42from typing import NamedTuple 

43 

44import astropy.units as u 

45import click 

46import lsst.pex.config as pexConfig 

47import lsst.pipe.base.connectionTypes as cT 

48import lsst.utils.tests 

49from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError 

50from lsst.ctrl.mpexec.cli.opt import run_options 

51from lsst.ctrl.mpexec.cli.utils import ( 

52 _ACTION_ADD_INSTRUMENT, 

53 _ACTION_ADD_TASK, 

54 _ACTION_CONFIG, 

55 _ACTION_CONFIG_FILE, 

56 PipetaskCommand, 

57) 

58from lsst.ctrl.mpexec.showInfo import ShowInfo 

59from lsst.daf.butler import ( 

60 CollectionType, 

61 Config, 

62 DataCoordinate, 

63 DatasetRef, 

64 DatasetType, 

65 DimensionConfig, 

66 DimensionUniverse, 

67 Quantum, 

68 Registry, 

69) 

70from lsst.daf.butler.registry import RegistryConfig 

71from lsst.pipe.base import ( 

72 Instrument, 

73 Pipeline, 

74 PipelineTaskConfig, 

75 PipelineTaskConnections, 

76 QuantumGraph, 

77 TaskDef, 

78) 

79from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant 

80from lsst.pipe.base.script import transfer_from_graph 

81from lsst.pipe.base.tests.simpleQGraph import ( 

82 AddTask, 

83 AddTaskFactoryMock, 

84 makeSimpleButler, 

85 makeSimplePipeline, 

86 makeSimpleQGraph, 

87 populateButler, 

88) 

89from lsst.utils.tests import temporaryDirectory 

90 

# Honor UNIT_TEST_LOGGING_LEVEL if set; fall back to INFO for unknown names.
logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() to not retrieve non-existing
# instrument from registry, these tests can run fine without actual instrument
# and implementing full mock for Instrument is too complicated.
Instrument.fromName = lambda name, reg: None

97 

98 

@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Yield the name of a freshly created temporary file.

    The file is removed again when the context exits normally.

    Parameters
    ----------
    contents : `bytes`, optional
        Initial data written to the file; nothing is written when empty
        or `None`.
    suffix : `str`, optional
        File name suffix forwarded to `tempfile.mkstemp`.
    """
    handle, path = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(handle, contents)
    os.close(handle)
    yield path
    # Best-effort cleanup; the file may already have been removed by the
    # caller.
    with contextlib.suppress(OSError):
        os.remove(path)

117 

118 

@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager providing an empty SQLite registry database.

    Parameters
    ----------
    create : `bool`, optional
        If `True` (default), initialize the database schema.
    universe : `DimensionUniverse`, optional
        Universe whose dimension configuration is used for the new
        registry; a simple test configuration is used when `None`.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    if universe is None:
        dimensionConfig = _makeDimensionConfig()
    else:
        dimensionConfig = universe.dimensionConfig
    with temporaryDirectory() as tmpdir:
        registryConfig = RegistryConfig()
        registryConfig["db"] = f"sqlite:///{tmpdir}/gen3.sqlite"
        if create:
            Registry.createFromConfig(registryConfig, dimensionConfig=dimensionConfig)
        yield registryConfig

136 

137 

class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Test connection class."""

    # Single init-input connection; its dataset name is expanded from the
    # "template" default declared in the class keyword arguments above.
    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")

142 

143 

class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Test pipeline config."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        # No extra defaults beyond the base class; kept so the config
        # override machinery can be exercised by the tests.
        PipelineTaskConfig.setDefaults(self)

151 

152 

def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default butler_config is set to `Config` populated with some defaults,
    it can be overridden completely by keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to get the default value of arguments to 'run'.
    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    # Rename click-level option names to the attribute names the framework
    # code expects.
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args

209 

210 

class FakeDSType(NamedTuple):
    """A fake `~lsst.daf.butler.DatasetType` class used for testing."""

    # Only the dataset type name is needed by the code under test.
    name: str

215 

216 

@dataclass(frozen=True)
class FakeDSRef:
    """A minimal, immutable stand-in for `~lsst.daf.butler.DatasetRef`
    used for testing.
    """

    datasetType: str
    dataId: tuple

    def isComponent(self):
        """Report whether this reference points at a dataset component.

        The fake never represents a component, so this is always `False`.
        """
        return False

226 

227 

# Task class name used by tests, needs to be importable
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"

230 

231 

def _makeDimensionConfig():
    """Make a simple dimension universe configuration."""
    # Elements "A" and "B" are identical: a single integer "id" key backed
    # by a plain dimension-record table, so build them from one template.
    elements = {
        name: {
            "keys": [
                {
                    "name": "id",
                    "type": "int",
                }
            ],
            "storage": {
                "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
            },
        }
        for name in ("A", "B")
    }
    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": elements,
            "packers": {},
        }
    )

272 

273 

def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it, the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    fakeDSType = DatasetType("A", (), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    # One quantum with a single input ref; the data ID uses the "A"/"B"
    # dimensions declared by _makeDimensionConfig().
    quanta = [
        Quantum(
            taskName=_TASK_CLASS,
            inputs={
                fakeDSType: [
                    DatasetRef(
                        fakeDSType,
                        DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe),
                        run="fake_run",
                    )
                ]
            },
        )
    ]  # type: ignore
    qgraph = QuantumGraph({taskDef: set(quanta)}, universe=universe)
    return qgraph

303 

304 

class CmdLineFwkTestCase(unittest.TestCase):
    """Tests for CmdLineFwk that do not require a populated butler."""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method"""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        # same override but delivered via a config file instead of inline
        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only most trivial case is tested that does not do actual graph
        building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None.
            qgraph = QuantumGraph({}, universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline, None)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # History output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        # "uri" needs a quantum graph, so it must be reported as unhandled
        # when only a pipeline is given.
        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline, None)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["pipeline-graph"], stream=stream)  # Text-art dataset/task graph
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual(
            "\n".join(
                [
                    "○ add_dataset_in",
                    "│",
                    "■ task",
                    "│",
                    "◍ add_dataset_out, add2_dataset_out",
                ]
            ),
            output,
        )

        stream = StringIO()
        show = ShowInfo(["task-graph"], stream=stream)  # Text-art task-only graph
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("■ task", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline, None)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline, None)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline, None)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

    def test_execution_resources_parameters(self) -> None:
        """Test creation of the ExecutionResources from command line."""
        fwk = CmdLineFwk()

        # (cores_per_quantum, memory_per_quantum) -> expected resources;
        # None means "option not given on the command line".
        for params, num_cores, max_mem in (
            ((None, None), 1, None),
            ((5, ""), 5, None),
            ((None, "50"), 1, 50 * u.MB),
            ((5, "50 GB"), 5, 50 * u.GB),
        ):
            kwargs = {}
            for k, v in zip(("cores_per_quantum", "memory_per_quantum"), params, strict=True):
                if v is not None:
                    kwargs[k] = v
            args = _makeArgs(**kwargs)
            res = fwk._make_execution_resources(args)
            self.assertEqual(res.num_cores, num_cores)
            self.assertEqual(res.max_mem, max_mem)

        # "50m" is not convertible to bytes and must be rejected.
        args = _makeArgs(memory_per_quantum="50m")
        with self.assertRaises(u.UnitConversionError):
            fwk._make_execution_resources(args)

528 

529 

530class CmdLineFwkTestCaseWithButler(unittest.TestCase): 

531 """A test case for CmdLineFwk""" 

532 

533 def setUp(self): 

534 super().setUpClass() 

535 self.root = tempfile.mkdtemp() 

536 self.nQuanta = 5 

537 self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta) 

538 

539 def tearDown(self): 

540 shutil.rmtree(self.root, ignore_errors=True) 

541 super().tearDownClass() 

542 

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

567 

    def testSimpleQGraph_rebase(self):
        """Test successful execution of trivial quantum graph, with --rebase
        used to force redefinition of the output collection.
        """
        # Pass one input collection here for the usual test setup; we'll
        # override it later.
        args = _makeArgs(butler_config=self.root, input="test1", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # We'll actually pass two input collections in. One is empty, but
        # the stuff we're testing here doesn't care.
        args.input = ["test2", "test1"]
        butler.registry.registerCollection("test2", CollectionType.RUN)

        # Set up the output collection with a sequence that doesn't end the
        # same way as the input collection. This is normally an error.
        butler.registry.registerCollection("output", CollectionType.CHAINED)
        butler.registry.registerCollection("unexpected_input", CollectionType.RUN)
        butler.registry.registerCollection("output/run0", CollectionType.RUN)
        butler.registry.setCollectionChain("output", ["test2", "unexpected_input", "test1", "output/run0"])

        # Without --rebase, the inconsistent input and output collections are
        # an error.
        with self.assertRaises(ValueError):
            fwk.makeGraph(self.pipeline, args)

        # With --rebase, the output collection gets redefined.
        args.rebase = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # The rebased chain puts the new output run first and the previous
        # members after it.
        butler.registry.refresh()
        self.assertEqual(
            list(butler.registry.getCollectionChain("output")),
            [args.output_run, "output/run0", "test2", "test1", "unexpected_input"],
        )

618 

    def test_simple_qgraph_qbb(self):
        """Test successful execution of trivial quantum graph in QBB mode."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        output_run = qgraph.metadata["output_run"]
        args.output_run = output_run

        # QBB must run from serialized graph.
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            n1 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, 31)

        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # Update the output run and try again.
        new_output_run = output_run + "_new"
        qgraph.updateRun(new_output_run, metadata_key="output_run", update_graph_id=True)
        self.assertEqual(qgraph.metadata["output_run"], new_output_run)

        taskFactory = AddTaskFactoryMock()
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler; the rerun should transfer
            # exactly as many datasets as the first pass.
            n2 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, n2)

678 

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

697 

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for case when output data for one task already appears in
        _input_ collection, but no ``--extend-run`` or ``--skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with all outputs of task0.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

742 

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _input_ collection. No ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with all outputs of task0.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        # If all quanta are skipped, the task is not included in the graph.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

787 

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _output_ collection. The ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # task0 outputs go to the output run; only its input dataset lives in
        # the input collection.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                    "task0_config",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        # First task has no remaining quanta, so is left out completely.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        # Graph does not include quantum for first task.
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

835 

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Task factory configured to fail on the fourth execution.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # Retry with skip-existing but without clobbering; the quantum with
        # partial outputs must still fail.
        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

885 

886 def testSimpleQGraphClobberOutputs(self): 

887 """Test continuing execution of trivial quantum graph with 

888 --clobber-outputs. 

889 """ 

890 args = _makeArgs(butler_config=self.root, input="test", output="output") 

891 butler = makeSimpleButler(self.root, run=args.input, inMemory=False) 

892 populateButler(self.pipeline, butler) 

893 

894 fwk = CmdLineFwk() 

895 taskFactory = AddTaskFactoryMock(stopAt=3) 

896 

897 qgraph = fwk.makeGraph(self.pipeline, args) 

898 

899 # should have one task and number of quanta 

900 self.assertEqual(len(qgraph), self.nQuanta) 

901 

902 # Ensure that the output run used in the graph is also used in 

903 # the pipeline execution. It is possible for makeGraph and runPipeline 

904 # to calculate time-stamped runs across a second boundary. 

905 args.output_run = qgraph.metadata["output_run"] 

906 

907 # run first three quanta 

908 with self.assertRaises(MPGraphExecutorError): 

909 fwk.runPipeline(qgraph, taskFactory, args) 

910 self.assertEqual(taskFactory.countExec, 3) 

911 

912 butler.registry.refresh() 

913 

914 # drop one of the two outputs from one task 

915 ref1 = butler.registry.findDataset( 

916 "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0) 

917 ) 

918 self.assertIsNotNone(ref1) 

919 # also drop the metadata output 

920 ref2 = butler.registry.findDataset( 

921 "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0) 

922 ) 

923 self.assertIsNotNone(ref2) 

924 butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True) 

925 

926 taskFactory.stopAt = -1 

927 args.skip_existing = True 

928 args.extend_run = True 

929 args.clobber_outputs = True 

930 args.no_versions = True 

931 fwk.runPipeline(qgraph, taskFactory, args) 

932 # number of executed quanta is incremented 

933 self.assertEqual(taskFactory.countExec, self.nQuanta + 1) 

934 

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.

        Covers plain --replace-run, --prune-replaced=unstore,
        --prune-replaced=purge, and the error cases where the inputs
        passed on a re-run differ from the originals.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        # - nQuanta of init_outputs
        # - nQuanta of configs
        # - packages (single dataset)
        # - nQuanta * two output datasets
        # - nQuanta of metadata
        # - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there, and it has all datasets but
        # non-InitOutputs are not in datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        # Pattern matching the init-output dataset type names, which
        # "unstore" pruning is expected to leave in the datastore.
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                # init-outputs are still retrievable
                butler.get(ref)
            else:
                # everything else was unstored
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # repo contents unchanged by the failed attempt
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        # Same check with the input collections in the opposite order.
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

1060 def testSubgraph(self): 

1061 """Test successful execution of trivial quantum graph.""" 

1062 args = _makeArgs(butler_config=self.root, input="test", output="output") 

1063 butler = makeSimpleButler(self.root, run=args.input, inMemory=False) 

1064 populateButler(self.pipeline, butler) 

1065 

1066 fwk = CmdLineFwk() 

1067 qgraph = fwk.makeGraph(self.pipeline, args) 

1068 

1069 # Select first two nodes for execution. This depends on node ordering 

1070 # which I assume is the same as execution order. 

1071 nNodes = 2 

1072 nodeIds = [node.nodeId for node in qgraph] 

1073 nodeIds = nodeIds[:nNodes] 

1074 

1075 self.assertEqual(len(qgraph.taskGraph), self.nQuanta) 

1076 self.assertEqual(len(qgraph), self.nQuanta) 

1077 

1078 with ( 

1079 makeTmpFile(suffix=".qgraph") as tmpname, 

1080 makeSQLiteRegistry(universe=butler.dimensions) as registryConfig, 

1081 ): 

1082 with open(tmpname, "wb") as saveFile: 

1083 qgraph.save(saveFile) 

1084 

1085 args = _makeArgs( 

1086 qgraph=tmpname, 

1087 qgraph_node_id=nodeIds, 

1088 registryConfig=registryConfig, 

1089 execution_butler_location=None, 

1090 ) 

1091 fwk = CmdLineFwk() 

1092 

1093 # load graph, should only read a subset 

1094 qgraph = fwk.makeGraph(pipeline=None, args=args) 

1095 self.assertEqual(len(qgraph), nNodes) 

1096 

1097 def testShowGraph(self): 

1098 """Test for --show options for quantum graph.""" 

1099 nQuanta = 2 

1100 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

1101 

1102 show = ShowInfo(["graph"]) 

1103 show.show_graph_info(qgraph) 

1104 self.assertEqual(show.handled, {"graph"}) 

1105 

1106 def testShowGraphWorkflow(self): 

1107 nQuanta = 2 

1108 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

1109 

1110 show = ShowInfo(["workflow"]) 

1111 show.show_graph_info(qgraph) 

1112 self.assertEqual(show.handled, {"workflow"}) 

1113 

1114 # TODO: cannot test "uri" option presently, it instantiates 

1115 # butler from command line options and there is no way to pass butler 

1116 # mock to that code. 

1117 show = ShowInfo(["uri"]) 

1118 with self.assertRaises(ValueError): # No args given 

1119 show.show_graph_info(qgraph) 

1120 

    def testSimpleQGraphDatastoreRecords(self):
        """Test quantum graph generation with --qgraph-datastore-records.

        With that option enabled every quantum in the generated graph
        should carry datastore records for its pre-existing inputs; here
        only the first quantum has such an input.
        """
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)
        for i, qnode in enumerate(qgraph):
            quantum = qnode.quantum
            # records attribute must be present even when empty
            self.assertIsNotNone(quantum.datastore_records)
            # only the first quantum has a pre-existing input
            if i == 0:
                datastore_name = "FileDatastore@<butlerRoot>"
                self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name})
                records_data = quantum.datastore_records[datastore_name]
                records = dict(records_data.records)
                # exactly one dataset's records: the single input
                self.assertEqual(len(records), 1)
                _, records = records.popitem()
                records = records["file_datastore_records"]
                # the record must point at the pickled input file
                self.assertEqual(
                    [record.path for record in records],
                    ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"],
                )
            else:
                # downstream quanta consume only in-graph outputs, so no
                # datastore records are attached
                self.assertEqual(quantum.datastore_records, {})
1150 

1151 

class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """General file leak detection.

    Subclassing `lsst.utils.tests.MemoryTestCase` adds the standard LSST
    leak-detection checks for this test module; no extra tests are defined
    here.
    """

1155 

def setup_module(module):
    """Initialize pytest module.

    Parameters
    ----------
    module : `module`
        The module object being set up by pytest (unused).
    """
    lsst.utils.tests.init()
1159 

1160 

if __name__ == "__main__":
    # Initialize LSST test utilities before handing control to unittest,
    # mirroring what setup_module does under pytest.
    lsst.utils.tests.init()
    unittest.main()