# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

28"""Simple unit test for cmdLineFwk module. 

29""" 

30 

import contextlib
import logging
import os
import pickle
import re
import shutil
import tempfile
import unittest
import unittest.mock
from dataclasses import dataclass
from io import StringIO
from types import SimpleNamespace
from typing import NamedTuple

import astropy.units as u
import click
import click.testing
import lsst.pex.config as pexConfig
import lsst.pipe.base.connectionTypes as cT
import lsst.utils.tests
from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError
from lsst.ctrl.mpexec.cli.opt import run_options
from lsst.ctrl.mpexec.cli.utils import (
    _ACTION_ADD_INSTRUMENT,
    _ACTION_ADD_TASK,
    _ACTION_CONFIG,
    _ACTION_CONFIG_FILE,
    PipetaskCommand,
)
from lsst.ctrl.mpexec.showInfo import ShowInfo
from lsst.daf.butler import (
    CollectionType,
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    DimensionConfig,
    DimensionUniverse,
    Quantum,
)
from lsst.daf.butler.registry import RegistryConfig
from lsst.daf.butler.registry.sql_registry import SqlRegistry
from lsst.pipe.base import (
    Instrument,
    Pipeline,
    PipelineTaskConfig,
    PipelineTaskConnections,
    QuantumGraph,
    TaskDef,
)
from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant
from lsst.pipe.base.script import transfer_from_graph
from lsst.pipe.base.tests.simpleQGraph import (
    AddTask,
    AddTaskFactoryMock,
    makeSimpleButler,
    makeSimplePipeline,
    makeSimpleQGraph,
    populateButler,
)
from lsst.utils.tests import temporaryDirectory

logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() so that it does not try to
# retrieve a non-existent instrument from the registry; these tests run fine
# without an actual instrument, and implementing a full mock for Instrument is
# too complicated.
Instrument.fromName = lambda name, reg: None


@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating a temporary file name.

    The temporary file is deleted on exiting the context.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into the file.
    suffix : `str`, optional
        Suffix for the temporary file name.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(fd, contents)
    os.close(fd)
    yield tmpname
    with contextlib.suppress(OSError):
        os.remove(tmpname)


@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create a new empty registry database.

    Parameters
    ----------
    create : `bool`, optional
        If `True`, create the registry database from the configuration.
    universe : `DimensionUniverse`, optional
        Dimension universe whose configuration is used for the database;
        a simple test configuration is used if not given.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    dimensionConfig = universe.dimensionConfig if universe is not None else _makeDimensionConfig()
    with temporaryDirectory() as tmpdir:
        uri = f"sqlite:///{tmpdir}/gen3.sqlite"
        config = RegistryConfig()
        config["db"] = uri
        if create:
            SqlRegistry.createFromConfig(config, dimensionConfig=dimensionConfig)
        yield config


class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Test connection class."""

    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")


class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Test pipeline config."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        PipelineTaskConfig.setDefaults(self)


def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default ``butler_config`` is set to a `Config` populated with some
    defaults; it can be overridden completely by a keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to get the default value of arguments to 'run'.
    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option.
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    args = SimpleNamespace(**args)

    # Override butler_config with our defaults.
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use.
        args.butler_config.configFile = "."

    # Override arguments from keyword parameters.
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args


class FakeDSType(NamedTuple):
    """A fake `~lsst.daf.butler.DatasetType` class used for testing."""

    name: str


@dataclass(frozen=True)
class FakeDSRef:
    """A fake `~lsst.daf.butler.DatasetRef` class used for testing."""

    datasetType: str
    dataId: tuple

    def isComponent(self):
        return False


# Task class name used by the tests; it needs to be importable.
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"


def _makeDimensionConfig():
    """Make a simple dimension universe configuration."""
    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
                "B": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
            },
            "packers": {},
        }
    )


def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it; the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    fakeDSType = DatasetType("A", (), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    quanta = [
        Quantum(
            taskName=_TASK_CLASS,
            inputs={
                fakeDSType: [
                    DatasetRef(
                        fakeDSType,
                        DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe),
                        run="fake_run",
                    )
                ]
            },
        )
    ]  # type: ignore
    qgraph = QuantumGraph({taskDef: set(quanta)}, universe=universe)
    return qgraph


class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk."""

    def testMakePipeline(self):
        """Test the CmdLineFwk.makePipeline method."""
        fwk = CmdLineFwk()

        # make an empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # a few tests with serialization
        with makeTmpFile() as tmpname:
            # make an empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read the pipeline back from the file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single-task pipeline; the task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # multi-task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single-task pipeline with config overrides; needs a real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check the --instrument option; for now this only checks that it
        # does not crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Test the CmdLineFwk.makeGraph method.

        Only the most trivial case, which does no actual graph building, is
        tested.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make a non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if the graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with the wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # Reading an empty graph from pickle should work, but makeGraph()
            # will return None.
            qgraph = QuantumGraph({}, universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test the --show options for a pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline, None)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # history output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline, None)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["pipeline-graph"], stream=stream)
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual(
            "\n".join(
                [
                    "○ add_dataset_in",
                    "│",
                    "■ task",
                    "│",
                    "◍ add_dataset_out, add2_dataset_out",
                ]
            ),
            output,
        )

        stream = StringIO()
        show = ShowInfo(["task-graph"], stream=stream)
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("■ task", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match, but warns
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline, None)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline, None)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline, None)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

    def test_execution_resources_parameters(self) -> None:
        """Test creation of the ExecutionResources from the command line."""
        fwk = CmdLineFwk()

        # Each entry is ((cores_per_quantum, memory_per_quantum), expected
        # num_cores, expected max_mem); None means the option is not passed.
        for params, num_cores, max_mem in (
            ((None, None), 1, None),
            ((5, ""), 5, None),
            ((None, "50"), 1, 50 * u.MB),
            ((5, "50 GB"), 5, 50 * u.GB),
        ):
            kwargs = {}
            for k, v in zip(("cores_per_quantum", "memory_per_quantum"), params, strict=True):
                if v is not None:
                    kwargs[k] = v
            args = _makeArgs(**kwargs)
            res = fwk._make_execution_resources(args)
            self.assertEqual(res.num_cores, num_cores)
            self.assertEqual(res.max_mem, max_mem)

        args = _makeArgs(memory_per_quantum="50m")
        with self.assertRaises(u.UnitConversionError):
            fwk._make_execution_resources(args)


class CmdLineFwkTestCaseWithButler(unittest.TestCase):
    """A test case for CmdLineFwk that uses a butler."""

    def setUp(self):
        super().setUp()
        self.root = tempfile.mkdtemp()
        self.nQuanta = 5
        self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta)

    def tearDown(self):
        shutil.rmtree(self.root, ignore_errors=True)
        super().tearDown()

    def testSimpleQGraph(self):
        """Test successful execution of a trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run the whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

    def testSimpleQGraph_rebase(self):
        """Test successful execution of a trivial quantum graph, with
        --rebase used to force redefinition of the output collection.
        """
        # Pass one input collection here for the usual test setup; we'll
        # override it later.
        args = _makeArgs(butler_config=self.root, input="test1", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # We'll actually pass two input collections in. One is empty, but
        # the stuff we're testing here doesn't care.
        args.input = ["test2", "test1"]
        butler.registry.registerCollection("test2", CollectionType.RUN)

        # Set up the output collection with a sequence that doesn't end the
        # same way as the input collection. This is normally an error.
        butler.registry.registerCollection("output", CollectionType.CHAINED)
        butler.registry.registerCollection("unexpected_input", CollectionType.RUN)
        butler.registry.registerCollection("output/run0", CollectionType.RUN)
        butler.registry.setCollectionChain("output", ["test2", "unexpected_input", "test1", "output/run0"])

        # Without --rebase, the inconsistent input and output collections are
        # an error.
        with self.assertRaises(ValueError):
            fwk.makeGraph(self.pipeline, args)

        # With --rebase, the output collection gets redefined.
        args.rebase = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        butler.registry.refresh()
        self.assertEqual(
            list(butler.registry.getCollectionChain("output")),
            [args.output_run, "output/run0", "test2", "test1", "unexpected_input"],
        )

    def test_simple_qgraph_qbb(self):
        """Test successful execution of a trivial quantum graph in QBB mode."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        output_run = qgraph.metadata["output_run"]
        args.output_run = output_run

        # QBB must run from a serialized graph.
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run the whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            n1 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            # Six datasets per quantum plus the single "packages" dataset.
            self.assertEqual(n1, 31)

        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # Update the output run and try again.
        new_output_run = output_run + "_new"
        qgraph.updateRun(new_output_run, metadata_key="output_run", update_graph_id=True)
        self.assertEqual(qgraph.metadata["output_run"], new_output_run)

        taskFactory = AddTaskFactoryMock()
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run the whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            n2 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, n2)

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test the case where output data for one task already appears in
        the _input_ collection, but neither the ``--extend-run`` nor the
        ``--skip-existing`` option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With the current implementation the graph has all nQuanta quanta,
        # but when executing, one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run the whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test ``--skip-existing`` when output data for one task already
        appears in the _input_ collection. No ``--extend-run`` option is
        needed in this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        # If all quanta are skipped, the task is not included in the graph.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run the whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test ``--skip-existing`` when output data for one task already
        appears in the _output_ collection. The ``--extend-run`` option is
        needed in this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                    "task0_config",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        # The first task has no remaining quanta, so it is left out
        # completely.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        # The graph does not include a quantum for the first task.
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run the whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of a trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run the first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.find_dataset("add2_dataset2", collections=args.output, instrument="INSTR", detector=0)
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.find_dataset("task1_metadata", collections=args.output, instrument="INSTR", detector=0)
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of a trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # the graph should have the expected number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run the first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.find_dataset(
            "add2_dataset2", collections=args.output, data_id=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.find_dataset(
            "task1_metadata", collections=args.output, data_id=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # the number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of a trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # the graph should have the expected number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make a new
        # butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # Number of datasets written by the pipeline:
        #  - nQuanta init_outputs
        #  - nQuanta configs
        #  - packages (single dataset)
        #  - nQuanta * two output datasets
        #  - nQuanta metadata
        #  - nQuanta log outputs
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # The old output collection is still there and has all datasets, but
        # the non-InitOutputs are no longer in the datastore.
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                butler.get(ref)
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)

        # Re-run with --replace-run and --prune-replaced=purge.
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

    def testSubgraph(self):
        """Test loading a subset of nodes from a saved quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)

        # Select the first two nodes for execution. This depends on node
        # ordering, which I assume is the same as the execution order.
        nNodes = 2
        nodeIds = [node.nodeId for node in qgraph]
        nodeIds = nodeIds[:nNodes]

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with (
            makeTmpFile(suffix=".qgraph") as tmpname,
            makeSQLiteRegistry(universe=butler.dimensions) as registryConfig,
        ):
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)

            args = _makeArgs(
                qgraph=tmpname,
                qgraph_node_id=nodeIds,
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            fwk = CmdLineFwk()

            # load the graph; this should only read a subset
            qgraph = fwk.makeGraph(pipeline=None, args=args)
            self.assertEqual(len(qgraph), nNodes)

    def testShowGraph(self):
        """Test the --show options for a quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["graph"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"graph"})

    def testShowGraphWorkflow(self):
        """Test the --show workflow option for a quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["workflow"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"workflow"})

        # TODO: cannot test the "uri" option presently; it instantiates a
        # butler from command line options and there is no way to pass a
        # butler mock to that code.
        show = ShowInfo(["uri"])
        with self.assertRaises(ValueError):  # No args given
            show.show_graph_info(qgraph)

    def testSimpleQGraphDatastoreRecords(self):
        """Test quantum graph generation with --qgraph-datastore-records."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)
        for i, qnode in enumerate(qgraph):
            quantum = qnode.quantum
            self.assertIsNotNone(quantum.datastore_records)
            # only the first quantum has a pre-existing input
            if i == 0:
                datastore_name = "FileDatastore@<butlerRoot>"
                self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name})
                records_data = quantum.datastore_records[datastore_name]
                records = dict(records_data.records)
                self.assertEqual(len(records), 1)
                _, records = records.popitem()
                records = records["file_datastore_records"]
                self.assertEqual(
                    [record.path for record in records],
                    ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"],
                )
            else:
                self.assertEqual(quantum.datastore_records, {})


class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """General file leak detection."""


def setup_module(module):
    """Initialize the pytest module."""
    lsst.utils.tests.init()


if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()