Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Simple unit test for cmdLineFwk module. 

23""" 

24 

25import click 

26from types import SimpleNamespace 

27import contextlib 

28import copy 

29from dataclasses import dataclass 

30import logging 

31import os 

32import pickle 

33import shutil 

34import tempfile 

35from typing import NamedTuple 

36import unittest 

37 

38from lsst.ctrl.mpexec.cmdLineFwk import CmdLineFwk 

39from lsst.ctrl.mpexec.cli.pipetask import cli as pipetaskCli 

40from lsst.ctrl.mpexec.cli.utils import (_ACTION_ADD_TASK, _ACTION_CONFIG, 

41 _ACTION_CONFIG_FILE, _ACTION_ADD_INSTRUMENT) 

42from lsst.daf.butler import Config, Quantum, Registry 

43from lsst.daf.butler.cli.utils import Mocker 

44from lsst.daf.butler.registry import RegistryConfig 

45from lsst.obs.base import Instrument 

46import lsst.pex.config as pexConfig 

47from lsst.pipe.base import (Pipeline, PipelineTask, PipelineTaskConfig, 

48 QuantumGraph, TaskDef, TaskFactory, 

49 PipelineTaskConnections) 

50import lsst.pipe.base.connectionTypes as cT 

51import lsst.utils.tests 

52from lsst.pipe.base.tests.simpleQGraph import (AddTaskFactoryMock, makeSimpleQGraph) 

53from lsst.utils.tests import temporaryDirectory 

54 

55 

# Configure the root logger to report INFO-level messages.
logging.basicConfig(level=logging.INFO)

# Instrument.fromName() is monkey-patched so that it never tries to retrieve
# a non-existing instrument from the registry. These tests run fine without
# an actual instrument, and a full Instrument mock would be too complicated.
Instrument.fromName = lambda name, reg: None

62 

63 

@contextlib.contextmanager
def makeTmpFile(contents=None):
    """Context manager for generating temporary file name.

    Temporary file is deleted on exiting context, including when the body
    of the ``with`` statement raises an exception.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into a file. If `None` or empty, nothing is written
        and the file is left empty.

    Yields
    ------
    tmpname : `str`
        Path of the temporary file.
    """
    fd, tmpname = tempfile.mkstemp()
    try:
        try:
            if contents:
                os.write(fd, contents)
        finally:
            # Release the descriptor even if the write failed.
            os.close(fd)
        yield tmpname
    finally:
        # Best-effort removal: the caller may already have deleted the file.
        with contextlib.suppress(OSError):
            os.remove(tmpname)

82 

83 

@contextlib.contextmanager
def makeSQLiteRegistry(create=True):
    """Context manager providing configuration for a new SQLite registry.

    The database URI points at a ``gen3.sqlite`` file inside a directory
    obtained from `temporaryDirectory`; that directory context is exited
    when this context exits.

    Parameters
    ----------
    create : `bool`, optional
        If `True` (default), ``Registry.fromConfig(config, create=True)`` is
        called before the configuration is yielded.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration whose ``"db"`` entry is the SQLite URI.
    """
    with temporaryDirectory() as tmpdir:
        registryConfig = RegistryConfig()
        registryConfig["db"] = f"sqlite:///{tmpdir}/gen3.sqlite"
        if create:
            Registry.fromConfig(registryConfig, create=True)
        yield registryConfig

100 

101 

class SimpleConnections(
    PipelineTaskConnections,
    dimensions=(),
    defaultTemplates={"template": "simple"}
):
    """Connections class with a single init-input named ``schema``."""

    schema = cT.InitInput(
        doc="Schema",
        name="{template}schema",
        storageClass="SourceCatalog",
    )

107 

108 

class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Task configuration with one arbitrary string field."""

    field = pexConfig.Field(doc="arbitrary string", dtype=str)

    def setDefaults(self):
        # Nothing is customized here; only the base-class defaults apply.
        super().setDefaults()

114 

115 

class TaskOne(PipelineTask):
    """Minimal task configured by `SimpleConfig`, default name "taskOne"."""

    _DefaultName = "taskOne"
    ConfigClass = SimpleConfig

119 

120 

class TaskTwo(PipelineTask):
    """Minimal task configured by `SimpleConfig`, default name "taskTwo"."""

    _DefaultName = "taskTwo"
    ConfigClass = SimpleConfig

124 

125 

class TaskFactoryMock(TaskFactory):
    """Task factory that resolves only the names "TaskOne" and "TaskTwo"."""

    def loadTaskClass(self, taskName):
        """Return the task class and its name for a recognized task name.

        Parameters
        ----------
        taskName : `str`
            Name of the task, either "TaskOne" or "TaskTwo".

        Returns
        -------
        result : `tuple` or `None`
            ``(taskClass, taskName)`` for a recognized name, `None` for
            any other name.
        """
        if taskName == "TaskOne":
            return TaskOne, "TaskOne"
        if taskName == "TaskTwo":
            return TaskTwo, "TaskTwo"
        return None

    def makeTask(self, taskClass, config, overrides, butler):
        """Instantiate ``taskClass``.

        Parameters
        ----------
        taskClass : `type`
            Task class to instantiate.
        config : object or `None`
            Task configuration; if `None` a new ``taskClass.ConfigClass()``
            instance is made.
        overrides : object or `None`
            If truthy, its ``applyTo`` method is called with the
            configuration before the task is constructed.
        butler : object
            Passed to the task constructor as ``butler``.

        Returns
        -------
        task : object
            New instance of ``taskClass``.
        """
        taskConfig = taskClass.ConfigClass() if config is None else config
        if overrides:
            overrides.applyTo(taskConfig)
        return taskClass(config=taskConfig, butler=butler)

139 

140 

def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default butler_config is set to `Config` populated with some defaults,
    it can be overridden completely by keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.

    Returns
    -------
    args : `types.SimpleNamespace`
        Default arguments of the ``pipetask run`` command with the overrides
        applied.

    Raises
    ------
    RuntimeError
        Raised if invoking ``pipetask run --call-mocker`` exits with a
        non-zero code.
    """
    # Import the submodule explicitly: ``import click`` alone does not
    # guarantee that ``click.testing`` has been loaded.
    from click.testing import CliRunner

    # Execute the "run" command with the --call-mocker flag set so we can get
    # all the default arguments that were passed to the command function out of
    # the Mocker call.
    # At some point, ctrl_mpexec should stop passing around a SimpleNamespace
    # of arguments, which would make this workaround unnecessary.
    runner = CliRunner()
    result = runner.invoke(pipetaskCli, ["run", "--call-mocker"])
    if result.exit_code != 0:
        # Chain the exception captured by click (if any) to ease debugging.
        raise RuntimeError("Failure getting default args from 'pipetask run'.") from result.exception
    _, callKwargs = Mocker.mock.call_args
    # Work on a copy so that the call recorded by the mock stays intact.
    args = dict(callKwargs)
    args["enableLsstDebug"] = args.pop("debug")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    args.butler_config = Config()
    if registryConfig:
        args.butler_config["registry"] = registryConfig
    # The default datastore has a relocatable root, so we need to specify
    # some root here for it to use
    args.butler_config.configFile = "."
    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    return args

182 

183 

class FakeTaskDef(NamedTuple):
    """Stand-in for a task definition that carries only a name."""

    # Name of the fake task definition.
    name: str

186 

187 

@dataclass(frozen=True)
class FakeDSRef:
    """Immutable stand-in for a dataset reference."""

    # Name of the dataset type.
    datasetType: str
    # Data ID values, given as a tuple.
    dataId: tuple

192 

193 

def _makeQGraph():
    """Build a minimal QuantumGraph holding a single quantum.

    The graph is only meant to be pickled; its quanta are not usable for
    anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    # TaskDef requires a task name that can really be imported, so pick one
    # that is certain to exist.
    importableName = "lsst.pipe.base.Struct"

    taskDef = TaskDef(taskName=importableName, config=SimpleConfig())
    fakeInputs = {FakeTaskDef("A"): FakeDSRef("A", (1, 2))}
    quantum = Quantum(taskName=importableName, inputs=fakeInputs)  # type: ignore
    return QuantumGraph({taskDef: {quantum}})

211 

212 

class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk
    """

    def testMakePipeline(self):
        """Check pipelines produced by CmdLineFwk.makePipeline.
        """
        # The local TaskOne cannot be used where the task has to be imported
        # with `doImport()`, so those cases use this importable task.
        addTask = "lsst.pipe.base.tests.simpleQGraph.AddTask:task"
        framework = CmdLineFwk()

        # no actions at all: the pipeline is empty
        emptyPipeline = framework.makePipeline(_makeArgs())
        self.assertIsInstance(emptyPipeline, Pipeline)
        self.assertEqual(len(emptyPipeline), 0)

        # round trip of an empty pipeline through a file
        with makeTmpFile() as pipelineFile:
            # build an empty pipeline and save it
            saved = framework.makePipeline(_makeArgs(save_pipeline=pipelineFile))
            self.assertIsInstance(saved, Pipeline)

            # load the pipeline back from the same file
            loaded = framework.makePipeline(_makeArgs(pipeline=pipelineFile))
            self.assertIsInstance(loaded, Pipeline)
            self.assertEqual(len(loaded), 0)

        # pipeline with one task
        oneTask = framework.makePipeline(
            _makeArgs(pipeline_actions=[_ACTION_ADD_TASK("TaskOne:task1")])
        )
        self.assertIsInstance(oneTask, Pipeline)
        self.assertEqual(len(oneTask), 1)

        # pipeline with several tasks
        labels = ("TaskOne:task1a", "TaskTwo:task2", "TaskOne:task1b")
        severalTasks = framework.makePipeline(
            _makeArgs(pipeline_actions=[_ACTION_ADD_TASK(label) for label in labels])
        )
        self.assertIsInstance(severalTasks, Pipeline)
        self.assertEqual(len(severalTasks), 3)

        # one task with a config override given on the command line
        overridden = framework.makePipeline(_makeArgs(pipeline_actions=[
            _ACTION_ADD_TASK(addTask),
            _ACTION_CONFIG("task:addend=100"),
        ]))
        expanded = list(overridden.toExpandedPipeline())
        self.assertEqual(len(expanded), 1)
        self.assertEqual(expanded[0].config.addend, 100)

        # one task with a config override read from a file
        with makeTmpFile(b"config.addend = 1000\n") as overridesFile:
            fromFile = framework.makePipeline(_makeArgs(pipeline_actions=[
                _ACTION_ADD_TASK(addTask),
                _ACTION_CONFIG_FILE(f"task:{overridesFile}"),
            ]))
            expanded = list(fromFile.toExpandedPipeline())
            self.assertEqual(len(expanded), 1)
            self.assertEqual(expanded[0].config.addend, 1000)

        # --instrument option; so far this only verifies that nothing crashes
        framework.makePipeline(_makeArgs(pipeline_actions=[
            _ACTION_ADD_TASK(addTask),
            _ACTION_ADD_INSTRUMENT("Instrument"),
        ]))

    def testMakeGraphFromPickle(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only the trivial case of reading a saved graph is covered, no
        actual graph building happens here.
        """
        framework = CmdLineFwk()

        with makeTmpFile() as graphFile, makeSQLiteRegistry() as registryConfig:

            def graphArgs():
                # Fresh arguments pointing at the graph file and registry.
                return _makeArgs(qgraph=graphFile, registryConfig=registryConfig)

            # save a graph that has content, then read it back
            stored = _makeQGraph()
            with open(graphFile, "wb") as stream:
                stored.save(stream)
            args = graphArgs()
            loaded = framework.makeGraph(None, args)
            self.assertIsInstance(loaded, QuantumGraph)
            self.assertEqual(len(loaded), 1)

            # the file holds a pickle of some other type
            with open(graphFile, "wb") as stream:
                pickle.dump({}, stream)
            args = graphArgs()
            with self.assertRaises(TypeError):
                framework.makeGraph(None, args)

            # an empty graph can be read, but makeGraph() issues a warning
            # and returns None
            emptyGraph = QuantumGraph({})
            with open(graphFile, "wb") as stream:
                emptyGraph.save(stream)
            args = graphArgs()
            with self.assertWarnsRegex(UserWarning, "QuantumGraph is empty"):
                # the warning for the empty graph is verified here as well
                loaded = framework.makeGraph(None, args)
            self.assertIsNone(loaded)

    def testShowPipeline(self):
        """Test for --show options for pipeline.
        """
        framework = CmdLineFwk()

        args = _makeArgs(pipeline_actions=[
            _ACTION_ADD_TASK("lsst.pipe.base.tests.simpleQGraph.AddTask:task"),
            _ACTION_CONFIG("task:addend=100"),
        ])
        pipeline = framework.makePipeline(args)

        # each option is shown on its own, in this order
        for option in ("pipeline", "config", "history=task::addend", "tasks"):
            args.show = [option]
            framework.showInfo(args, pipeline)

350 

351 

class CmdLineFwkTestCaseWithButler(unittest.TestCase):
    """A test case for CmdLineFwk that executes quantum graphs.

    Each test gets its own temporary directory, ``self.root``, which is
    passed to `makeSimpleQGraph` as ``root``.
    """

    def setUp(self):
        # Per-test hook: chain to the instance-level base hook, not to the
        # class-level setUpClass().
        super().setUp()
        self.root = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.root, ignore_errors=True)
        super().tearDown()

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph.
        """

        nQuanta = 5
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        self.assertEqual(len(qgraph.taskGraph), 5)
        self.assertEqual(len(qgraph), nQuanta)

        args = _makeArgs()
        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args, butler=butler)
        self.assertEqual(taskFactory.countExec, nQuanta)

    def testSimpleQGraphSkipExisting(self):
        """Test continuing execution of trivial quantum graph with
        --skip-existing.
        """

        nQuanta = 5
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        self.assertEqual(len(qgraph.taskGraph), 5)
        self.assertEqual(len(qgraph), nQuanta)

        args = _makeArgs()
        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        # run first three quanta
        with self.assertRaises(RuntimeError):
            fwk.runPipeline(qgraph, taskFactory, args, butler=butler)
        self.assertEqual(taskFactory.countExec, 3)

        # run remaining ones
        taskFactory.stopAt = -1
        args.skip_existing = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args, butler=butler)
        self.assertEqual(taskFactory.countExec, nQuanta)

    def testSimpleQGraphPartialOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """

        nQuanta = 5
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        # graph should contain the requested number of quanta
        self.assertEqual(len(qgraph), nQuanta)

        args = _makeArgs()
        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        # run first three quanta
        with self.assertRaises(RuntimeError):
            fwk.runPipeline(qgraph, taskFactory, args, butler=butler)
        self.assertEqual(taskFactory.countExec, 3)

        # drop one of the two outputs from one task
        ref = butler._findDatasetRef("add2_dataset2", instrument="INSTR", detector=0)
        self.assertIsNotNone(ref)
        butler.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)

        # without --clobber-partial-outputs the partial outputs are an error
        taskFactory.stopAt = -1
        args.skip_existing = True
        args.no_versions = True
        excRe = "Registry inconsistency while checking for existing outputs.*"
        with self.assertRaisesRegex(RuntimeError, excRe):
            fwk.runPipeline(qgraph, taskFactory, args, butler=butler)

    def testSimpleQGraphClobberPartialOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-partial-outputs.
        """

        nQuanta = 5
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        # graph should contain the requested number of quanta
        self.assertEqual(len(qgraph), nQuanta)

        args = _makeArgs()
        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        # run first three quanta
        with self.assertRaises(RuntimeError):
            fwk.runPipeline(qgraph, taskFactory, args, butler=butler)
        self.assertEqual(taskFactory.countExec, 3)

        # drop one of the two outputs from one task
        ref = butler._findDatasetRef("add2_dataset2", instrument="INSTR", detector=0)
        self.assertIsNotNone(ref)
        butler.pruneDatasets([ref], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing = True
        args.clobber_partial_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args, butler=butler)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, nQuanta + 1)

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """

        # need non-memory registry in this case
        nQuanta = 5
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root, inMemory=False)

        # graph should contain the requested number of quanta
        self.assertEqual(len(qgraph), nQuanta)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # run whole thing
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            output_run="output/run1")
        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)
        self.assertEqual(taskFactory.countExec, nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry._collections.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        #  - nQuanta of init_outputs
        #  - nQuanta of configs
        #  - packages (single dataset)
        #  - nQuanta * two output datasets
        #  - nQuanta of metadata
        n_outputs = nQuanta * 5 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is not compatible)
        args.input = None
        args.replace_run = True
        args.output_run = "output/run2"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry._collections.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.input = None
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry._collections.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there, and it has all datasets but
        # they are not in datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        with self.assertRaises(FileNotFoundError):
            butler.get(refs[0], collections="output/run2")

        # re-run with --replace-run and --prune-replaced=purge
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args)

        butler.registry._collections.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

    def testShowGraph(self):
        """Test for --show options for quantum graph.
        """
        fwk = CmdLineFwk()

        nQuanta = 2
        # only the graph is needed here, the butler is not used
        _, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        args = _makeArgs(show=["graph"])
        fwk.showInfo(args, pipeline=None, graph=qgraph)
        # TODO: cannot test "workflow" option presently, it instantiates
        # butler from command line options and there is no way to pass butler
        # mock to that code.

583 

584 

class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """Inherit the tests of `lsst.utils.tests.MemoryTestCase` unchanged."""

587 

588 

def setup_module(module):
    """Call `lsst.utils.tests.init`.

    Named as a module-level setup hook; presumably invoked by the test
    runner before the tests in this module.

    Parameters
    ----------
    module : `types.ModuleType`
        Not used by this function.
    """
    lsst.utils.tests.init()

591 

592 

if __name__ == "__main__":
    # Executed as a script: initialize the LSST test utilities first, then
    # hand control to the unittest command-line runner.
    lsst.utils.tests.init()
    unittest.main()