Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Simple unit test for cmdLineFwk module. 

23""" 

24 

25import contextlib 

26import copy 

27from dataclasses import dataclass 

28import logging 

29import os 

30import pickle 

31import shutil 

32import tempfile 

33from typing import NamedTuple 

34import unittest 

35 

36from lsst.ctrl.mpexec.cmdLineFwk import CmdLineFwk 

37import lsst.ctrl.mpexec.cmdLineParser as parser_mod 

38from lsst.ctrl.mpexec.cmdLineParser import (_ACTION_ADD_TASK, _ACTION_CONFIG, 

39 _ACTION_CONFIG_FILE, _ACTION_ADD_INSTRUMENT) 

40from lsst.daf.butler import Config, Quantum, Registry 

41from lsst.daf.butler.registry import RegistryConfig 

42from lsst.obs.base import Instrument 

43import lsst.pex.config as pexConfig 

44from lsst.pipe.base import (Pipeline, PipelineTask, PipelineTaskConfig, 

45 QuantumGraph, TaskDef, TaskFactory, 

46 PipelineTaskConnections) 

47import lsst.pipe.base.connectionTypes as cT 

48import lsst.utils.tests 

49from lsst.pipe.base.tests.simpleQGraph import (AddTaskFactoryMock, makeSimpleQGraph) 

50from lsst.utils.tests import temporaryDirectory 

51 

52 

# Show INFO-level (and higher) log records while the tests run.
logging.basicConfig(level=logging.INFO)


def _fromNameStub(name, reg):
    """Stand-in for `Instrument.fromName` that always returns `None`."""
    return None


# Instrument.fromName() has to be monkey-patched so that it does not try to
# retrieve a non-existing instrument from the registry. These tests run fine
# without an actual instrument, and a full Instrument mock would be too
# complicated to implement.
Instrument.fromName = _fromNameStub

59 

60 

@contextlib.contextmanager
def makeTmpFile(contents=None):
    """Context manager for generating temporary file name.

    The file is created (and optionally filled) when the context is entered.
    It is deleted on exiting the context, including when the managed block
    raises an exception.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into a file. Nothing is written if it is `None` or
        empty.

    Yields
    ------
    tmpname : `str`
        Name of the temporary file.
    """
    fd, tmpname = tempfile.mkstemp()
    try:
        try:
            if contents:
                os.write(fd, contents)
        finally:
            # Release the descriptor even if the write above failed.
            os.close(fd)
        yield tmpname
    finally:
        # Runs on both normal and exceptional exit. The file may already be
        # gone by then, so a failure to remove it is deliberately ignored.
        with contextlib.suppress(OSError):
            os.remove(tmpname)

79 

80 

@contextlib.contextmanager
def makeSQLiteRegistry(create=True):
    """Context manager yielding configuration for a new SQLite registry.

    The database URI points at a ``gen3.sqlite`` file inside the directory
    provided by `temporaryDirectory`.

    Parameters
    ----------
    create : `bool`, optional
        If true (default) `Registry.fromConfig` is called with
        ``create=True`` before the configuration is yielded.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration whose ``db`` entry is the SQLite URI.
    """
    with temporaryDirectory() as dbDir:
        registryConfig = RegistryConfig()
        registryConfig["db"] = f"sqlite:///{dbDir}/gen3.sqlite"
        if create:
            Registry.fromConfig(registryConfig, create=True)
        yield registryConfig

97 

98 

class SimpleConnections(
    PipelineTaskConnections,
    dimensions=(),
    defaultTemplates={"template": "simple"},
):
    """Connections class with a single init-input, ``schema``.

    The dataset type name of ``schema`` is built from the ``template``
    template, which defaults to ``"simple"``.
    """

    schema = cT.InitInput(
        doc="Schema",
        name="{template}schema",
        storageClass="SourceCatalog",
    )

104 

105 

class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Task configuration with one string field, using `SimpleConnections`."""

    field = pexConfig.Field(
        dtype=str,
        doc="arbitrary string",
    )

    def setDefaults(self):
        """Apply the defaults of `PipelineTaskConfig`; nothing is added."""
        PipelineTaskConfig.setDefaults(self)

111 

112 

class TaskOne(PipelineTask):
    """Trivial task configured by `SimpleConfig`, default name "taskOne"."""

    _DefaultName = "taskOne"
    ConfigClass = SimpleConfig

116 

117 

class TaskTwo(PipelineTask):
    """Trivial task configured by `SimpleConfig`, default name "taskTwo"."""

    _DefaultName = "taskTwo"
    ConfigClass = SimpleConfig

121 

122 

class TaskFactoryMock(TaskFactory):
    """Task factory that only knows the `TaskOne` and `TaskTwo` classes."""

    def loadTaskClass(self, taskName):
        """Return ``(taskClass, taskName)`` for a known task name.

        Parameters
        ----------
        taskName : `str`
            Either "TaskOne" or "TaskTwo".

        Returns
        -------
        result : `tuple` or `None`
            Task class and its name, or `None` for any other name.
        """
        if taskName == "TaskOne":
            return TaskOne, "TaskOne"
        if taskName == "TaskTwo":
            return TaskTwo, "TaskTwo"
        return None

    def makeTask(self, taskClass, config, overrides, butler):
        """Instantiate ``taskClass``.

        Parameters
        ----------
        taskClass : `type`
            Task class to instantiate.
        config : `object` or `None`
            Task configuration; if `None` a default ``taskClass.ConfigClass``
            instance is made and ``overrides`` (if any) are applied to it.
        overrides : `object` or `None`
            Object with an ``applyTo(config)`` method; only used when
            ``config`` is `None`.
        butler : `object`
            Passed to the task constructor as ``butler``.

        Returns
        -------
        task : ``taskClass``
            New task instance.
        """
        if config is not None:
            # Caller supplied a complete configuration, use it untouched.
            return taskClass(config=config, butler=butler)
        defaultConfig = taskClass.ConfigClass()
        if overrides:
            overrides.applyTo(defaultConfig)
        return taskClass(config=defaultConfig, butler=butler)

136 

137 

def _makeArgs(cmd="run", registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    ``butler_config`` is replaced by a `Config` populated with some
    defaults; it can be overridden completely with a keyword argument.

    Parameters
    ----------
    cmd : `str`, optional
        Produce arguments for this pipetask command.
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration, stored under the "registry"
        key of the butler configuration.
    **kwargs
        Overrides for other arguments; each one is set as an attribute on
        the returned object.

    Returns
    -------
    args : `object`
        Result of ``parse_args`` with the overrides applied.
    """
    # Parsing only the command name gives every argument its default value.
    args = parser_mod.makeParser().parse_args([cmd])

    # Build our own butler configuration instead of the parsed default.
    butlerConfig = Config()
    if registryConfig:
        butlerConfig["registry"] = registryConfig
    # The default datastore has a relocatable root, so some root has to be
    # specified here for it to use.
    butlerConfig.configFile = "."
    args.butler_config = butlerConfig

    # Keyword overrides are applied last so they win over everything above.
    for name, value in kwargs.items():
        setattr(args, name, value)
    return args

167 

168 

class FakeTaskDef(NamedTuple):
    """Minimal hashable stand-in carrying only a name."""

    # The single field of the tuple.
    name: str

171 

172 

@dataclass(frozen=True)
class FakeDSRef:
    """Immutable stand-in holding a dataset type name and a data ID."""

    # Dataset type name.
    datasetType: str
    # Data ID values.
    dataId: tuple

177 

178 

def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The graph is only meant to be pickled; its quanta are not usable for
    anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
        Graph with a single task definition and a single quantum.
    """
    # The task name in TaskDef needs to be a real importable name, so use
    # one that is sure to exist.
    taskName = "lsst.pipe.base.Struct"
    taskDef = TaskDef(taskName=taskName, config=SimpleConfig())
    quantum = Quantum(taskName=taskName,
                      inputs={FakeTaskDef("A"): FakeDSRef("A", (1, 2))})  # type: ignore
    return QuantumGraph({taskDef: {quantum}})

196 

197 

class CmdLineFwkTestCase(unittest.TestCase):
    """Tests of CmdLineFwk that do not execute a quantum graph."""

    def testMakePipeline(self):
        """Check CmdLineFwk.makePipeline() for several kinds of input."""
        fwk = CmdLineFwk()
        addTaskSpec = "lsst.pipe.base.tests.simpleQGraph.AddTask:task"

        # No actions at all gives an empty pipeline.
        pipeline = fwk.makePipeline(_makeArgs())
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # A few checks with serialization.
        with makeTmpFile() as pipelineFile:
            # Build an empty pipeline and save it to the file.
            pipeline = fwk.makePipeline(_makeArgs(save_pipeline=pipelineFile))
            self.assertIsInstance(pipeline, Pipeline)

            # Load the pipeline back from the same file.
            pipeline = fwk.makePipeline(_makeArgs(pipeline=pipelineFile))
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # Pipeline with a single task.
        singleTask = [_ACTION_ADD_TASK("TaskOne:task1")]
        pipeline = fwk.makePipeline(_makeArgs(pipeline_actions=singleTask))
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # Pipeline with several tasks.
        manyTasks = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        pipeline = fwk.makePipeline(_makeArgs(pipeline_actions=manyTasks))
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # Single task with a config override. TaskOne cannot be used here,
        # the task has to be importable with `doImport()`.
        withOverride = [
            _ACTION_ADD_TASK(addTaskSpec),
            _ACTION_CONFIG("task:addend=100"),
        ]
        pipeline = fwk.makePipeline(_makeArgs(pipeline_actions=withOverride))
        expanded = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(expanded), 1)
        self.assertEqual(expanded[0].config.addend, 100)

        # Same again, but the override comes from a config file.
        with makeTmpFile(b"config.addend = 1000\n") as overridesFile:
            withConfigFile = [
                _ACTION_ADD_TASK(addTaskSpec),
                _ACTION_CONFIG_FILE("task:" + overridesFile),
            ]
            pipeline = fwk.makePipeline(_makeArgs(pipeline_actions=withConfigFile))
            expanded = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(expanded), 1)
            self.assertEqual(expanded[0].config.addend, 1000)

        # --instrument option: for now only check that it does not crash.
        withInstrument = [
            _ACTION_ADD_TASK(addTaskSpec),
            _ACTION_ADD_INSTRUMENT("Instrument"),
        ]
        fwk.makePipeline(_makeArgs(pipeline_actions=withInstrument))

    def testMakeGraphFromPickle(self):
        """Check CmdLineFwk.makeGraph() when the graph is read from a file.

        Only the most trivial case is covered, no actual graph building
        happens here.
        """
        fwk = CmdLineFwk()

        with makeTmpFile() as graphFile, makeSQLiteRegistry() as registryConfig:

            # Save a non-empty graph and read it back.
            saved = _makeQGraph()
            with open(graphFile, "wb") as stream:
                saved.save(stream)
            args = _makeArgs(qgraph=graphFile, registryConfig=registryConfig)
            loaded = fwk.makeGraph(None, args)
            self.assertIsInstance(loaded, QuantumGraph)
            self.assertEqual(len(loaded), 1)

            # A pickle holding an object of the wrong type is rejected.
            with open(graphFile, "wb") as stream:
                pickle.dump({}, stream)
            args = _makeArgs(qgraph=graphFile, registryConfig=registryConfig)
            with self.assertRaises(TypeError):
                fwk.makeGraph(None, args)

            # Reading an empty graph works, but makeGraph() returns None
            # and issues a warning.
            emptyGraph = QuantumGraph(dict())
            with open(graphFile, "wb") as stream:
                emptyGraph.save(stream)
            args = _makeArgs(qgraph=graphFile, registryConfig=registryConfig)
            with self.assertWarnsRegex(UserWarning, "QuantumGraph is empty"):
                # This also verifies the warning for an empty graph.
                loaded = fwk.makeGraph(None, args)
            self.assertIs(loaded, None)

    def testShowPipeline(self):
        """Check that the --show options for a pipeline do not fail."""
        fwk = CmdLineFwk()

        args = _makeArgs(pipeline_actions=[
            _ACTION_ADD_TASK("lsst.pipe.base.tests.simpleQGraph.AddTask:task"),
            _ACTION_CONFIG("task:addend=100"),
        ])
        pipeline = fwk.makePipeline(args)

        # Each option is shown on its own, in this order.
        for option in ("pipeline", "config", "history=task::addend", "tasks"):
            args.show = [option]
            fwk.showInfo(args, pipeline)

335 

336 

337class CmdLineFwkTestCaseWithButler(unittest.TestCase): 

338 """A test case for CmdLineFwk 

339 """ 

340 

341 def setUp(self): 

342 super().setUpClass() 

343 self.root = tempfile.mkdtemp() 

344 

345 def tearDown(self): 

346 shutil.rmtree(self.root, ignore_errors=True) 

347 super().tearDownClass() 

348 

349 def testSimpleQGraph(self): 

350 """Test successfull execution of trivial quantum graph. 

351 """ 

352 

353 nQuanta = 5 

354 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

355 

356 self.assertEqual(len(qgraph.taskGraph), 5) 

357 self.assertEqual(len(qgraph), nQuanta) 

358 

359 args = _makeArgs() 

360 fwk = CmdLineFwk() 

361 taskFactory = AddTaskFactoryMock() 

362 

363 # run whole thing 

364 fwk.runPipeline(qgraph, taskFactory, args, butler=butler) 

365 self.assertEqual(taskFactory.countExec, nQuanta) 

366 

367 def testSimpleQGraphSkipExisting(self): 

368 """Test continuing execution of trivial quantum graph with --skip-existing. 

369 """ 

370 

371 nQuanta = 5 

372 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

373 

374 self.assertEqual(len(qgraph.taskGraph), 5) 

375 self.assertEqual(len(qgraph), nQuanta) 

376 

377 args = _makeArgs() 

378 fwk = CmdLineFwk() 

379 taskFactory = AddTaskFactoryMock(stopAt=3) 

380 

381 # run first three quanta 

382 with self.assertRaises(RuntimeError): 

383 fwk.runPipeline(qgraph, taskFactory, args, butler=butler) 

384 self.assertEqual(taskFactory.countExec, 3) 

385 

386 # run remaining ones 

387 taskFactory.stopAt = -1 

388 args.skip_existing = True 

389 args.no_versions = True 

390 fwk.runPipeline(qgraph, taskFactory, args, butler=butler) 

391 self.assertEqual(taskFactory.countExec, nQuanta) 

392 

393 def testSimpleQGraphPartialOutputsFail(self): 

394 """Test continuing execution of trivial quantum graph with partial 

395 outputs. 

396 """ 

397 

398 nQuanta = 5 

399 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

400 

401 # should have one task and number of quanta 

402 self.assertEqual(len(qgraph), nQuanta) 

403 

404 args = _makeArgs() 

405 fwk = CmdLineFwk() 

406 taskFactory = AddTaskFactoryMock(stopAt=3) 

407 

408 # run first three quanta 

409 with self.assertRaises(RuntimeError): 

410 fwk.runPipeline(qgraph, taskFactory, args, butler=butler) 

411 self.assertEqual(taskFactory.countExec, 3) 

412 

413 # drop one of the two outputs from one task 

414 ref = butler._findDatasetRef("add2_dataset2", instrument="INSTR", detector=0) 

415 self.assertIsNotNone(ref) 

416 butler.pruneDatasets([ref], disassociate=True, unstore=True, purge=True) 

417 

418 taskFactory.stopAt = -1 

419 args.skip_existing = True 

420 args.no_versions = True 

421 excRe = "Registry inconsistency while checking for existing outputs.*" 

422 with self.assertRaisesRegex(RuntimeError, excRe): 

423 fwk.runPipeline(qgraph, taskFactory, args, butler=butler) 

424 

425 def testSimpleQGraphClobberPartialOutputs(self): 

426 """Test continuing execution of trivial quantum graph with 

427 --clobber-partial-outputs. 

428 """ 

429 

430 nQuanta = 5 

431 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

432 

433 # should have one task and number of quanta 

434 self.assertEqual(len(qgraph), nQuanta) 

435 

436 args = _makeArgs() 

437 fwk = CmdLineFwk() 

438 taskFactory = AddTaskFactoryMock(stopAt=3) 

439 

440 # run first three quanta 

441 with self.assertRaises(RuntimeError): 

442 fwk.runPipeline(qgraph, taskFactory, args, butler=butler) 

443 self.assertEqual(taskFactory.countExec, 3) 

444 

445 # drop one of the two outputs from one task 

446 ref = butler._findDatasetRef("add2_dataset2", instrument="INSTR", detector=0) 

447 self.assertIsNotNone(ref) 

448 butler.pruneDatasets([ref], disassociate=True, unstore=True, purge=True) 

449 

450 taskFactory.stopAt = -1 

451 args.skip_existing = True 

452 args.clobber_partial_outputs = True 

453 args.no_versions = True 

454 fwk.runPipeline(qgraph, taskFactory, args, butler=butler) 

455 # number of executed quanta is incremented 

456 self.assertEqual(taskFactory.countExec, nQuanta + 1) 

457 

458 def testSimpleQGraphReplaceRun(self): 

459 """Test repeated execution of trivial quantum graph with 

460 --replace-run. 

461 """ 

462 

463 # need non-memory registry in this case 

464 nQuanta = 5 

465 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root, inMemory=False) 

466 

467 # should have one task and number of quanta 

468 self.assertEqual(len(qgraph), nQuanta) 

469 

470 fwk = CmdLineFwk() 

471 taskFactory = AddTaskFactoryMock() 

472 

473 # run whole thing 

474 args = _makeArgs( 

475 butler_config=self.root, 

476 input="test", 

477 output="output", 

478 output_run="output/run1") 

479 # deep copy is needed because quanta are updated in place 

480 fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args) 

481 self.assertEqual(taskFactory.countExec, nQuanta) 

482 

483 # need to refresh collections explicitly (or make new butler/registry) 

484 butler.registry._collections.refresh() 

485 collections = set(butler.registry.queryCollections(...)) 

486 self.assertEqual(collections, {"test", "output", "output/run1"}) 

487 

488 # number of datasets written by pipeline: 

489 # - nQuanta of init_outputs 

490 # - nQuanta of configs 

491 # - packages (single dataset) 

492 # - nQuanta * two output datasets 

493 # - nQuanta of metadata 

494 n_outputs = nQuanta * 5 + 1 

495 refs = butler.registry.queryDatasets(..., collections="output/run1") 

496 self.assertEqual(len(list(refs)), n_outputs) 

497 

498 # re-run with --replace-run (--inputs is not compatible) 

499 args.input = None 

500 args.replace_run = True 

501 args.output_run = "output/run2" 

502 fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args) 

503 

504 butler.registry._collections.refresh() 

505 collections = set(butler.registry.queryCollections(...)) 

506 self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"}) 

507 

508 # new output collection 

509 refs = butler.registry.queryDatasets(..., collections="output/run2") 

510 self.assertEqual(len(list(refs)), n_outputs) 

511 

512 # old output collection is still there 

513 refs = butler.registry.queryDatasets(..., collections="output/run1") 

514 self.assertEqual(len(list(refs)), n_outputs) 

515 

516 # re-run with --replace-run and --prune-replaced=unstore 

517 args.input = None 

518 args.replace_run = True 

519 args.prune_replaced = "unstore" 

520 args.output_run = "output/run3" 

521 fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args) 

522 

523 butler.registry._collections.refresh() 

524 collections = set(butler.registry.queryCollections(...)) 

525 self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"}) 

526 

527 # new output collection 

528 refs = butler.registry.queryDatasets(..., collections="output/run3") 

529 self.assertEqual(len(list(refs)), n_outputs) 

530 

531 # old output collection is still there, and it has all datasets but 

532 # they are not in datastore 

533 refs = butler.registry.queryDatasets(..., collections="output/run2") 

534 refs = list(refs) 

535 self.assertEqual(len(refs), n_outputs) 

536 with self.assertRaises(FileNotFoundError): 

537 butler.get(refs[0], collections="output/run2") 

538 

539 # re-run with --replace-run and --prune-replaced=purge 

540 args.input = None 

541 args.replace_run = True 

542 args.prune_replaced = "purge" 

543 args.output_run = "output/run4" 

544 fwk.runPipeline(copy.deepcopy(qgraph), taskFactory, args) 

545 

546 butler.registry._collections.refresh() 

547 collections = set(butler.registry.queryCollections(...)) 

548 # output/run3 should disappear now 

549 self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"}) 

550 

551 # new output collection 

552 refs = butler.registry.queryDatasets(..., collections="output/run4") 

553 self.assertEqual(len(list(refs)), n_outputs) 

554 

555 def testShowGraph(self): 

556 """Test for --show options for quantum graph. 

557 """ 

558 fwk = CmdLineFwk() 

559 

560 nQuanta = 2 

561 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root) 

562 

563 args = _makeArgs(show=["graph"]) 

564 fwk.showInfo(args, pipeline=None, graph=qgraph) 

565 # TODO: cannot test "workflow" option presently, it instanciates 

566 # butler from command line options and there is no way to pass butler 

567 # mock to that code. 

568 

569 

class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """Use `lsst.utils.tests.MemoryTestCase` unchanged in this module."""

572 

573 

def setup_module(module):
    """Module-level set-up hook: call `lsst.utils.tests.init`.

    Parameters
    ----------
    module : `object`
        Not used by this function.
    """
    lsst.utils.tests.init()

576 

577 

if __name__ == "__main__":
    # Direct execution: do the same initialization as setup_module(), then
    # hand over to the unittest runner.
    lsst.utils.tests.init()
    unittest.main()