Coverage for tests/test_cmdLineFwk.py: 13%
501 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-06 02:54 -0700
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Simple unit test for cmdLineFwk module.
23"""
import contextlib
import logging
import os
import pickle
import re
import shutil
import tempfile
import unittest
import unittest.mock
from dataclasses import dataclass
from io import StringIO
from types import SimpleNamespace
from typing import NamedTuple

import click
import click.testing
import lsst.pex.config as pexConfig
import lsst.pipe.base.connectionTypes as cT
import lsst.utils.tests
from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError
from lsst.ctrl.mpexec.cli.opt import run_options
from lsst.ctrl.mpexec.cli.utils import (
    _ACTION_ADD_INSTRUMENT,
    _ACTION_ADD_TASK,
    _ACTION_CONFIG,
    _ACTION_CONFIG_FILE,
    PipetaskCommand,
)
from lsst.ctrl.mpexec.showInfo import ShowInfo
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DimensionConfig,
    DimensionUniverse,
    Quantum,
    Registry,
)
from lsst.daf.butler.core.datasets.type import DatasetType
from lsst.daf.butler.registry import RegistryConfig
from lsst.pipe.base import (
    Instrument,
    Pipeline,
    PipelineTaskConfig,
    PipelineTaskConnections,
    QuantumGraph,
    TaskDef,
)
from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant
from lsst.pipe.base.tests.simpleQGraph import (
    AddTask,
    AddTaskFactoryMock,
    makeSimpleButler,
    makeSimplePipeline,
    makeSimpleQGraph,
    populateButler,
)
from lsst.utils.tests import temporaryDirectory
# Logging level for the whole test module is controlled by an environment
# variable; unknown level names fall back to INFO.
logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() to not retrieve non-existing
# instrument from registry, these tests can run fine without actual instrument
# and implementing full mock for Instrument is too complicated.
Instrument.fromName = lambda name, reg: None
@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    Temporary file is deleted on exiting context.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into the file; nothing is written when empty or `None`.
    suffix : `str`, optional
        Suffix for the temporary file name (e.g. ``".qgraph"``).
    """
    handle, path = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(handle, contents)
    os.close(handle)
    yield path
    # Best-effort cleanup; the file may already have been removed by the user.
    try:
        os.remove(path)
    except OSError:
        pass
@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create new empty registry database.

    Parameters
    ----------
    create : `bool`, optional
        If `True` (default), initialize the registry database schema.
    universe : `DimensionUniverse`, optional
        Universe whose dimension configuration to use; a simple test
        configuration is generated when not provided.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    if universe is None:
        dimensionConfig = _makeDimensionConfig()
    else:
        dimensionConfig = universe.dimensionConfig
    with temporaryDirectory() as tmpdir:
        config = RegistryConfig()
        # SQLite file lives inside the temporary directory, so it is removed
        # together with it.
        config["db"] = f"sqlite:///{tmpdir}/gen3.sqlite"
        if create:
            Registry.createFromConfig(config, dimensionConfig=dimensionConfig)
        yield config
class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Trivial connections class with a single templated init-input."""

    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")
class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Trivial config class used to exercise config handling in tests."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        # Explicit base-class call; this config adds no defaults of its own.
        PipelineTaskConfig.setDefaults(self)
def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default butler_config is set to `Config` populated with some defaults,
    it can be overridden completely by keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to get the default value of arguments to 'run'.
    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    # Rename a couple of CLI options to the attribute names the framework
    # code expects.
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    if "mock_configs" not in args:
        args["mock_configs"] = []
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args
# Minimal stand-in for a butler DatasetType; tests only ever look at ``name``.
FakeDSType = NamedTuple("FakeDSType", [("name", str)])
@dataclass(frozen=True)
class FakeDSRef:
    """Minimal stand-in for a butler `DatasetRef`.

    Frozen so that instances are hashable and usable as dictionary keys.
    """

    datasetType: str
    dataId: tuple

    def isComponent(self):
        """Fake refs never refer to a dataset component."""
        return False
# Task class name used by tests; must be importable by the middleware when it
# instantiates tasks from a pipeline definition.
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"
def _makeDimensionConfig():
    """Make a simple dimension universe configuration."""
    storageCls = "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage"
    # Two identical trivial elements, "A" and "B", each keyed by an integer
    # "id"; a fresh dict is built per element so nothing is shared.
    elements = {
        name: {
            "keys": [
                {
                    "name": "id",
                    "type": "int",
                }
            ],
            "storage": {"cls": storageCls},
        }
        for name in ("A", "B")
    }
    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": elements,
            "packers": {},
        }
    )
def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it, the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    dsType = DatasetType("A", tuple(), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    # A single quantum with one input ref; it only needs to survive
    # serialization, not execution.
    dataId = DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe)
    ref = DatasetRef(dsType, dataId, run="fake_run")
    quantum = Quantum(taskName=_TASK_CLASS, inputs={dsType: [ref]})
    return QuantumGraph({taskDef: {quantum}}, universe=universe)
class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk."""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method."""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only most trivial case is tested that does not do actual graph
        building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None.
            qgraph = QuantumGraph(dict(), universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        # All of the above are pipeline-scoped, so nothing is left unhandled.
        self.assertEqual(show.unhandled, frozenset())
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # History output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        # "uri" is graph-scoped, so it remains unhandled here.
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))
class CmdLineFwkTestCaseWithButler(unittest.TestCase):
    """A test case for CmdLineFwk."""

    def setUp(self):
        # Bug fix: call the per-test hook, not the class-level setUpClass().
        super().setUp()
        self.root = tempfile.mkdtemp()
        self.nQuanta = 5
        self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta)

    def tearDown(self):
        shutil.rmtree(self.root, ignore_errors=True)
        # Bug fix: call the per-test hook, not the class-level tearDownClass().
        super().tearDown()

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for case when output data for one task already appears in
        _input_ collection, but no ``--extend-run`` or ``--skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _input_ collection. No ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _output_ collection. The ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # Graph does not include quantum for first task
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        # - nQuanta of init_outputs
        # - nQuanta of configs
        # - packages (single dataset)
        # - nQuanta * two output datasets
        # - nQuanta of metadata
        # - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there, and it has all datasets but
        # non-InitOutputs are not in datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                butler.get(ref, collections="output/run2")
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref, collections="output/run2")

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

    def testMockTask(self):
        """Test --mock option."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", mock=True, register_dataset_types=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        # None of the actual tasks is executed
        self.assertEqual(taskFactory.countExec, 0)

        # check dataset types
        butler.registry.refresh()
        datasetTypes = list(butler.registry.queryDatasetTypes(re.compile("^_mock_.*")))
        self.assertEqual(len(datasetTypes), self.nQuanta * 2)

    def testMockTaskFailure(self):
        """Test --mock option and configure one of the tasks to fail."""
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            mock=True,
            register_dataset_types=True,
            mock_configs=[
                _ACTION_CONFIG("task3-mock:failCondition='detector = 0'"),
            ],
            fail_fast=True,
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        with self.assertRaises(MPGraphExecutorError) as cm:
            fwk.runPipeline(qgraph, taskFactory, args)

        self.assertIsNotNone(cm.exception.__cause__)
        self.assertRegex(str(cm.exception.__cause__), "Simulated failure: task=task3")

    def testSubgraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)

        # Select first two nodes for execution. This depends on node ordering
        # which I assume is the same as execution order.
        nNodes = 2
        nodeIds = [node.nodeId for node in qgraph]
        nodeIds = nodeIds[:nNodes]

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with (
            makeTmpFile(suffix=".qgraph") as tmpname,
            makeSQLiteRegistry(universe=butler.registry.dimensions) as registryConfig,
        ):
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)

            args = _makeArgs(
                qgraph=tmpname,
                qgraph_node_id=nodeIds,
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            fwk = CmdLineFwk()

            # load graph, should only read a subset
            qgraph = fwk.makeGraph(pipeline=None, args=args)
            self.assertEqual(len(qgraph), nNodes)

    def testShowGraph(self):
        """Test for --show options for quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["graph"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"graph"})

    def testShowGraphWorkflow(self):
        """Test the "workflow" and "uri" --show options for quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["workflow"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"workflow"})

        # TODO: cannot test "uri" option presently, it instantiates
        # butler from command line options and there is no way to pass butler
        # mock to that code.
        show = ShowInfo(["uri"])
        with self.assertRaises(ValueError):  # No args given
            show.show_graph_info(qgraph)

    def testSimpleQGraphDatastoreRecords(self):
        """Test quantum graph generation with --qgraph-datastore-records."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)
        for i, qnode in enumerate(qgraph):
            quantum = qnode.quantum
            self.assertIsNotNone(quantum.datastore_records)
            # only the first quantum has a pre-existing input
            if i == 0:
                datastore_name = "FileDatastore@<butlerRoot>"
                self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name})
                records_data = quantum.datastore_records[datastore_name]
                records = dict(records_data.records)
                self.assertEqual(len(records), 1)
                _, records = records.popitem()
                records = records["file_datastore_records"]
                self.assertEqual(
                    [record.path for record in records],
                    ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"],
                )
            else:
                self.assertEqual(quantum.datastore_records, {})
class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """Standard LSST test-suite hook that checks for resource leaks."""

    pass
def setup_module(module):
    """Initialize LSST test utilities when this module is run under pytest."""
    lsst.utils.tests.init()
# Support running this test file directly with python.
if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()