# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Simple unit test for cmdLineFwk module."""

import contextlib
import logging
import os
import pickle
import re
import shutil
import tempfile
import unittest
import unittest.mock
from dataclasses import dataclass
from io import StringIO
from types import SimpleNamespace
from typing import NamedTuple

import astropy.units as u
import click
import click.testing
import lsst.pex.config as pexConfig
import lsst.pipe.base.connectionTypes as cT
import lsst.utils.tests
from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError
from lsst.ctrl.mpexec.cli.opt import run_options
from lsst.ctrl.mpexec.cli.utils import (
    _ACTION_ADD_INSTRUMENT,
    _ACTION_ADD_TASK,
    _ACTION_CONFIG,
    _ACTION_CONFIG_FILE,
    PipetaskCommand,
)
from lsst.ctrl.mpexec.showInfo import ShowInfo
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DimensionConfig,
    DimensionUniverse,
    Quantum,
    Registry,
)
from lsst.daf.butler.core.datasets.type import DatasetType
from lsst.daf.butler.registry import RegistryConfig
from lsst.pipe.base import (
    Instrument,
    Pipeline,
    PipelineTaskConfig,
    PipelineTaskConnections,
    QuantumGraph,
    TaskDef,
)
from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant
from lsst.pipe.base.tests.simpleQGraph import (
    AddTask,
    AddTaskFactoryMock,
    makeSimpleButler,
    makeSimplePipeline,
    makeSimpleQGraph,
    populateButler,
)
from lsst.utils.tests import temporaryDirectory

logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() so that it does not try to
# retrieve a non-existing instrument from the registry; these tests run fine
# without an actual instrument, and implementing a full mock for Instrument
# is too complicated.
Instrument.fromName = lambda name, reg: None


@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating a temporary file name.

    The temporary file is deleted on exiting the context.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into the file.
    suffix : `str`, optional
        Suffix for the temporary file name.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(fd, contents)
    os.close(fd)
    yield tmpname
    with contextlib.suppress(OSError):
        os.remove(tmpname)
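
# For illustration, a typical use of makeTmpFile (a sketch; the tests below
# do the same thing with real config-override contents and saved quantum
# graphs):
#
#     with makeTmpFile(contents=b"config.addend = 1000\n", suffix=".py") as tmpname:
#         ...  # tmpname exists until the context exits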


@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create a new empty registry database.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for the initialized registry database.
    """
    dimensionConfig = universe.dimensionConfig if universe is not None else _makeDimensionConfig()
    with temporaryDirectory() as tmpdir:
        uri = f"sqlite:///{tmpdir}/gen3.sqlite"
        config = RegistryConfig()
        config["db"] = uri
        if create:
            Registry.createFromConfig(config, dimensionConfig=dimensionConfig)
        yield config
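
# A sketch of how this is consumed by the tests below: the yielded config is
# passed to _makeArgs(), which wires it into the butler configuration.
#
#     with makeSQLiteRegistry() as registryConfig:
#         args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig)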


class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Test connection class."""

    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")


class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Test pipeline config."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        PipelineTaskConfig.setDefaults(self)


def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default ``butler_config`` is set to a `Config` populated with some
    defaults; it can be overridden completely via a keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for the registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to get the default value of arguments to 'run'.
    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args
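
# For illustration, a sketch of how the tests below build arguments for a
# one-task pipeline (the label "task1" is arbitrary):
#
#     args = _makeArgs(pipeline_actions=[_ACTION_ADD_TASK("TaskOne:task1")])
#
# Any option of the real "pipetask run" command can be overridden via a
# keyword argument in the same way.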


class FakeDSType(NamedTuple):
    """A fake `~lsst.daf.butler.DatasetType` class used for testing."""

    name: str


@dataclass(frozen=True)
class FakeDSRef:
    """A fake `~lsst.daf.butler.DatasetRef` class used for testing."""

    datasetType: str
    dataId: tuple

    def isComponent(self):
        return False


# Task class name used by the tests; it needs to be importable.
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"


def _makeDimensionConfig():
    """Make a simple dimension universe configuration."""
    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
                "B": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
            },
            "packers": {},
        }
    )
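
# The minimal universe above defines only two custom dimensions, "A" and "B";
# the tests use them as the dimensions of fake dataset types and data IDs,
# e.g. (a sketch taken from _makeQGraph below):
#
#     universe = DimensionUniverse(config=_makeDimensionConfig())
#     DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe)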


def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it; the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    fakeDSType = DatasetType("A", tuple(), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    quanta = [
        Quantum(
            taskName=_TASK_CLASS,
            inputs={
                fakeDSType: [
                    DatasetRef(
                        fakeDSType,
                        DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe),
                        run="fake_run",
                    )
                ]
            },
        )
    ]  # type: ignore
    qgraph = QuantumGraph({taskDef: set(quanta)}, universe=universe)
    return qgraph
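
# A graph made this way is only good for a save/load round trip, e.g.
# (a sketch mirroring testMakeGraphFromSave below):
#
#     qgraph = _makeQGraph()
#     with open(tmpname, "wb") as saveFile:
#         qgraph.save(saveFile)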


class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk."""

    def testMakePipeline(self):
        """Tests for the CmdLineFwk.makePipeline method."""
        fwk = CmdLineFwk()

        # make an empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # a few tests with serialization
        with makeTmpFile() as tmpname:
            # make an empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read the pipeline back from the file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single-task pipeline, the task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # multi-task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single-task pipeline with config overrides, needs a real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check the --instrument option; for now this only checks that it
        # does not crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for the CmdLineFwk.makeGraph method.

        Only the most trivial case is tested here, one that does not do any
        actual graph building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make a non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if the graph ID does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save an object of the wrong type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading an empty graph from the file should work, but
            # makeGraph() will return None
            qgraph = QuantumGraph(dict(), universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test the --show options for a pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # history output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

    def test_execution_resources_parameters(self) -> None:
        """Test creation of ExecutionResources from the command line."""
        fwk = CmdLineFwk()

        for params, num_cores, max_mem in (
            ((None, None), 1, None),
            ((5, ""), 5, None),
            ((None, "50"), 1, 50 * u.MB),
            ((5, "50 GB"), 5, 50 * u.GB),
        ):
            kwargs = {}
            for k, v in zip(("cores_per_quantum", "memory_per_quantum"), params):
                if v is not None:
                    kwargs[k] = v
            args = _makeArgs(**kwargs)
            res = fwk._make_execution_resources(args)
            self.assertEqual(res.num_cores, num_cores)
            self.assertEqual(res.max_mem, max_mem)
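
        # The failure below is presumably because astropy parses the bare
        # "m" suffix as metres rather than megabytes, so "50m" cannot be
        # converted to a memory unit.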
        args = _makeArgs(memory_per_quantum="50m")
        with self.assertRaises(u.UnitConversionError):
            fwk._make_execution_resources(args)


class CmdLineFwkTestCaseWithButler(unittest.TestCase):
    """A test case for CmdLineFwk that runs against a real butler repo."""

    def setUp(self):
        super().setUp()
        self.root = tempfile.mkdtemp()
        self.nQuanta = 5
        self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta)

    def tearDown(self):
        shutil.rmtree(self.root, ignore_errors=True)
        super().tearDown()

    def testSimpleQGraph(self):
        """Test successful execution of a trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test the case when output data for one task already appears in the
        _input_ collection, but no ``--extend-run`` or ``--skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With the current implementation the graph has all nQuanta quanta,
        # but during execution one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test ``--skip-existing`` when output data for one task already
        appears in the _input_ collection. No ``--extend-run`` option is
        needed in this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test ``--skip-existing`` when output data for one task already
        appears in the _output_ collection. The ``--extend-run`` option is
        needed in this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # the graph does not include the quantum for the first task
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and the expected number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and the expected number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by the pipeline:
        # - nQuanta of init_outputs
        # - nQuanta of configs
        # - packages (single dataset)
        # - nQuanta * two output datasets
        # - nQuanta of metadata
        # - nQuanta of log output
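        # With nQuanta = 5, that is 5 * 6 + 1 = 31 datasets in total.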
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # the old output collection is still there and has all datasets, but
        # the non-InitOutputs are no longer in the datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                butler.get(ref)
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

    def testSubgraph(self):
        """Test successful loading of a subset of a quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)

        # Select the first two nodes for execution. This depends on the node
        # ordering, which I assume is the same as the execution order.
        nNodes = 2
        nodeIds = [node.nodeId for node in qgraph]
        nodeIds = nodeIds[:nNodes]

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with (
            makeTmpFile(suffix=".qgraph") as tmpname,
            makeSQLiteRegistry(universe=butler.dimensions) as registryConfig,
        ):
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)

            args = _makeArgs(
                qgraph=tmpname,
                qgraph_node_id=nodeIds,
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            fwk = CmdLineFwk()

            # load the graph; this should only read a subset
            qgraph = fwk.makeGraph(pipeline=None, args=args)
            self.assertEqual(len(qgraph), nNodes)

    def testShowGraph(self):
        """Test the --show options for a quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["graph"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"graph"})

    def testShowGraphWorkflow(self):
        """Test the --show workflow option for a quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["workflow"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"workflow"})

        # TODO: cannot test the "uri" option at present; it instantiates a
        # butler from command line options and there is no way to pass a
        # butler mock to that code.
        show = ShowInfo(["uri"])
        with self.assertRaises(ValueError):  # No args given
            show.show_graph_info(qgraph)

    def testSimpleQGraphDatastoreRecords(self):
        """Test quantum graph generation with --qgraph-datastore-records."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)
        for i, qnode in enumerate(qgraph):
            quantum = qnode.quantum
            self.assertIsNotNone(quantum.datastore_records)
            # only the first quantum has a pre-existing input
            if i == 0:
                datastore_name = "FileDatastore@<butlerRoot>"
                self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name})
                records_data = quantum.datastore_records[datastore_name]
                records = dict(records_data.records)
                self.assertEqual(len(records), 1)
                _, records = records.popitem()
                records = records["file_datastore_records"]
                self.assertEqual(
                    [record.path for record in records],
                    ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"],
                )
            else:
                self.assertEqual(quantum.datastore_records, {})


class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """General file leak detection."""


def setup_module(module):
    """Initialize pytest module."""
    lsst.utils.tests.init()


if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()