Coverage for tests/test_cmdLineFwk.py: 13%
515 statements
« prev ^ index » next — coverage.py v7.3.0, created at 2023-08-25 09:44 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Simple unit test for cmdLineFwk module.
23"""
25import contextlib
26import logging
27import os
28import pickle
29import re
30import shutil
31import tempfile
32import unittest
33from dataclasses import dataclass
34from io import StringIO
35from types import SimpleNamespace
36from typing import NamedTuple
38import astropy.units as u
39import click
40import lsst.pex.config as pexConfig
41import lsst.pipe.base.connectionTypes as cT
42import lsst.utils.tests
43from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError
44from lsst.ctrl.mpexec.cli.opt import run_options
45from lsst.ctrl.mpexec.cli.utils import (
46 _ACTION_ADD_INSTRUMENT,
47 _ACTION_ADD_TASK,
48 _ACTION_CONFIG,
49 _ACTION_CONFIG_FILE,
50 PipetaskCommand,
51)
52from lsst.ctrl.mpexec.showInfo import ShowInfo
53from lsst.daf.butler import (
54 Config,
55 DataCoordinate,
56 DatasetRef,
57 DimensionConfig,
58 DimensionUniverse,
59 Quantum,
60 Registry,
61)
62from lsst.daf.butler.core.datasets.type import DatasetType
63from lsst.daf.butler.registry import RegistryConfig
64from lsst.pipe.base import (
65 Instrument,
66 Pipeline,
67 PipelineTaskConfig,
68 PipelineTaskConnections,
69 QuantumGraph,
70 TaskDef,
71)
72from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant
73from lsst.pipe.base.script import transfer_from_graph
74from lsst.pipe.base.tests.simpleQGraph import (
75 AddTask,
76 AddTaskFactoryMock,
77 makeSimpleButler,
78 makeSimplePipeline,
79 makeSimpleQGraph,
80 populateButler,
81)
82from lsst.utils.tests import temporaryDirectory
# Configure root logging once for the whole test module; the level can be
# raised/lowered via the UNIT_TEST_LOGGING_LEVEL environment variable
# (falls back to INFO for unknown level names).
logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() to not retrieve non-existing
# instrument from registry, these tests can run fine without actual instrument
# and implementing full mock for Instrument is too complicated.
Instrument.fromName = lambda name, reg: None
@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    Temporary file is deleted on exiting context, even if the managed block
    raises (the original implementation only cleaned up on normal exit).

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into the file; nothing is written when `None` or empty.
    suffix : `str`, optional
        File name suffix (e.g. ``".qgraph"``) forwarded to
        `tempfile.mkstemp`.

    Yields
    ------
    tmpname : `str`
        Path of the temporary file.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    try:
        # Close the descriptor even if the write fails, so the file can
        # still be removed below (original code leaked ``fd`` on a failed
        # write).
        if contents:
            os.write(fd, contents)
    finally:
        os.close(fd)
    try:
        yield tmpname
    finally:
        # Best-effort removal; tolerate the file already being gone.
        with contextlib.suppress(OSError):
            os.remove(tmpname)
@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create new empty registry database.

    Parameters
    ----------
    create : `bool`, optional
        If `True` (default), initialize the registry schema in the new
        database.
    universe : optional
        Dimension universe whose configuration should be used; when `None`
        a fresh test configuration is built.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    if universe is None:
        dimensionConfig = _makeDimensionConfig()
    else:
        dimensionConfig = universe.dimensionConfig
    with temporaryDirectory() as tmpdir:
        config = RegistryConfig()
        # SQLite file lives inside the temporary directory, so it disappears
        # together with it.
        config["db"] = f"sqlite:///{tmpdir}/gen3.sqlite"
        if create:
            Registry.createFromConfig(config, dimensionConfig=dimensionConfig)
        yield config
class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Test connection class with a single templated init-input."""

    # Name is expanded from the "template" default above, i.e. "simpleschema".
    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")
class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Test pipeline config with one free-form string field."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        # No extra defaults beyond the base class; kept as an explicit hook.
        PipelineTaskConfig.setDefaults(self)
def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default butler_config is set to `Config` populated with some defaults,
    it can be overridden completely by keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to get the default value of arguments to 'run'; invoking a
    # fake click command is the only reliable way to capture the defaults
    # that the real CLI would pass through.
    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    # Rename CLI option keys to the attribute names the framework expects.
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    # Convert the textual constraint expression into its variant object.
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args
class FakeDSType(NamedTuple):
    """Lightweight stand-in for `~lsst.daf.butler.DatasetType` in tests.

    Carries only the dataset type name.
    """

    name: str
@dataclass(frozen=True)
class FakeDSRef:
    """Immutable stand-in for `~lsst.daf.butler.DatasetRef` used in tests."""

    datasetType: str
    dataId: tuple

    def isComponent(self):
        """Return `False`; a fake ref never refers to a dataset component."""
        return False
# Fully-qualified task class name used by tests; it must be importable
# because the framework instantiates it by name.
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"
def _makeDimensionConfig():
    """Make a simple dimension universe configuration.

    The universe defines two identical integer-keyed elements ("A" and "B")
    plus a minimal skypix section, which is all these tests need.
    """

    def _intKeyedElement():
        # Both test elements share this shape: a single integer "id" key
        # backed by plain table storage.
        return {
            "keys": [
                {
                    "name": "id",
                    "type": "int",
                }
            ],
            "storage": {
                "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
            },
        }

    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": _intKeyedElement(),
                "B": _intKeyedElement(),
            },
            "packers": {},
        }
    )
def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it, the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    fakeDSType = DatasetType("A", (), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    # Single quantum with one input ref; data ID uses the test universe's
    # "A"/"B" elements defined in _makeDimensionConfig().
    quanta = [
        Quantum(
            taskName=_TASK_CLASS,
            inputs={
                fakeDSType: [
                    DatasetRef(
                        fakeDSType,
                        DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe),
                        run="fake_run",
                    )
                ]
            },
        )
    ]  # type: ignore
    qgraph = QuantumGraph({taskDef: set(quanta)}, universe=universe)
    return qgraph
class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk methods that need no butler repository."""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method"""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        # Same override but supplied via a config file instead of inline.
        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only most trivial case is tested that does not do actual graph
        building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None.
            qgraph = QuantumGraph({}, universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        # Unknown show keywords are rejected at construction time.
        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # History output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        # "uri" is a graph-level keyword, so it stays unhandled here.
        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

    def test_execution_resources_parameters(self) -> None:
        """Test creation of the ExecutionResources from command line."""
        fwk = CmdLineFwk()

        # Each row: ((cores_per_quantum, memory_per_quantum), expected cores,
        # expected max memory). None means "option not given".
        for params, num_cores, max_mem in (
            ((None, None), 1, None),
            ((5, ""), 5, None),
            ((None, "50"), 1, 50 * u.MB),
            ((5, "50 GB"), 5, 50 * u.GB),
        ):
            kwargs = {}
            for k, v in zip(("cores_per_quantum", "memory_per_quantum"), params, strict=True):
                if v is not None:
                    kwargs[k] = v
            args = _makeArgs(**kwargs)
            res = fwk._make_execution_resources(args)
            self.assertEqual(res.num_cores, num_cores)
            self.assertEqual(res.max_mem, max_mem)

        # "50m" is minutes, not a memory unit, so conversion must fail.
        args = _makeArgs(memory_per_quantum="50m")
        with self.assertRaises(u.UnitConversionError):
            fwk._make_execution_resources(args)
498class CmdLineFwkTestCaseWithButler(unittest.TestCase):
499 """A test case for CmdLineFwk"""
501 def setUp(self):
502 super().setUpClass()
503 self.root = tempfile.mkdtemp()
504 self.nQuanta = 5
505 self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta)
507 def tearDown(self):
508 shutil.rmtree(self.root, ignore_errors=True)
509 super().tearDownClass()
    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")
    def test_simple_qgraph_qbb(self):
        """Test successful execution of trivial quantum graph in QBB mode."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        output_run = qgraph.metadata["output_run"]
        args.output_run = output_run

        # QBB must run from serialized graph.
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            n1 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, 31)

        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # Update the output run and try again; re-running the same graph
        # into a fresh run collection must transfer the same dataset count.
        new_output_run = output_run + "_new"
        qgraph.updateRun(new_output_run, metadata_key="output_run", update_graph_id=True)
        self.assertEqual(qgraph.metadata["output_run"], new_output_run)

        taskFactory = AddTaskFactoryMock()
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            n2 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, n2)
    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Capture CRITICAL-level log output to check the diagnostics.
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)
    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for case when output data for one task already appears in
        _input_ collection, but no ``--extend-run`` or ``-skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with all outputs of task0 so its
        # quantum looks "already done" at execution time.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)
    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _input_ collection. No ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with all outputs of task0.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        # If all quanta are skipped, the task is not included in the graph.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)
    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _output_ collection. The ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate task0's outputs into the output run collection.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                    "task0_config",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        # First task has no remaining quanta, so is left out completely.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        # Graph does not include quantum for first task.
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)
    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Mock factory configured to raise on the third task execution.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # Resuming without --clobber-outputs must fail because of the
        # incomplete (pruned) outputs left behind above.
        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Mock factory configured to raise on the third task execution.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Resume with clobbering enabled; the quantum with pruned outputs is
        # re-executed instead of failing.
        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)
    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        #  - nQuanta of init_outputs
        #  - nQuanta of configs
        #  - packages (single dataset)
        #  - nQuanta * two output datasets
        #  - nQuanta of metadata
        #  - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there, and it has all datasets but
        # non-InitOutputs are not in datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        # InitOutputs (packages, configs, init outputs) survive pruning;
        # everything else must raise on butler.get().
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                butler.get(ref)
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
977 def testSubgraph(self):
978 """Test successful execution of trivial quantum graph."""
979 args = _makeArgs(butler_config=self.root, input="test", output="output")
980 butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
981 populateButler(self.pipeline, butler)
983 fwk = CmdLineFwk()
984 qgraph = fwk.makeGraph(self.pipeline, args)
986 # Select first two nodes for execution. This depends on node ordering
987 # which I assume is the same as execution order.
988 nNodes = 2
989 nodeIds = [node.nodeId for node in qgraph]
990 nodeIds = nodeIds[:nNodes]
992 self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
993 self.assertEqual(len(qgraph), self.nQuanta)
995 with (
996 makeTmpFile(suffix=".qgraph") as tmpname,
997 makeSQLiteRegistry(universe=butler.dimensions) as registryConfig,
998 ):
999 with open(tmpname, "wb") as saveFile:
1000 qgraph.save(saveFile)
1002 args = _makeArgs(
1003 qgraph=tmpname,
1004 qgraph_node_id=nodeIds,
1005 registryConfig=registryConfig,
1006 execution_butler_location=None,
1007 )
1008 fwk = CmdLineFwk()
1010 # load graph, should only read a subset
1011 qgraph = fwk.makeGraph(pipeline=None, args=args)
1012 self.assertEqual(len(qgraph), nNodes)
1014 def testShowGraph(self):
1015 """Test for --show options for quantum graph."""
1016 nQuanta = 2
1017 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)
1019 show = ShowInfo(["graph"])
1020 show.show_graph_info(qgraph)
1021 self.assertEqual(show.handled, {"graph"})
1023 def testShowGraphWorkflow(self):
1024 nQuanta = 2
1025 butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)
1027 show = ShowInfo(["workflow"])
1028 show.show_graph_info(qgraph)
1029 self.assertEqual(show.handled, {"workflow"})
1031 # TODO: cannot test "uri" option presently, it instantiates
1032 # butler from command line options and there is no way to pass butler
1033 # mock to that code.
1034 show = ShowInfo(["uri"])
1035 with self.assertRaises(ValueError): # No args given
1036 show.show_graph_info(qgraph)
1038 def testSimpleQGraphDatastoreRecords(self):
1039 """Test quantum graph generation with --qgraph-datastore-records."""
1040 args = _makeArgs(
1041 butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
1042 )
1043 butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
1044 populateButler(self.pipeline, butler)
1046 fwk = CmdLineFwk()
1047 qgraph = fwk.makeGraph(self.pipeline, args)
1048 self.assertEqual(len(qgraph), self.nQuanta)
1049 for i, qnode in enumerate(qgraph):
1050 quantum = qnode.quantum
1051 self.assertIsNotNone(quantum.datastore_records)
1052 # only the first quantum has a pre-existing input
1053 if i == 0:
1054 datastore_name = "FileDatastore@<butlerRoot>"
1055 self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name})
1056 records_data = quantum.datastore_records[datastore_name]
1057 records = dict(records_data.records)
1058 self.assertEqual(len(records), 1)
1059 _, records = records.popitem()
1060 records = records["file_datastore_records"]
1061 self.assertEqual(
1062 [record.path for record in records],
1063 ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"],
1064 )
1065 else:
1066 self.assertEqual(quantum.datastore_records, {})
class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """General file leak detection.

    The body is intentionally empty: all checks are inherited from
    `lsst.utils.tests.MemoryTestCase`.
    """
def setup_module(module):
    """Initialize pytest module.

    Parameters
    ----------
    module : `module`
        The module being set up by pytest; unused here.
    """
    lsst.utils.tests.init()
# Allow running this test file directly (outside pytest).
if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()