Coverage for tests/test_cmdLineFwk.py: 13%
538 statements

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Simple unit test for cmdLineFwk module.
23"""

import contextlib
import logging
import os
import pickle
import re
import shutil
import tempfile
import unittest
import unittest.mock
from dataclasses import dataclass
from io import StringIO
from types import SimpleNamespace
from typing import NamedTuple

import astropy.units as u
import click
import click.testing
import lsst.pex.config as pexConfig
import lsst.pipe.base.connectionTypes as cT
import lsst.utils.tests
from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError
from lsst.ctrl.mpexec.cli.opt import run_options
from lsst.ctrl.mpexec.cli.utils import (
    _ACTION_ADD_INSTRUMENT,
    _ACTION_ADD_TASK,
    _ACTION_CONFIG,
    _ACTION_CONFIG_FILE,
    PipetaskCommand,
)
from lsst.ctrl.mpexec.showInfo import ShowInfo
from lsst.daf.butler import (
    CollectionType,
    Config,
    DataCoordinate,
    DatasetRef,
    DimensionConfig,
    DimensionUniverse,
    Quantum,
    Registry,
)
from lsst.daf.butler.core.datasets.type import DatasetType
from lsst.daf.butler.registry import RegistryConfig
from lsst.pipe.base import (
    Instrument,
    Pipeline,
    PipelineTaskConfig,
    PipelineTaskConnections,
    QuantumGraph,
    TaskDef,
)
from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant
from lsst.pipe.base.script import transfer_from_graph
from lsst.pipe.base.tests.simpleQGraph import (
    AddTask,
    AddTaskFactoryMock,
    makeSimpleButler,
    makeSimplePipeline,
    makeSimpleQGraph,
    populateButler,
)
from lsst.utils.tests import temporaryDirectory

logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() so that it does not try to
# retrieve a non-existent instrument from the registry; these tests run fine
# without an actual instrument, and a full mock for Instrument would be too
# complicated to implement.
Instrument.fromName = lambda name, reg: None


@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    The temporary file is deleted on exiting the context.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into the file.
    suffix : `str`, optional
        Suffix for the temporary file name.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(fd, contents)
    os.close(fd)
    yield tmpname
    with contextlib.suppress(OSError):
        os.remove(tmpname)
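
# A minimal usage sketch for makeTmpFile (illustrative only; the tests below
# exercise it directly). The file exists inside the block and is removed on
# exit:
#
#     with makeTmpFile(b"config.addend = 1000\n", suffix=".py") as tmpname:
#         assert os.path.exists(tmpname)
#     assert not os.path.exists(tmpname)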


@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create a new empty registry database.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for the initialized registry database.
    """
    dimensionConfig = universe.dimensionConfig if universe is not None else _makeDimensionConfig()
    with temporaryDirectory() as tmpdir:
        uri = f"sqlite:///{tmpdir}/gen3.sqlite"
        config = RegistryConfig()
        config["db"] = uri
        if create:
            Registry.createFromConfig(config, dimensionConfig=dimensionConfig)
        yield config
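
# Typical use, mirroring the tests below: the yielded config is handed to
# _makeArgs(registryConfig=...) so that CmdLineFwk reads from a throwaway
# SQLite registry ("existing.qgraph" is a hypothetical file name):
#
#     with makeSQLiteRegistry() as registryConfig:
#         args = _makeArgs(qgraph="existing.qgraph", registryConfig=registryConfig)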


class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Test connection class."""

    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")


class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Test pipeline config."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        PipelineTaskConfig.setDefaults(self)


def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default ``butler_config`` is set to a `Config` populated with some
    defaults; it can be overridden completely via a keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to get the default value of arguments to 'run'.
    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option.
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    args = SimpleNamespace(**args)

    # Override butler_config with our defaults.
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use.
        args.butler_config.configFile = "."

    # Override arguments from keyword parameters.
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args
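
# Example of how the tests below use this helper; keyword arguments simply
# become attributes of the returned namespace ("/repo" is a hypothetical
# butler config path):
#
#     args = _makeArgs(butler_config="/repo", input="test", output="output")
#     assert args.input == "test"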


class FakeDSType(NamedTuple):
    """A fake `~lsst.daf.butler.DatasetType` class used for testing."""

    name: str


@dataclass(frozen=True)
class FakeDSRef:
    """A fake `~lsst.daf.butler.DatasetRef` class used for testing."""

    datasetType: str
    dataId: tuple

    def isComponent(self):
        return False


# Task class name used by tests; needs to be importable.
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"


def _makeDimensionConfig():
    """Make a simple dimension universe configuration."""
    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
                "B": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
            },
            "packers": {},
        }
    )
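
# The tests build a standalone dimension universe from this configuration,
# e.g.:
#
#     universe = DimensionUniverse(config=_makeDimensionConfig())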


def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it; the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    fakeDSType = DatasetType("A", (), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    quanta = [
        Quantum(
            taskName=_TASK_CLASS,
            inputs={
                fakeDSType: [
                    DatasetRef(
                        fakeDSType,
                        DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe),
                        run="fake_run",
                    )
                ]
            },
        )
    ]  # type: ignore
    qgraph = QuantumGraph({taskDef: set(quanta)}, universe=universe)
    return qgraph
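
# The graph returned by _makeQGraph() is only good for round-tripping through
# a file, mirroring what testMakeGraphFromSave does below:
#
#     qgraph = _makeQGraph()
#     with makeTmpFile(suffix=".qgraph") as tmpname:
#         with open(tmpname, "wb") as saveFile:
#             qgraph.save(saveFile)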


class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk."""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method."""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only the most trivial case, which does not do actual graph building,
        is tested.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make a non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if the graph ID does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with the wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # Reading an empty graph from pickle should work, but makeGraph()
            # will return None.
            qgraph = QuantumGraph({}, universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # history output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

    def test_execution_resources_parameters(self) -> None:
        """Test creation of the ExecutionResources from command line."""
        fwk = CmdLineFwk()

        for params, num_cores, max_mem in (
            ((None, None), 1, None),
            ((5, ""), 5, None),
            ((None, "50"), 1, 50 * u.MB),
            ((5, "50 GB"), 5, 50 * u.GB),
        ):
            kwargs = {}
            for k, v in zip(("cores_per_quantum", "memory_per_quantum"), params, strict=True):
                if v is not None:
                    kwargs[k] = v
            args = _makeArgs(**kwargs)
            res = fwk._make_execution_resources(args)
            self.assertEqual(res.num_cores, num_cores)
            self.assertEqual(res.max_mem, max_mem)

        args = _makeArgs(memory_per_quantum="50m")
        with self.assertRaises(u.UnitConversionError):
            fwk._make_execution_resources(args)


class CmdLineFwkTestCaseWithButler(unittest.TestCase):
    """A test case for CmdLineFwk that runs against a real butler repo."""

    def setUp(self):
        super().setUp()
        self.root = tempfile.mkdtemp()
        self.nQuanta = 5
        self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta)

    def tearDown(self):
        shutil.rmtree(self.root, ignore_errors=True)
        super().tearDown()

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

    def testSimpleQGraph_rebase(self):
        """Test successful execution of trivial quantum graph, with --rebase
        used to force redefinition of the output collection.
        """
        # Pass one input collection here for the usual test setup; we'll
        # override it later.
        args = _makeArgs(butler_config=self.root, input="test1", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # We'll actually pass two input collections in. One is empty, but
        # the stuff we're testing here doesn't care.
        args.input = ["test2", "test1"]
        butler.registry.registerCollection("test2", CollectionType.RUN)

        # Set up the output collection with a sequence that doesn't end the
        # same way as the input collection. This is normally an error.
        butler.registry.registerCollection("output", CollectionType.CHAINED)
        butler.registry.registerCollection("unexpected_input", CollectionType.RUN)
        butler.registry.registerCollection("output/run0", CollectionType.RUN)
        butler.registry.setCollectionChain("output", ["test2", "unexpected_input", "test1", "output/run0"])

        # Without --rebase, the inconsistent input and output collections are
        # an error.
        with self.assertRaises(ValueError):
            fwk.makeGraph(self.pipeline, args)

        # With --rebase, the output collection gets redefined.
        args.rebase = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        butler.registry.refresh()
        self.assertEqual(
            list(butler.registry.getCollectionChain("output")),
            [args.output_run, "output/run0", "test2", "test1", "unexpected_input"],
        )

    def test_simple_qgraph_qbb(self):
        """Test successful execution of trivial quantum graph in QBB mode."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        output_run = qgraph.metadata["output_run"]
        args.output_run = output_run

        # QBB must run from a serialized graph.
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            n1 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, 31)

        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # Update the output run and try again.
        new_output_run = output_run + "_new"
        qgraph.updateRun(new_output_run, metadata_key="output_run", update_graph_id=True)
        self.assertEqual(qgraph.metadata["output_run"], new_output_run)

        taskFactory = AddTaskFactoryMock()
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            n2 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, n2)

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for the case when output data for one task already appears in
        the _input_ collection, but no ``--extend-run`` or ``--skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` when output data for one task already
        appears in the _input_ collection. No ``--extend-run`` option is
        needed for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        # If all quanta are skipped, the task is not included in the graph.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` when output data for one task already
        appears in the _output_ collection. The ``--extend-run`` option is
        needed for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                    "task0_config",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        # First task has no remaining quanta, so is left out completely.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        # Graph does not include quantum for first task.
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        #  - nQuanta of init_outputs
        #  - nQuanta of configs
        #  - packages (single dataset)
        #  - nQuanta * two output datasets
        #  - nQuanta of metadata
        #  - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
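        # For nQuanta = 5 this breakdown gives 5 * 6 + 1 = 31 datasets per
        # run, matching the count reported by transfer_from_graph in
        # test_simple_qgraph_qbb above.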
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # The old output collection is still there and has all datasets, but
        # the non-InitOutputs are no longer in the datastore.
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                butler.get(ref)
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

    def testSubgraph(self):
        """Test loading a subset of nodes from a saved quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)

        # Select the first two nodes for execution. This depends on the node
        # ordering, which is assumed to be the same as the execution order.
        nNodes = 2
        nodeIds = [node.nodeId for node in qgraph]
        nodeIds = nodeIds[:nNodes]

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with (
            makeTmpFile(suffix=".qgraph") as tmpname,
            makeSQLiteRegistry(universe=butler.dimensions) as registryConfig,
        ):
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)

            args = _makeArgs(
                qgraph=tmpname,
                qgraph_node_id=nodeIds,
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            fwk = CmdLineFwk()

            # load graph, should only read a subset
            qgraph = fwk.makeGraph(pipeline=None, args=args)
            self.assertEqual(len(qgraph), nNodes)

    def testShowGraph(self):
        """Test for --show options for quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["graph"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"graph"})

    def testShowGraphWorkflow(self):
        """Test for the --show workflow option for a quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["workflow"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"workflow"})

        # TODO: cannot test the "uri" option presently; it instantiates a
        # butler from command line options and there is no way to pass a
        # butler mock to that code.
        show = ShowInfo(["uri"])
        with self.assertRaises(ValueError):  # No args given
            show.show_graph_info(qgraph)

    def testSimpleQGraphDatastoreRecords(self):
        """Test quantum graph generation with --qgraph-datastore-records."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)
        for i, qnode in enumerate(qgraph):
            quantum = qnode.quantum
            self.assertIsNotNone(quantum.datastore_records)
            # only the first quantum has a pre-existing input
            if i == 0:
                datastore_name = "FileDatastore@<butlerRoot>"
                self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name})
                records_data = quantum.datastore_records[datastore_name]
                records = dict(records_data.records)
                self.assertEqual(len(records), 1)
                _, records = records.popitem()
                records = records["file_datastore_records"]
                self.assertEqual(
                    [record.path for record in records],
                    ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"],
                )
            else:
                self.assertEqual(quantum.datastore_records, {})


class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """General file leak detection."""


def setup_module(module):
    """Initialize pytest module."""
    lsst.utils.tests.init()


if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()