Coverage for tests/test_cmdLineFwk.py: 13%
538 statements
« prev ^ index » next — coverage.py v7.3.1, created at 2023-09-13 09:53 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <https://www.gnu.org/licenses/>.
28"""Simple unit test for cmdLineFwk module.
29"""
31import contextlib
32import logging
33import os
34import pickle
35import re
36import shutil
37import tempfile
38import unittest
39from dataclasses import dataclass
40from io import StringIO
41from types import SimpleNamespace
42from typing import NamedTuple
44import astropy.units as u
45import click
46import lsst.pex.config as pexConfig
47import lsst.pipe.base.connectionTypes as cT
48import lsst.utils.tests
49from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError
50from lsst.ctrl.mpexec.cli.opt import run_options
51from lsst.ctrl.mpexec.cli.utils import (
52 _ACTION_ADD_INSTRUMENT,
53 _ACTION_ADD_TASK,
54 _ACTION_CONFIG,
55 _ACTION_CONFIG_FILE,
56 PipetaskCommand,
57)
58from lsst.ctrl.mpexec.showInfo import ShowInfo
59from lsst.daf.butler import (
60 CollectionType,
61 Config,
62 DataCoordinate,
63 DatasetRef,
64 DimensionConfig,
65 DimensionUniverse,
66 Quantum,
67 Registry,
68)
69from lsst.daf.butler.core.datasets.type import DatasetType
70from lsst.daf.butler.registry import RegistryConfig
71from lsst.pipe.base import (
72 Instrument,
73 Pipeline,
74 PipelineTaskConfig,
75 PipelineTaskConnections,
76 QuantumGraph,
77 TaskDef,
78)
79from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant
80from lsst.pipe.base.script import transfer_from_graph
81from lsst.pipe.base.tests.simpleQGraph import (
82 AddTask,
83 AddTaskFactoryMock,
84 makeSimpleButler,
85 makeSimplePipeline,
86 makeSimpleQGraph,
87 populateButler,
88)
89from lsst.utils.tests import temporaryDirectory
# Configure logging once for the whole test module; the level can be tuned
# via the UNIT_TEST_LOGGING_LEVEL environment variable (defaults to INFO).
logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() to not retrieve non-existing
# instrument from registry, these tests can run fine without actual instrument
# and implementing full mock for Instrument is too complicated.
Instrument.fromName = lambda name, reg: None
@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    Temporary file is deleted on exiting context, including when the
    managed block raises an exception.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into the file.
    suffix : `str`, optional
        Suffix (e.g. file extension) for the temporary file name.

    Yields
    ------
    tmpname : `str`
        Path of the temporary file.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    try:
        try:
            # Empty bytes means "no contents", same as None.
            if contents:
                os.write(fd, contents)
        finally:
            # Close the descriptor even if the write fails, so we never
            # leak an open fd.
            os.close(fd)
        yield tmpname
    finally:
        # Bug fix: cleanup used to run only on normal exit; now the file is
        # removed even when the with-body raises. Ignore a missing file.
        with contextlib.suppress(OSError):
            os.remove(tmpname)
@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create new empty registry database.

    Parameters
    ----------
    create : `bool`, optional
        If `True` (default), initialize the registry schema in the new
        database.
    universe : `DimensionUniverse`, optional
        Universe whose dimension configuration should be used; when omitted
        the module's simple test configuration is used.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    if universe is None:
        dimensionConfig = _makeDimensionConfig()
    else:
        dimensionConfig = universe.dimensionConfig
    with temporaryDirectory() as tmpdir:
        # SQLite file lives inside the temporary directory, so it is
        # removed automatically when the context exits.
        registryConfig = RegistryConfig()
        registryConfig["db"] = f"sqlite:///{tmpdir}/gen3.sqlite"
        if create:
            Registry.createFromConfig(registryConfig, dimensionConfig=dimensionConfig)
        yield registryConfig
class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Test connection class."""

    # Init-input connection; the "{template}" placeholder in the dataset type
    # name is filled from defaultTemplates (or config overrides) at runtime.
    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")
class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Test pipeline config."""

    # Arbitrary string field, exercised by config-override tests.
    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        # Use super() rather than naming the base class explicitly so the
        # call stays correct under cooperative multiple inheritance and if
        # the base class is ever renamed.
        super().setDefaults()
def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default butler_config is set to `Config` populated with some defaults,
    it can be overridden completely by keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to record the default value of every 'pipetask run' option.
    recorder = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        recorder(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    recorder.assert_called_once()

    defaults = recorder.call_args[1]
    # Rename a couple of options to the attribute names the framework expects.
    defaults["enableLsstDebug"] = defaults.pop("debug")
    defaults["execution_butler_location"] = defaults.pop("save_execution_butler")
    defaults.setdefault("pipeline_actions", [])
    args = SimpleNamespace(**defaults)

    # Override butler_config with our defaults unless the caller provided one.
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # Apply keyword overrides on top of the defaults.
    for name, value in kwargs.items():
        setattr(args, name, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args
class FakeDSType(NamedTuple):
    """A fake `~lsst.daf.butler.DatasetType` class used for testing."""

    # Dataset type name; the only attribute the tests need.
    name: str
@dataclass(frozen=True)
class FakeDSRef:
    """A fake `~lsst.daf.butler.DatasetRef` class used for testing."""

    # Dataset type name (a plain string stand-in for a real DatasetType).
    datasetType: str
    # Data ID values; frozen=True keeps instances hashable.
    dataId: tuple

    def isComponent(self):
        # Fake refs never represent a component dataset.
        return False
# Task class name used by tests, needs to be importable
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"
def _makeDimensionConfig():
    """Make a simple dimension universe configuration.

    Returns
    -------
    config : `DimensionConfig`
        Configuration with two trivial elements ("A" and "B") plus the
        standard HTM skypix system.
    """

    def element_spec():
        # Both test elements share the same shape: a single integer key
        # stored in a plain dimension table. Return a fresh dict each time
        # so the two elements never share mutable state.
        return {
            "keys": [
                {
                    "name": "id",
                    "type": "int",
                }
            ],
            "storage": {
                "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
            },
        }

    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": element_spec(),
                "B": element_spec(),
            },
            "packers": {},
        }
    )
def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it, the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    dsType = DatasetType("A", (), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    # Build the single input ref step by step instead of as one nested
    # literal; the quantum itself is not executable, only picklable.
    dataId = DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe)
    inputRef = DatasetRef(dsType, dataId, run="fake_run")
    quantum = Quantum(taskName=_TASK_CLASS, inputs={dsType: [inputRef]})  # type: ignore
    return QuantumGraph({taskDef: {quantum}}, universe=universe)
class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk"""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method"""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        # same override but loaded from a config file
        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only most trivial case is tested that does not do actual graph
        building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None.
            qgraph = QuantumGraph({}, universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        # unknown show keyword is rejected up front
        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # History output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        # "uri" is not a pipeline-level show command, so it stays unhandled
        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

    def test_execution_resources_parameters(self) -> None:
        """Test creation of the ExecutionResources from command line."""
        fwk = CmdLineFwk()

        # (cores_per_quantum, memory_per_quantum) -> expected resources;
        # None means "leave the option at its default".
        for params, num_cores, max_mem in (
            ((None, None), 1, None),
            ((5, ""), 5, None),
            ((None, "50"), 1, 50 * u.MB),
            ((5, "50 GB"), 5, 50 * u.GB),
        ):
            kwargs = {}
            for k, v in zip(("cores_per_quantum", "memory_per_quantum"), params, strict=True):
                if v is not None:
                    kwargs[k] = v
            args = _makeArgs(**kwargs)
            res = fwk._make_execution_resources(args)
            self.assertEqual(res.num_cores, num_cores)
            self.assertEqual(res.max_mem, max_mem)

        # "50m" is minutes, not a memory unit, so conversion must fail
        args = _makeArgs(memory_per_quantum="50m")
        with self.assertRaises(u.UnitConversionError):
            fwk._make_execution_resources(args)
505class CmdLineFwkTestCaseWithButler(unittest.TestCase):
506 """A test case for CmdLineFwk"""
508 def setUp(self):
509 super().setUpClass()
510 self.root = tempfile.mkdtemp()
511 self.nQuanta = 5
512 self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta)
514 def tearDown(self):
515 shutil.rmtree(self.root, ignore_errors=True)
516 super().tearDownClass()
    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")
    def testSimpleQGraph_rebase(self):
        """Test successful execution of trivial quantum graph, with --rebase
        used to force redefinition of the output collection.
        """
        # Pass one input collection here for the usual test setup; we'll
        # override it later.
        args = _makeArgs(butler_config=self.root, input="test1", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # We'll actually pass two input collections in. One is empty, but
        # the stuff we're testing here doesn't care.
        args.input = ["test2", "test1"]
        butler.registry.registerCollection("test2", CollectionType.RUN)

        # Set up the output collection with a sequence that doesn't end the
        # same way as the input collection. This is normally an error.
        butler.registry.registerCollection("output", CollectionType.CHAINED)
        butler.registry.registerCollection("unexpected_input", CollectionType.RUN)
        butler.registry.registerCollection("output/run0", CollectionType.RUN)
        butler.registry.setCollectionChain("output", ["test2", "unexpected_input", "test1", "output/run0"])

        # Without --rebase, the inconsistent input and output collections are
        # an error.
        with self.assertRaises(ValueError):
            fwk.makeGraph(self.pipeline, args)

        # With --rebase, the output collection gets redefined.
        args.rebase = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # The rebased chain puts the new run first and the stray
        # "unexpected_input" collection last.
        butler.registry.refresh()
        self.assertEqual(
            list(butler.registry.getCollectionChain("output")),
            [args.output_run, "output/run0", "test2", "test1", "unexpected_input"],
        )
    def test_simple_qgraph_qbb(self):
        """Test successful execution of trivial quantum graph in QBB mode."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        output_run = qgraph.metadata["output_run"]
        args.output_run = output_run

        # QBB must run from serialized graph.
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            n1 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, 31)

        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # Update the output run and try again.
        new_output_run = output_run + "_new"
        qgraph.updateRun(new_output_run, metadata_key="output_run", update_graph_id=True)
        self.assertEqual(qgraph.metadata["output_run"], new_output_run)

        taskFactory = AddTaskFactoryMock()
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler; the re-run should produce
            # the same number of datasets as the first run.
            n2 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, n2)
    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)
    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for case when output data for one task already appears in
        _input_ collection, but no ``--extend-run`` or ``-skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with the first task's outputs.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)
    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _input_ collection. No ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with the first task's outputs.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        # If all quanta are skipped, the task is not included in the graph.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)
    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _output_ collection. The ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # First task's outputs go to the output run; only its input dataset
        # lives in the input collection.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                    "task0_config",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        # First task has no remaining quanta, so is left out completely.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        # Graph does not include quantum for first task.
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)
    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # stopAt=3 makes the mock task factory fail on the fourth quantum.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, instrument="INSTR", detector=0
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # Re-run with skip-existing but without clobber: the partially
        # written quantum cannot be recovered, so execution fails again.
        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # stopAt=3 makes the mock task factory fail on the fourth quantum.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.registry.findDataset(
            "add2_dataset2", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.registry.findDataset(
            "task1_metadata", collections=args.output, dataId=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # With clobbering enabled the damaged quantum is re-executed.
        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)
def testSimpleQGraphReplaceRun(self):
    """Test repeated execution of trivial quantum graph with
    --replace-run.

    Runs the same pipeline several times with ``--replace-run`` (and
    optionally ``--prune-replaced``), checking after each step which
    output RUN collections exist and which datasets they contain.
    """

    def _assert_collections(expected: set[str]) -> None:
        # Need to refresh collections explicitly (or make a new
        # butler/registry) before new ones become visible.
        butler.registry.refresh()
        self.assertEqual(set(butler.registry.queryCollections(...)), expected)

    def _assert_bad_input_fails(inputs: list[str], output_run: str) -> None:
        # Trying to run again with inputs that aren't exactly what we
        # started with is an error, and the kind that should not modify
        # the data repo: collections must be unchanged afterwards.
        with self.assertRaises(ValueError):
            args.input = inputs
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = output_run
            bad_qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(bad_qgraph, taskFactory, args)
        _assert_collections({"test", "output", "output/run1", "output/run2", "output/run4"})

    args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
    butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
    populateButler(self.pipeline, butler)

    fwk = CmdLineFwk()
    taskFactory = AddTaskFactoryMock()

    qgraph = fwk.makeGraph(self.pipeline, args)

    # should have one task and number of quanta
    self.assertEqual(len(qgraph), self.nQuanta)

    # deep copy is needed because quanta are updated in place
    fwk.runPipeline(qgraph, taskFactory, args)
    self.assertEqual(taskFactory.countExec, self.nQuanta)

    _assert_collections({"test", "output", "output/run1"})

    # number of datasets written by pipeline:
    #  - nQuanta of init_outputs
    #  - nQuanta of configs
    #  - packages (single dataset)
    #  - nQuanta * two output datasets
    #  - nQuanta of metadata
    #  - nQuanta of log output
    n_outputs = self.nQuanta * 6 + 1
    refs = butler.registry.queryDatasets(..., collections="output/run1")
    self.assertEqual(len(list(refs)), n_outputs)

    # re-run with --replace-run (--inputs is ignored, as long as it hasn't
    # changed)
    args.replace_run = True
    args.output_run = "output/run2"
    qgraph = fwk.makeGraph(self.pipeline, args)
    fwk.runPipeline(qgraph, taskFactory, args)

    _assert_collections({"test", "output", "output/run1", "output/run2"})

    # new output collection
    refs = butler.registry.queryDatasets(..., collections="output/run2")
    self.assertEqual(len(list(refs)), n_outputs)

    # old output collection is still there
    refs = butler.registry.queryDatasets(..., collections="output/run1")
    self.assertEqual(len(list(refs)), n_outputs)

    # re-run with --replace-run and --prune-replaced=unstore
    args.replace_run = True
    args.prune_replaced = "unstore"
    args.output_run = "output/run3"
    qgraph = fwk.makeGraph(self.pipeline, args)
    fwk.runPipeline(qgraph, taskFactory, args)

    _assert_collections({"test", "output", "output/run1", "output/run2", "output/run3"})

    # new output collection
    refs = butler.registry.queryDatasets(..., collections="output/run3")
    self.assertEqual(len(list(refs)), n_outputs)

    # old output collection is still there, and it has all datasets but
    # non-InitOutputs are not in datastore
    refs = list(butler.registry.queryDatasets(..., collections="output/run2"))
    self.assertEqual(len(refs), n_outputs)
    initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
    for ref in refs:
        if initOutNameRe.fullmatch(ref.datasetType.name):
            # InitOutput-like datasets must still be retrievable.
            butler.get(ref)
        else:
            # Everything else was unstored, so retrieval must fail.
            with self.assertRaises(FileNotFoundError):
                butler.get(ref)

    # re-run with --replace-run and --prune-replaced=purge
    # This time also remove --input; passing the same inputs that we
    # started with and not passing inputs at all should be equivalent.
    args.input = None
    args.replace_run = True
    args.prune_replaced = "purge"
    args.output_run = "output/run4"
    qgraph = fwk.makeGraph(self.pipeline, args)
    fwk.runPipeline(qgraph, taskFactory, args)

    # output/run3 should disappear now
    _assert_collections({"test", "output", "output/run1", "output/run2", "output/run4"})

    # new output collection
    refs = butler.registry.queryDatasets(..., collections="output/run4")
    self.assertEqual(len(list(refs)), n_outputs)

    # Mismatched inputs, in either order, must fail cleanly.
    _assert_bad_input_fails(["test", "output/run2"], "output/run5")
    _assert_bad_input_fails(["output/run2", "test"], "output/run6")
def testSubgraph(self):
    """Test that a saved quantum graph can be partially re-loaded by
    node ID.
    """
    args = _makeArgs(butler_config=self.root, input="test", output="output")
    butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
    populateButler(self.pipeline, butler)

    fwk = CmdLineFwk()
    full_graph = fwk.makeGraph(self.pipeline, args)

    # Pick the first two nodes for execution; this relies on node
    # ordering, which is assumed to match execution order.
    n_selected = 2
    selected_ids = [node.nodeId for node in full_graph][:n_selected]

    self.assertEqual(len(full_graph.taskGraph), self.nQuanta)
    self.assertEqual(len(full_graph), self.nQuanta)

    with (
        makeTmpFile(suffix=".qgraph") as tmpname,
        makeSQLiteRegistry(universe=butler.dimensions) as registryConfig,
    ):
        with open(tmpname, "wb") as save_stream:
            full_graph.save(save_stream)

        args = _makeArgs(
            qgraph=tmpname,
            qgraph_node_id=selected_ids,
            registryConfig=registryConfig,
            execution_butler_location=None,
        )
        fwk = CmdLineFwk()

        # Loading with node IDs should read back only that subset.
        sub_graph = fwk.makeGraph(pipeline=None, args=args)
        self.assertEqual(len(sub_graph), n_selected)
def testShowGraph(self):
    """Exercise the ``graph`` choice of the --show option for a
    quantum graph.
    """
    # Two quanta are enough; the butler itself is not used here.
    _, qgraph = makeSimpleQGraph(2, root=self.root)

    info = ShowInfo(["graph"])
    info.show_graph_info(qgraph)
    self.assertEqual(info.handled, {"graph"})
def testShowGraphWorkflow(self):
    """Exercise the ``workflow`` and ``uri`` choices of the --show
    option for a quantum graph.
    """
    # Two quanta are enough; the butler itself is not used here.
    _, qgraph = makeSimpleQGraph(2, root=self.root)

    info = ShowInfo(["workflow"])
    info.show_graph_info(qgraph)
    self.assertEqual(info.handled, {"workflow"})

    # TODO: cannot test "uri" option presently, it instantiates
    # butler from command line options and there is no way to pass butler
    # mock to that code.
    info = ShowInfo(["uri"])
    with self.assertRaises(ValueError):  # No args given
        info.show_graph_info(qgraph)
def testSimpleQGraphDatastoreRecords(self):
    """Test quantum graph generation with --qgraph-datastore-records."""
    args = _makeArgs(
        butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
    )
    butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
    populateButler(self.pipeline, butler)

    fwk = CmdLineFwk()
    qgraph = fwk.makeGraph(self.pipeline, args)
    self.assertEqual(len(qgraph), self.nQuanta)

    for index, node in enumerate(qgraph):
        quantum = node.quantum
        self.assertIsNotNone(quantum.datastore_records)
        # Only the first quantum has a pre-existing input; all others
        # carry no datastore records.
        if index != 0:
            self.assertEqual(quantum.datastore_records, {})
            continue
        store_name = "FileDatastore@<butlerRoot>"
        self.assertEqual(set(quantum.datastore_records.keys()), {store_name})
        records_data = quantum.datastore_records[store_name]
        record_map = dict(records_data.records)
        self.assertEqual(len(record_map), 1)
        _, table_data = record_map.popitem()
        file_records = table_data["file_datastore_records"]
        self.assertEqual(
            [record.path for record in file_records],
            ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"],
        )
class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """Run the standard LSST file-leak checks for this test module."""
def setup_module(module):
    """Pytest per-module setup hook: initialize the LSST test helpers.

    The ``module`` argument is required by the pytest hook signature
    but is not used here.
    """
    lsst.utils.tests.init()
# Allow running this test file directly (outside pytest).
if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()