# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Simple unit test for cmdLineFwk module."""

import contextlib
import logging
import os
import pickle
import re
import shutil
import tempfile
import unittest
import unittest.mock
from dataclasses import dataclass
from io import StringIO
from types import SimpleNamespace
from typing import NamedTuple

import astropy.units as u
import click
import click.testing
import lsst.pex.config as pexConfig
import lsst.pipe.base.connectionTypes as cT
import lsst.utils.tests
from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError, Report
from lsst.ctrl.mpexec.cli.opt import run_options
from lsst.ctrl.mpexec.cli.utils import (
    _ACTION_ADD_INSTRUMENT,
    _ACTION_ADD_TASK,
    _ACTION_CONFIG,
    _ACTION_CONFIG_FILE,
    PipetaskCommand,
)
from lsst.ctrl.mpexec.showInfo import ShowInfo
from lsst.daf.butler import (
    CollectionType,
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    DimensionConfig,
    DimensionUniverse,
    Quantum,
)
from lsst.daf.butler.registry import RegistryConfig
from lsst.daf.butler.registry.sql_registry import SqlRegistry
from lsst.pipe.base import (
    Instrument,
    Pipeline,
    PipelineTaskConfig,
    PipelineTaskConnections,
    QuantumGraph,
    TaskDef,
)
from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant
from lsst.pipe.base.script import transfer_from_graph
from lsst.pipe.base.tests.simpleQGraph import (
    AddTask,
    AddTaskFactoryMock,
    makeSimpleButler,
    makeSimplePipeline,
    makeSimpleQGraph,
    populateButler,
)
from lsst.utils.tests import temporaryDirectory

logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() to avoid retrieving a
# non-existing instrument from the registry. These tests run fine without an
# actual instrument, and implementing a full mock for Instrument is too
# complicated.
Instrument.fromName = lambda name, reg: None


@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    Temporary file is deleted on exiting context.

    Parameters
    ----------
    contents : `bytes` or `None`, optional
        Data to write into a file.
    suffix : `str` or `None`, optional
        Suffix to use for temporary file.

    Yields
    ------
    `str`
        Name of the temporary file.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(fd, contents)
    os.close(fd)
    yield tmpname
    with contextlib.suppress(OSError):
        os.remove(tmpname)
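

# The helper below is an illustrative usage sketch added for clarity; it is
# not part of the original test suite and is never called by the tests. The
# file contents shown are hypothetical.
def _example_makeTmpFile():
    """Show makeTmpFile usage: the name is valid inside the ``with`` block
    and the file is removed on exit.
    """
    with makeTmpFile(contents=b"config.addend = 7\n", suffix=".py") as name:
        with open(name, "rb") as f:
            return f.read()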


@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create new empty registry database.

    Parameters
    ----------
    create : `bool`, optional
        Whether to create the registry or not.
    universe : `~lsst.daf.butler.DimensionUniverse` or `None`, optional
        The dimension universe to use with the registry.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    dimensionConfig = universe.dimensionConfig if universe is not None else _makeDimensionConfig()
    with temporaryDirectory() as tmpdir:
        uri = f"sqlite:///{tmpdir}/gen3.sqlite"
        config = RegistryConfig()
        config["db"] = uri
        if create:
            SqlRegistry.createFromConfig(config, dimensionConfig=dimensionConfig)
        yield config
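

# Illustrative sketch added for clarity (not part of the original tests):
# the yielded RegistryConfig points at a temporary SQLite database that only
# exists inside the ``with`` block, as testMakeGraphFromSave() relies on.
def _example_makeSQLiteRegistry():
    """Build framework arguments against a throwaway registry."""
    with makeSQLiteRegistry() as registryConfig:
        return _makeArgs(registryConfig=registryConfig, execution_butler_location=None)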


class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Test connection class."""

    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")


class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Test pipeline config."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        PipelineTaskConfig.setDefaults(self)


def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default ``butler_config`` is set to a `Config` populated with some
    defaults; it can be overridden completely by a keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to get the default value of arguments to 'run'.
    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):  # numpydoc ignore=PR01
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option.
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    args = SimpleNamespace(**args)

    # Override butler_config with our defaults.
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use.
        args.butler_config.configFile = "."

    # Override arguments from keyword parameters.
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args
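

# Illustrative sketch added for clarity (not part of the original tests):
# _makeArgs() returns a SimpleNamespace of "pipetask run" defaults that the
# tests below then tweak attribute by attribute. The repository path here is
# hypothetical.
def _example_makeArgs():
    """Build default run arguments and override a couple of options."""
    args = _makeArgs(butler_config="/some/repo", input="test", output="output")
    args.extend_run = True
    return args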


class FakeDSType(NamedTuple):
    """A fake `~lsst.daf.butler.DatasetType` class used for testing."""

    name: str


@dataclass(frozen=True)
class FakeDSRef:
    """A fake `~lsst.daf.butler.DatasetRef` class used for testing."""

    datasetType: str
    dataId: tuple

    def isComponent(self):
        return False


# Task class name used by tests, needs to be importable.
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"


def _makeDimensionConfig():
    """Make a simple dimension universe configuration."""
    return DimensionConfig(
        {
            "version": 1,
            "namespace": "ctrl_mpexec_test",
            "skypix": {
                "common": "htm7",
                "htm": {
                    "class": "lsst.sphgeom.HtmPixelization",
                    "max_level": 24,
                },
            },
            "elements": {
                "A": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
                "B": {
                    "keys": [
                        {
                            "name": "id",
                            "type": "int",
                        }
                    ],
                    "storage": {
                        "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
                    },
                },
            },
            "packers": {},
        }
    )
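

# Illustrative sketch added for clarity (not part of the original tests):
# the configuration above is sufficient to construct a standalone dimension
# universe, which is how _makeQGraph() uses it below.
def _example_makeUniverse():
    """Build a DimensionUniverse from the test dimension configuration."""
    return DimensionUniverse(config=_makeDimensionConfig())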


def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it; the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    fakeDSType = DatasetType("A", (), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    quanta = [
        Quantum(
            taskName=_TASK_CLASS,
            inputs={
                fakeDSType: [
                    DatasetRef(
                        fakeDSType,
                        DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe),
                        run="fake_run",
                    )
                ]
            },
        )
    ]  # type: ignore
    qgraph = QuantumGraph({taskDef: set(quanta)}, universe=universe)
    return qgraph
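

# Illustrative sketch added for clarity (not part of the original tests):
# the trivial graph only needs to survive a save/load round trip, mirroring
# what testMakeGraphFromSave() exercises.
def _example_saveQGraph():
    """Persist the trivial quantum graph to a temporary file."""
    qgraph = _makeQGraph()
    with makeTmpFile(suffix=".qgraph") as tmpname:
        with open(tmpname, "wb") as saveFile:
            qgraph.save(saveFile)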


class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk."""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method."""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # a few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only the most trivial case is tested, which does not do actual
        graph building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None.
            qgraph = QuantumGraph({}, universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline, None)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # History output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline, None)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["pipeline-graph"], stream=stream)
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual(
            "\n".join(
                [
                    "○ add_dataset_in",
                    "│",
                    "■ task",
                    "│",
                    "◍ add_dataset_out, add2_dataset_out",
                ]
            ),
            output,
        )

        stream = StringIO()
        show = ShowInfo(["task-graph"], stream=stream)
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("■ task", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline, None)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline, None)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline, None)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

    def test_execution_resources_parameters(self) -> None:
        """Test creation of the ExecutionResources from command line."""
        fwk = CmdLineFwk()

        for params, num_cores, max_mem in (
            ((None, None), 1, None),
            ((5, ""), 5, None),
            ((None, "50"), 1, 50 * u.MB),
            ((5, "50 GB"), 5, 50 * u.GB),
        ):
            kwargs = {}
            for k, v in zip(("cores_per_quantum", "memory_per_quantum"), params, strict=True):
                if v is not None:
                    kwargs[k] = v
            args = _makeArgs(**kwargs)
            res = fwk._make_execution_resources(args)
            self.assertEqual(res.num_cores, num_cores)
            self.assertEqual(res.max_mem, max_mem)

        args = _makeArgs(memory_per_quantum="50m")
        with self.assertRaises(u.UnitConversionError):
            fwk._make_execution_resources(args)


class CmdLineFwkTestCaseWithButler(unittest.TestCase):
    """A test case for CmdLineFwk."""

    def setUp(self):
        super().setUp()
        self.root = tempfile.mkdtemp()
        self.nQuanta = 5
        self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta)

    def tearDown(self):
        shutil.rmtree(self.root, ignore_errors=True)
        super().tearDown()

    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")

    def testSimpleQGraph_rebase(self):
        """Test successful execution of trivial quantum graph, with --rebase
        used to force redefinition of the output collection.
        """
        # Pass one input collection here for the usual test setup; we'll
        # override it later.
        args = _makeArgs(butler_config=self.root, input="test1", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # We'll actually pass two input collections in. One is empty, but
        # the stuff we're testing here doesn't care.
        args.input = ["test2", "test1"]
        butler.registry.registerCollection("test2", CollectionType.RUN)

        # Set up the output collection with a sequence that doesn't end the
        # same way as the input collection. This is normally an error.
        butler.registry.registerCollection("output", CollectionType.CHAINED)
        butler.registry.registerCollection("unexpected_input", CollectionType.RUN)
        butler.registry.registerCollection("output/run0", CollectionType.RUN)
        butler.registry.setCollectionChain("output", ["test2", "unexpected_input", "test1", "output/run0"])

        # Without --rebase, the inconsistent input and output collections are
        # an error.
        with self.assertRaises(ValueError):
            fwk.makeGraph(self.pipeline, args)

        # With --rebase, the output collection gets redefined.
        args.rebase = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        butler.registry.refresh()
        self.assertEqual(
            list(butler.registry.getCollectionChain("output")),
            [args.output_run, "output/run0", "test2", "test1", "unexpected_input"],
        )

    def test_simple_qgraph_qbb(self):
        """Test successful execution of trivial quantum graph in QBB mode."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        output_run = qgraph.metadata["output_run"]
        args.output_run = output_run

        # QBB must run from serialized graph.
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            n1 = transfer_from_graph(temp_graph.name, self.root, True, False, False, False)
            self.assertEqual(n1, 31)

        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # Update the output run and try again.
        new_output_run = output_run + "_new"
        qgraph.updateRun(new_output_run, metadata_key="output_run", update_graph_id=True)
        self.assertEqual(qgraph.metadata["output_run"], new_output_run)

        taskFactory = AddTaskFactoryMock()
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            n2 = transfer_from_graph(temp_graph.name, self.root, True, False, False, False)
            self.assertEqual(n1, n2)

    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)

    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for case when output data for one task already appears in
        the _input_ collection, but no ``--extend-run`` or ``--skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` when output data for one task already
        appears in the _input_ collection. No ``--extend-run`` option is
        needed for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        # If all quanta are skipped, the task is not included in the graph.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` when output data for one task already
        appears in the _output_ collection. The ``--extend-run`` option is
        needed for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                    "task0_config",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        # First task has no remaining quanta, so is left out completely.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        # Graph does not include quantum for first task.
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)

    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.find_dataset("add2_dataset2", collections=args.output, instrument="INSTR", detector=0)
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.find_dataset("task1_metadata", collections=args.output, instrument="INSTR", detector=0)
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)

    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.find_dataset(
            "add2_dataset2", collections=args.output, data_id=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.find_dataset(
            "task1_metadata", collections=args.output, data_id=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)

    def testSimpleQGraphReplaceRun(self):
        """Test repeated execution of trivial quantum graph with
        --replace-run.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # deep copy is needed because quanta are updated in place
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # need to refresh collections explicitly (or make new butler/registry)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1"})

        # number of datasets written by pipeline:
        # - nQuanta of init_outputs
        # - nQuanta of configs
        # - packages (single dataset)
        # - nQuanta * two output datasets
        # - nQuanta of metadata
        # - nQuanta of log output
        n_outputs = self.nQuanta * 6 + 1
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run (--inputs is ignored, as long as it hasn't
        # changed)
        args.replace_run = True
        args.output_run = "output/run2"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there
        refs = butler.registry.queryDatasets(..., collections="output/run1")
        self.assertEqual(len(list(refs)), n_outputs)

        # re-run with --replace-run and --prune-replaced=unstore
        args.replace_run = True
        args.prune_replaced = "unstore"
        args.output_run = "output/run3"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run3")
        self.assertEqual(len(list(refs)), n_outputs)

        # old output collection is still there, and it has all datasets but
        # non-InitOutputs are not in datastore
        refs = butler.registry.queryDatasets(..., collections="output/run2")
        refs = list(refs)
        self.assertEqual(len(refs), n_outputs)
        initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
        for ref in refs:
            if initOutNameRe.fullmatch(ref.datasetType.name):
                butler.get(ref)
            else:
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)

        # re-run with --replace-run and --prune-replaced=purge
        # This time also remove --input; passing the same inputs that we
        # started with and not passing inputs at all should be equivalent.
        args.input = None
        args.replace_run = True
        args.prune_replaced = "purge"
        args.output_run = "output/run4"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)

        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        # output/run3 should disappear now
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

        # new output collection
        refs = butler.registry.queryDatasets(..., collections="output/run4")
        self.assertEqual(len(list(refs)), n_outputs)

        # Trying to run again with inputs that aren't exactly what we started
        # with is an error, and the kind that should not modify the data repo.
        with self.assertRaises(ValueError):
            args.input = ["test", "output/run2"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run5"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
        with self.assertRaises(ValueError):
            args.input = ["output/run2", "test"]
            args.prune_replaced = None
            args.replace_run = True
            args.output_run = "output/run6"
            qgraph = fwk.makeGraph(self.pipeline, args)
            fwk.runPipeline(qgraph, taskFactory, args)
        butler.registry.refresh()
        collections = set(butler.registry.queryCollections(...))
        self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

    def testSubgraph(self):
        """Test reading a subset of nodes from a saved quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)

        # Select first two nodes for execution. This depends on node ordering
        # which I assume is the same as execution order.
        nNodes = 2
        nodeIds = [node.nodeId for node in qgraph]
        nodeIds = nodeIds[:nNodes]

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        with (
            makeTmpFile(suffix=".qgraph") as tmpname,
            makeSQLiteRegistry(universe=butler.dimensions) as registryConfig,
        ):
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)

            args = _makeArgs(
                qgraph=tmpname,
                qgraph_node_id=nodeIds,
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            fwk = CmdLineFwk()

            # load graph, should only read a subset
            qgraph = fwk.makeGraph(pipeline=None, args=args)
            self.assertEqual(len(qgraph), nNodes)

    def testShowGraph(self):
        """Test for --show options for quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["graph"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"graph"})

    def testShowGraphWorkflow(self):
        """Test for --show workflow option for quantum graph."""
        nQuanta = 2
        butler, qgraph = makeSimpleQGraph(nQuanta, root=self.root)

        show = ShowInfo(["workflow"])
        show.show_graph_info(qgraph)
        self.assertEqual(show.handled, {"workflow"})

        # TODO: cannot test "uri" option presently, it instantiates
        # butler from command line options and there is no way to pass butler
        # mock to that code.
        show = ShowInfo(["uri"])
        with self.assertRaises(ValueError):  # No args given
            show.show_graph_info(qgraph)

    def testSimpleQGraphDatastoreRecords(self):
        """Test quantum graph generation with --qgraph-datastore-records."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)
        for i, qnode in enumerate(qgraph):
            quantum = qnode.quantum
            self.assertIsNotNone(quantum.datastore_records)
            # only the first quantum has a pre-existing input
            if i == 0:
                datastore_name = "FileDatastore@<butlerRoot>"
                self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name})
                records_data = quantum.datastore_records[datastore_name]
                records = dict(records_data.records)
                self.assertEqual(len(records), 1)
                _, records = records.popitem()
                records = records["file_datastore_records"]
                self.assertEqual(
                    [record.path for record in records],
                    ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"],
                )
            else:
                self.assertEqual(quantum.datastore_records, {})

    def testSummary(self):
        """Test generating a summary report."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        with makeTmpFile(suffix=".json") as tmpname:
            args.summary = tmpname

            # run whole thing
            fwk.runPipeline(qgraph, taskFactory, args)
            self.assertEqual(taskFactory.countExec, self.nQuanta)
            with open(tmpname) as fh:
                Report.model_validate_json(fh.read())


class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """General file leak detection."""


def setup_module(module):
    """Initialize pytest module.

    Parameters
    ----------
    module : `~types.ModuleType`
        Module to set up.
    """
    lsst.utils.tests.init()


if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()