Coverage for tests/test_cmdLineFwk.py: 13%
550 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-09 12:06 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <https://www.gnu.org/licenses/>.
28"""Simple unit test for cmdLineFwk module.
29"""
31import contextlib
32import logging
33import os
34import pickle
35import re
36import shutil
37import tempfile
38import unittest
39from dataclasses import dataclass
40from io import StringIO
41from types import SimpleNamespace
42from typing import NamedTuple
44import astropy.units as u
45import click
46import lsst.pex.config as pexConfig
47import lsst.pipe.base.connectionTypes as cT
48import lsst.utils.tests
49from lsst.ctrl.mpexec import CmdLineFwk, MPGraphExecutorError
50from lsst.ctrl.mpexec.cli.opt import run_options
51from lsst.ctrl.mpexec.cli.utils import (
52 _ACTION_ADD_INSTRUMENT,
53 _ACTION_ADD_TASK,
54 _ACTION_CONFIG,
55 _ACTION_CONFIG_FILE,
56 PipetaskCommand,
57)
58from lsst.ctrl.mpexec.showInfo import ShowInfo
59from lsst.daf.butler import (
60 CollectionType,
61 Config,
62 DataCoordinate,
63 DatasetRef,
64 DatasetType,
65 DimensionConfig,
66 DimensionUniverse,
67 Quantum,
68)
69from lsst.daf.butler.registry import RegistryConfig
70from lsst.daf.butler.registry.sql_registry import SqlRegistry
71from lsst.pipe.base import (
72 Instrument,
73 Pipeline,
74 PipelineTaskConfig,
75 PipelineTaskConnections,
76 QuantumGraph,
77 TaskDef,
78)
79from lsst.pipe.base.graphBuilder import DatasetQueryConstraintVariant as DQCVariant
80from lsst.pipe.base.script import transfer_from_graph
81from lsst.pipe.base.tests.simpleQGraph import (
82 AddTask,
83 AddTaskFactoryMock,
84 makeSimpleButler,
85 makeSimplePipeline,
86 makeSimpleQGraph,
87 populateButler,
88)
89from lsst.utils.tests import temporaryDirectory
logging.basicConfig(level=getattr(logging, os.environ.get("UNIT_TEST_LOGGING_LEVEL", "INFO"), logging.INFO))

# Have to monkey-patch Instrument.fromName() to not retrieve non-existing
# instrument from registry, these tests can run fine without actual instrument
# and implementing full mock for Instrument is too complicated.
Instrument.fromName = lambda name, reg: None
@contextlib.contextmanager
def makeTmpFile(contents=None, suffix=None):
    """Context manager for generating temporary file name.

    Temporary file is deleted on exiting context.

    Parameters
    ----------
    contents : `bytes`, optional
        Data to write into the file; nothing is written when empty or `None`.
    suffix : `str`, optional
        Suffix (e.g. file extension) for the temporary file name.
    """
    fd, tmpname = tempfile.mkstemp(suffix=suffix)
    if contents:
        os.write(fd, contents)
    os.close(fd)
    try:
        yield tmpname
    finally:
        # Deletion must happen in ``finally``: without it an exception raised
        # inside the managed block would propagate from the ``yield`` and the
        # temporary file would leak, contradicting the docstring above.
        with contextlib.suppress(OSError):
            os.remove(tmpname)
@contextlib.contextmanager
def makeSQLiteRegistry(create=True, universe=None):
    """Context manager to create new empty registry database.

    Parameters
    ----------
    create : `bool`, optional
        If `True`, initialize the registry schema in the new database.
    universe : `~lsst.daf.butler.DimensionUniverse`, optional
        Universe whose dimension configuration should be used; a simple
        test configuration is used when not provided.

    Yields
    ------
    config : `RegistryConfig`
        Registry configuration for initialized registry database.
    """
    if universe is None:
        dimensionConfig = _makeDimensionConfig()
    else:
        dimensionConfig = universe.dimensionConfig
    with temporaryDirectory() as tmpdir:
        registryConfig = RegistryConfig()
        registryConfig["db"] = f"sqlite:///{tmpdir}/gen3.sqlite"
        if create:
            SqlRegistry.createFromConfig(registryConfig, dimensionConfig=dimensionConfig)
        yield registryConfig
class SimpleConnections(PipelineTaskConnections, dimensions=(), defaultTemplates={"template": "simple"}):
    """Test connection class."""

    # Init-input connection; the dataset name is expanded from the "template"
    # default template above (-> "simpleschema").
    schema = cT.InitInput(doc="Schema", name="{template}schema", storageClass="SourceCatalog")
class SimpleConfig(PipelineTaskConfig, pipelineConnections=SimpleConnections):
    """Test pipeline config."""

    field = pexConfig.Field(dtype=str, doc="arbitrary string")

    def setDefaults(self):
        # No extra defaults beyond the base class; the override is kept to
        # mirror the structure of real PipelineTaskConfig subclasses.
        PipelineTaskConfig.setDefaults(self)
def _makeArgs(registryConfig=None, **kwargs):
    """Return parsed command line arguments.

    By default butler_config is set to `Config` populated with some defaults,
    it can be overridden completely by keyword argument.

    Parameters
    ----------
    registryConfig : `RegistryConfig`, optional
        Override for registry configuration.
    **kwargs
        Overrides for other arguments.
    """
    # Use a mock to get the default value of arguments to 'run'.
    # NOTE(review): this relies on ``unittest.mock`` being importable as an
    # attribute of ``unittest``; the submodule is presumably pulled in by one
    # of the lsst imports above -- confirm, or add ``import unittest.mock``.
    mock = unittest.mock.Mock()

    @click.command(cls=PipetaskCommand)
    @run_options()
    def fake_run(ctx, **kwargs):
        """Fake "pipetask run" command for gathering input arguments.

        The arguments & options should always match the arguments & options in
        the "real" command function `lsst.ctrl.mpexec.cli.cmd.run`.
        """
        mock(**kwargs)

    runner = click.testing.CliRunner()
    # --butler-config is the only required option
    result = runner.invoke(fake_run, "--butler-config /")
    if result.exit_code != 0:
        raise RuntimeError(f"Failure getting default args from 'fake_run': {result}")
    mock.assert_called_once()
    args = mock.call_args[1]
    # Rename CLI option keys to the attribute names the framework expects.
    args["enableLsstDebug"] = args.pop("debug")
    args["execution_butler_location"] = args.pop("save_execution_butler")
    if "pipeline_actions" not in args:
        args["pipeline_actions"] = []
    args = SimpleNamespace(**args)

    # override butler_config with our defaults
    if "butler_config" not in kwargs:
        args.butler_config = Config()
        if registryConfig:
            args.butler_config["registry"] = registryConfig
        # The default datastore has a relocatable root, so we need to specify
        # some root here for it to use
        args.butler_config.configFile = "."

    # override arguments from keyword parameters
    for key, value in kwargs.items():
        setattr(args, key, value)
    args.dataset_query_constraint = DQCVariant.fromExpression(args.dataset_query_constraint)
    return args
class FakeDSType(NamedTuple):
    """A fake `~lsst.daf.butler.DatasetType` class used for testing."""

    # Dataset type name; the only attribute the tests need.
    name: str
@dataclass(frozen=True)
class FakeDSRef:
    """A fake `~lsst.daf.butler.DatasetRef` class used for testing."""

    # Name of the dataset type (a plain string stands in for a DatasetType).
    datasetType: str
    # Data ID values (a plain tuple stands in for a DataCoordinate).
    dataId: tuple

    def isComponent(self) -> bool:
        # This fake never represents a component dataset.
        return False
# Fully-qualified task class name used by tests; it must be importable
# because the task factory loads it by name at run time.
_TASK_CLASS = "lsst.pipe.base.tests.simpleQGraph.AddTask"
def _makeDimensionConfig():
    """Make a simple dimension universe configuration.

    The universe has two integer-keyed elements, "A" and "B", plus the
    common HTM sky pixelization, under a private test namespace.
    """

    def _intKeyedElement():
        # Build a fresh dict per call so that "A" and "B" never share
        # nested mutable state.
        return {
            "keys": [
                {
                    "name": "id",
                    "type": "int",
                }
            ],
            "storage": {
                "cls": "lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage",
            },
        }

    config = {
        "version": 1,
        "namespace": "ctrl_mpexec_test",
        "skypix": {
            "common": "htm7",
            "htm": {
                "class": "lsst.sphgeom.HtmPixelization",
                "max_level": 24,
            },
        },
        "elements": {
            "A": _intKeyedElement(),
            "B": _intKeyedElement(),
        },
        "packers": {},
    }
    return DimensionConfig(config)
def _makeQGraph():
    """Make a trivial QuantumGraph with one quantum.

    The only thing that we need to do with this quantum graph is to pickle
    it, the quanta in this graph are not usable for anything else.

    Returns
    -------
    qgraph : `~lsst.pipe.base.QuantumGraph`
    """
    universe = DimensionUniverse(config=_makeDimensionConfig())
    # Dataset type with empty dimensions -- it only needs to serialize.
    fakeDSType = DatasetType("A", (), storageClass="ExposureF", universe=universe)
    taskDef = TaskDef(taskName=_TASK_CLASS, config=AddTask.ConfigClass(), taskClass=AddTask)
    quanta = [
        Quantum(
            taskName=_TASK_CLASS,
            inputs={
                fakeDSType: [
                    DatasetRef(
                        fakeDSType,
                        DataCoordinate.standardize({"A": 1, "B": 2}, universe=universe),
                        run="fake_run",
                    )
                ]
            },
        )
    ]  # type: ignore
    qgraph = QuantumGraph({taskDef: set(quanta)}, universe=universe)
    return qgraph
class CmdLineFwkTestCase(unittest.TestCase):
    """A test case for CmdLineFwk methods that do not need a butler."""

    def testMakePipeline(self):
        """Tests for CmdLineFwk.makePipeline method."""
        fwk = CmdLineFwk()

        # make empty pipeline
        args = _makeArgs()
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 0)

        # few tests with serialization
        with makeTmpFile() as tmpname:
            # make empty pipeline and store it in a file
            args = _makeArgs(save_pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)

            # read pipeline from a file
            args = _makeArgs(pipeline=tmpname)
            pipeline = fwk.makePipeline(args)
            self.assertIsInstance(pipeline, Pipeline)
            self.assertEqual(len(pipeline), 0)

        # single task pipeline, task name can be anything here
        actions = [_ACTION_ADD_TASK("TaskOne:task1")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 1)

        # many task pipeline
        actions = [
            _ACTION_ADD_TASK("TaskOne:task1a"),
            _ACTION_ADD_TASK("TaskTwo:task2"),
            _ACTION_ADD_TASK("TaskOne:task1b"),
        ]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        self.assertIsInstance(pipeline, Pipeline)
        self.assertEqual(len(pipeline), 3)

        # single task pipeline with config overrides, need real task class
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)
        taskDefs = list(pipeline.toExpandedPipeline())
        self.assertEqual(len(taskDefs), 1)
        self.assertEqual(taskDefs[0].config.addend, 100)

        # same override but read from a config file instead of command line
        overrides = b"config.addend = 1000\n"
        with makeTmpFile(overrides) as tmpname:
            actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG_FILE("task:" + tmpname)]
            args = _makeArgs(pipeline_actions=actions)
            pipeline = fwk.makePipeline(args)
            taskDefs = list(pipeline.toExpandedPipeline())
            self.assertEqual(len(taskDefs), 1)
            self.assertEqual(taskDefs[0].config.addend, 1000)

        # Check --instrument option, for now it only checks that it does not
        # crash.
        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_ADD_INSTRUMENT("Instrument")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

    def testMakeGraphFromSave(self):
        """Tests for CmdLineFwk.makeGraph method.

        Only most trivial case is tested that does not do actual graph
        building.
        """
        fwk = CmdLineFwk()

        with makeTmpFile(suffix=".qgraph") as tmpname, makeSQLiteRegistry() as registryConfig:
            # make non-empty graph and store it in a file
            qgraph = _makeQGraph()
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIsInstance(qgraph, QuantumGraph)
            self.assertEqual(len(qgraph), 1)

            # will fail if graph id does not match
            args = _makeArgs(
                qgraph=tmpname,
                qgraph_id="R2-D2 is that you?",
                registryConfig=registryConfig,
                execution_butler_location=None,
            )
            with self.assertRaisesRegex(ValueError, "graphID does not match"):
                fwk.makeGraph(None, args)

            # save with wrong object type
            with open(tmpname, "wb") as saveFile:
                pickle.dump({}, saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            with self.assertRaises(ValueError):
                fwk.makeGraph(None, args)

            # reading empty graph from pickle should work but makeGraph()
            # will return None.
            qgraph = QuantumGraph({}, universe=DimensionUniverse(_makeDimensionConfig()))
            with open(tmpname, "wb") as saveFile:
                qgraph.save(saveFile)
            args = _makeArgs(qgraph=tmpname, registryConfig=registryConfig, execution_butler_location=None)
            qgraph = fwk.makeGraph(None, args)
            self.assertIs(qgraph, None)

    def testShowPipeline(self):
        """Test for --show options for pipeline."""
        fwk = CmdLineFwk()

        actions = [_ACTION_ADD_TASK(f"{_TASK_CLASS}:task"), _ACTION_CONFIG("task:addend=100")]
        args = _makeArgs(pipeline_actions=actions)
        pipeline = fwk.makePipeline(args)

        # Unknown show keyword is rejected at construction time.
        with self.assertRaises(ValueError):
            ShowInfo(["unrecognized", "config"])

        stream = StringIO()
        show = ShowInfo(
            ["pipeline", "config", "history=task::addend", "tasks", "dump-config", "config=task::add*"],
            stream=stream,
        )
        show.show_pipeline_info(pipeline, None)
        self.assertEqual(show.unhandled, frozenset({}))
        stream.seek(0)
        output = stream.read()
        self.assertIn("config.addend=100", output)  # config option
        self.assertIn("addend\n3", output)  # History output
        self.assertIn("class: lsst.pipe.base.tests.simpleQGraph.AddTask", output)  # pipeline

        # "uri" needs a graph, so it is left unhandled at pipeline stage.
        show = ShowInfo(["pipeline", "uri"], stream=stream)
        show.show_pipeline_info(pipeline, None)
        self.assertEqual(show.unhandled, frozenset({"uri"}))
        self.assertEqual(show.handled, {"pipeline"})

        stream = StringIO()
        show = ShowInfo(["config=task::addend.missing"], stream=stream)  # No match
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd:NOIGNORECASE"], stream=stream)  # No match
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("### Configuration for task `task'", output)

        stream = StringIO()
        show = ShowInfo(["pipeline-graph"], stream=stream)
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        # Text rendering of the single-task dataset/task graph.
        self.assertEqual(
            "\n".join(
                [
                    "○ add_dataset_in",
                    "│",
                    "■ task",
                    "│",
                    "◍ add_dataset_out, add2_dataset_out",
                ]
            ),
            output,
        )

        stream = StringIO()
        show = ShowInfo(["task-graph"], stream=stream)
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertEqual("■ task", output)

        stream = StringIO()
        show = ShowInfo(["config=task::addEnd"], stream=stream)  # Match but warns
        show.show_pipeline_info(pipeline, None)
        stream.seek(0)
        output = stream.read().strip()
        self.assertIn("NOIGNORECASE", output)

        show = ShowInfo(["dump-config=notask"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline, None)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

        show = ShowInfo(["history"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline, None)
        self.assertIn("Please provide a value", str(cm.exception))

        show = ShowInfo(["history=notask::param"])
        with self.assertRaises(ValueError) as cm:
            show.show_pipeline_info(pipeline, None)
        self.assertIn("Pipeline has no tasks named notask", str(cm.exception))

    def test_execution_resources_parameters(self) -> None:
        """Test creation of the ExecutionResources from command line."""
        fwk = CmdLineFwk()

        # (cores_per_quantum, memory_per_quantum) -> expected resources.
        for params, num_cores, max_mem in (
            ((None, None), 1, None),
            ((5, ""), 5, None),
            ((None, "50"), 1, 50 * u.MB),
            ((5, "50 GB"), 5, 50 * u.GB),
        ):
            kwargs = {}
            for k, v in zip(("cores_per_quantum", "memory_per_quantum"), params, strict=True):
                if v is not None:
                    kwargs[k] = v
            args = _makeArgs(**kwargs)
            res = fwk._make_execution_resources(args)
            self.assertEqual(res.num_cores, num_cores)
            self.assertEqual(res.max_mem, max_mem)

        # "50m" is a time unit (minutes), not a memory unit.
        args = _makeArgs(memory_per_quantum="50m")
        with self.assertRaises(u.UnitConversionError):
            fwk._make_execution_resources(args)
530class CmdLineFwkTestCaseWithButler(unittest.TestCase):
531 """A test case for CmdLineFwk"""
533 def setUp(self):
534 super().setUpClass()
535 self.root = tempfile.mkdtemp()
536 self.nQuanta = 5
537 self.pipeline = makeSimplePipeline(nQuanta=self.nQuanta)
539 def tearDown(self):
540 shutil.rmtree(self.root, ignore_errors=True)
541 super().tearDownClass()
    def testSimpleQGraph(self):
        """Test successful execution of trivial quantum graph."""
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # test that we've disabled implicit threading
        self.assertEqual(os.environ["OMP_NUM_THREADS"], "1")
    def testSimpleQGraph_rebase(self):
        """Test successful execution of trivial quantum graph, with --rebase
        used to force redefinition of the output collection.
        """
        # Pass one input collection here for the usual test setup; we'll
        # override it later.
        args = _makeArgs(butler_config=self.root, input="test1", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # We'll actually pass two input collections in. One is empty, but
        # the stuff we're testing here doesn't care.
        args.input = ["test2", "test1"]
        butler.registry.registerCollection("test2", CollectionType.RUN)

        # Set up the output collection with a sequence that doesn't end the
        # same way as the input collection. This is normally an error.
        butler.registry.registerCollection("output", CollectionType.CHAINED)
        butler.registry.registerCollection("unexpected_input", CollectionType.RUN)
        butler.registry.registerCollection("output/run0", CollectionType.RUN)
        butler.registry.setCollectionChain("output", ["test2", "unexpected_input", "test1", "output/run0"])

        # Without --rebase, the inconsistent input and output collections are
        # an error.
        with self.assertRaises(ValueError):
            fwk.makeGraph(self.pipeline, args)

        # With --rebase, the output collection gets redefined.
        args.rebase = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # Check the rebased chain: new run first, then the prior members.
        butler.registry.refresh()
        self.assertEqual(
            list(butler.registry.getCollectionChain("output")),
            [args.output_run, "output/run0", "test2", "test1", "unexpected_input"],
        )
    def test_simple_qgraph_qbb(self):
        """Test successful execution of trivial quantum graph in QBB mode."""
        args = _makeArgs(
            butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        output_run = qgraph.metadata["output_run"]
        args.output_run = output_run

        # QBB must run from serialized graph.
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            # NOTE(review): the three positional booleans follow the
            # transfer_from_graph signature -- verify against
            # lsst.pipe.base.script.transfer_from_graph.
            n1 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, 31)

        self.assertEqual(taskFactory.countExec, self.nQuanta)

        # Update the output run and try again.
        new_output_run = output_run + "_new"
        qgraph.updateRun(new_output_run, metadata_key="output_run", update_graph_id=True)
        self.assertEqual(qgraph.metadata["output_run"], new_output_run)

        taskFactory = AddTaskFactoryMock()
        with tempfile.NamedTemporaryFile(suffix=".qgraph") as temp_graph:
            qgraph.saveUri(temp_graph.name)

            args = _makeArgs(butler_config=self.root, qgraph=temp_graph.name, config_search_path=[])

            # Check that pre-exec-init can run.
            fwk.preExecInitQBB(taskFactory, args)

            # Run whole thing.
            fwk.runGraphQBB(taskFactory, args)

            # Transfer the datasets to the butler.
            n2 = transfer_from_graph(temp_graph.name, self.root, True, False, False)
            self.assertEqual(n1, n2)
    def testEmptyQGraph(self):
        """Test that making an empty QG produces the right error messages."""
        # We make QG generation fail by populating one input collection in the
        # butler while using a different one (that we only register, not
        # populate) to make the QG.
        args = _makeArgs(butler_config=self.root, input="bad_input", output="output")
        butler = makeSimpleButler(self.root, run="good_input", inMemory=False)
        butler.registry.registerCollection("bad_input")
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        with self.assertLogs(level=logging.CRITICAL) as cm:
            qgraph = fwk.makeGraph(self.pipeline, args)
        # Two CRITICAL messages are expected: empty initial query, then the
        # per-dataset diagnostic naming the empty input collection.
        self.assertRegex(
            cm.output[0], ".*Initial data ID query returned no rows, so QuantumGraph will be empty.*"
        )
        self.assertRegex(cm.output[1], ".*No datasets.*bad_input.*")
        self.assertIsNone(qgraph)
    def testSimpleQGraphNoSkipExisting_inputs(self):
        """Test for case when output data for one task already appears in
        _input_ collection, but no ``--extend-run`` or ``--skip-existing``
        option is present.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with all outputs of task0.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
        # With current implementation graph has all nQuanta quanta, but when
        # executing one quantum is skipped.
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta)
    def testSimpleQGraphSkipExisting_inputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _input_ collection. No ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output="output",
            skip_existing_in=("test",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the input collection with all outputs of task0.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: [
                    "add_dataset0",
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_config",
                    "task0_metadata",
                    "task0_log",
                ]
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        qgraph = fwk.makeGraph(self.pipeline, args)
        # If all quanta are skipped, the task is not included in the graph.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)
    def testSimpleQGraphSkipExisting_outputs(self):
        """Test for ``--skip-existing`` with output data for one task already
        appears in _output_ collection. The ``--extend-run`` option is needed
        for this case.
        """
        args = _makeArgs(
            butler_config=self.root,
            input="test",
            output_run="output/run",
            skip_existing_in=("output/run",),
        )
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        # Pre-populate the *output run* with all outputs of task0.
        populateButler(
            self.pipeline,
            butler,
            datasetTypes={
                args.input: ["add_dataset0"],
                args.output_run: [
                    "add_dataset1",
                    "add2_dataset1",
                    "add_init_output1",
                    "task0_metadata",
                    "task0_log",
                    "task0_config",
                ],
            },
        )

        fwk = CmdLineFwk()
        taskFactory = AddTaskFactoryMock()

        # fails without --extend-run
        with self.assertRaisesRegex(ValueError, "--extend-run was not given"):
            qgraph = fwk.makeGraph(self.pipeline, args)

        # retry with --extend-run
        args.extend_run = True
        qgraph = fwk.makeGraph(self.pipeline, args)

        # First task has no remaining quanta, so is left out completely.
        self.assertEqual(len(qgraph.taskGraph), self.nQuanta - 1)
        # Graph does not include quantum for first task.
        self.assertEqual(len(qgraph), self.nQuanta - 1)

        # run whole thing
        fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, self.nQuanta - 1)
    def testSimpleQGraphOutputsFail(self):
        """Test continuing execution of trivial quantum graph with partial
        outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Factory configured to raise on the third quantum.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.find_dataset("add2_dataset2", collections=args.output, instrument="INSTR", detector=0)
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.find_dataset("task1_metadata", collections=args.output, instrument="INSTR", detector=0)
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # Re-run with skip-existing but WITHOUT clobber: the partial outputs
        # left behind above are an error.
        taskFactory.stopAt = -1
        args.skip_existing_in = (args.output,)
        args.extend_run = True
        args.no_versions = True
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
    def testSimpleQGraphClobberOutputs(self):
        """Test continuing execution of trivial quantum graph with
        --clobber-outputs.
        """
        args = _makeArgs(butler_config=self.root, input="test", output="output")
        butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
        populateButler(self.pipeline, butler)

        fwk = CmdLineFwk()
        # Factory configured to raise on the third quantum.
        taskFactory = AddTaskFactoryMock(stopAt=3)

        qgraph = fwk.makeGraph(self.pipeline, args)

        # should have one task and number of quanta
        self.assertEqual(len(qgraph), self.nQuanta)

        # Ensure that the output run used in the graph is also used in
        # the pipeline execution. It is possible for makeGraph and runPipeline
        # to calculate time-stamped runs across a second boundary.
        args.output_run = qgraph.metadata["output_run"]

        # run first three quanta
        with self.assertRaises(MPGraphExecutorError):
            fwk.runPipeline(qgraph, taskFactory, args)
        self.assertEqual(taskFactory.countExec, 3)

        butler.registry.refresh()

        # drop one of the two outputs from one task
        ref1 = butler.find_dataset(
            "add2_dataset2", collections=args.output, data_id=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref1)
        # also drop the metadata output
        ref2 = butler.find_dataset(
            "task1_metadata", collections=args.output, data_id=dict(instrument="INSTR", detector=0)
        )
        self.assertIsNotNone(ref2)
        butler.pruneDatasets([ref1, ref2], disassociate=True, unstore=True, purge=True)

        # Re-run with skip-existing AND clobber: the quantum with partial
        # outputs is redone instead of failing.
        taskFactory.stopAt = -1
        args.skip_existing = True
        args.extend_run = True
        args.clobber_outputs = True
        args.no_versions = True
        fwk.runPipeline(qgraph, taskFactory, args)
        # number of executed quanta is incremented
        self.assertEqual(taskFactory.countExec, self.nQuanta + 1)
def testSimpleQGraphReplaceRun(self):
    """Test repeated execution of trivial quantum graph with
    --replace-run.

    Exercises --replace-run alone, combined with --prune-replaced in
    both "unstore" and "purge" modes, and verifies that re-running with
    changed inputs fails without modifying the data repository.
    """
    args = _makeArgs(butler_config=self.root, input="test", output="output", output_run="output/run1")
    butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
    populateButler(self.pipeline, butler)

    fwk = CmdLineFwk()
    taskFactory = AddTaskFactoryMock()

    qgraph = fwk.makeGraph(self.pipeline, args)

    # should have one task and number of quanta
    self.assertEqual(len(qgraph), self.nQuanta)

    # deep copy is needed because quanta are updated in place
    # NOTE(review): no explicit copy is visible here — presumably
    # makeGraph is re-invoked below instead of reusing this graph;
    # confirm the comment is still accurate.
    fwk.runPipeline(qgraph, taskFactory, args)
    self.assertEqual(taskFactory.countExec, self.nQuanta)

    # need to refresh collections explicitly (or make new butler/registry)
    butler.registry.refresh()
    collections = set(butler.registry.queryCollections(...))
    self.assertEqual(collections, {"test", "output", "output/run1"})

    # number of datasets written by pipeline:
    # - nQuanta of init_outputs
    # - nQuanta of configs
    # - packages (single dataset)
    # - nQuanta * two output datasets
    # - nQuanta of metadata
    # - nQuanta of log output
    n_outputs = self.nQuanta * 6 + 1
    refs = butler.registry.queryDatasets(..., collections="output/run1")
    self.assertEqual(len(list(refs)), n_outputs)

    # re-run with --replace-run (--inputs is ignored, as long as it hasn't
    # changed)
    args.replace_run = True
    args.output_run = "output/run2"
    qgraph = fwk.makeGraph(self.pipeline, args)
    fwk.runPipeline(qgraph, taskFactory, args)

    butler.registry.refresh()
    collections = set(butler.registry.queryCollections(...))
    self.assertEqual(collections, {"test", "output", "output/run1", "output/run2"})

    # new output collection
    refs = butler.registry.queryDatasets(..., collections="output/run2")
    self.assertEqual(len(list(refs)), n_outputs)

    # old output collection is still there
    refs = butler.registry.queryDatasets(..., collections="output/run1")
    self.assertEqual(len(list(refs)), n_outputs)

    # re-run with --replace-run and --prune-replaced=unstore
    args.replace_run = True
    args.prune_replaced = "unstore"
    args.output_run = "output/run3"
    qgraph = fwk.makeGraph(self.pipeline, args)
    fwk.runPipeline(qgraph, taskFactory, args)

    butler.registry.refresh()
    collections = set(butler.registry.queryCollections(...))
    self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run3"})

    # new output collection
    refs = butler.registry.queryDatasets(..., collections="output/run3")
    self.assertEqual(len(list(refs)), n_outputs)

    # old output collection is still there, and it has all datasets but
    # non-InitOutputs are not in datastore
    refs = butler.registry.queryDatasets(..., collections="output/run2")
    refs = list(refs)
    self.assertEqual(len(refs), n_outputs)
    # InitOutput-like datasets (packages, per-task configs,
    # add_init_output*) must still be retrievable; everything else must
    # raise because its artifact was unstored.
    initOutNameRe = re.compile("packages|task.*_config|add_init_output.*")
    for ref in refs:
        if initOutNameRe.fullmatch(ref.datasetType.name):
            butler.get(ref)
        else:
            with self.assertRaises(FileNotFoundError):
                butler.get(ref)

    # re-run with --replace-run and --prune-replaced=purge
    # This time also remove --input; passing the same inputs that we
    # started with and not passing inputs at all should be equivalent.
    args.input = None
    args.replace_run = True
    args.prune_replaced = "purge"
    args.output_run = "output/run4"
    qgraph = fwk.makeGraph(self.pipeline, args)
    fwk.runPipeline(qgraph, taskFactory, args)

    butler.registry.refresh()
    collections = set(butler.registry.queryCollections(...))
    # output/run3 should disappear now
    self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})

    # new output collection
    refs = butler.registry.queryDatasets(..., collections="output/run4")
    self.assertEqual(len(list(refs)), n_outputs)

    # Trying to run again with inputs that aren't exactly what we started
    # with is an error, and the kind that should not modify the data repo.
    with self.assertRaises(ValueError):
        args.input = ["test", "output/run2"]
        args.prune_replaced = None
        args.replace_run = True
        args.output_run = "output/run5"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)
    butler.registry.refresh()
    collections = set(butler.registry.queryCollections(...))
    # collection set unchanged: the failed attempt left the repo intact
    self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
    with self.assertRaises(ValueError):
        args.input = ["output/run2", "test"]
        args.prune_replaced = None
        args.replace_run = True
        args.output_run = "output/run6"
        qgraph = fwk.makeGraph(self.pipeline, args)
        fwk.runPipeline(qgraph, taskFactory, args)
    butler.registry.refresh()
    collections = set(butler.registry.queryCollections(...))
    # again unchanged after the second failed attempt
    self.assertEqual(collections, {"test", "output", "output/run1", "output/run2", "output/run4"})
def testSubgraph(self):
    """Test successful execution of trivial quantum graph."""
    args = _makeArgs(butler_config=self.root, input="test", output="output")
    butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
    populateButler(self.pipeline, butler)

    fwk = CmdLineFwk()
    qgraph = fwk.makeGraph(self.pipeline, args)

    # Pick the first two nodes for execution; this relies on the node
    # ordering matching execution order.
    num_nodes = 2
    node_ids = [node.nodeId for node in qgraph][:num_nodes]

    self.assertEqual(len(qgraph.taskGraph), self.nQuanta)
    self.assertEqual(len(qgraph), self.nQuanta)

    with (
        makeTmpFile(suffix=".qgraph") as graph_path,
        makeSQLiteRegistry(universe=butler.dimensions) as registry_config,
    ):
        with open(graph_path, "wb") as stream:
            qgraph.save(stream)

        args = _makeArgs(
            qgraph=graph_path,
            qgraph_node_id=node_ids,
            registryConfig=registry_config,
            execution_butler_location=None,
        )
        fwk = CmdLineFwk()

        # Reloading by node id should read back only the selected subset.
        qgraph = fwk.makeGraph(pipeline=None, args=args)
        self.assertEqual(len(qgraph), num_nodes)
def testShowGraph(self):
    """Test for --show options for quantum graph."""
    num_quanta = 2
    _, qgraph = makeSimpleQGraph(num_quanta, root=self.root)

    info = ShowInfo(["graph"])
    info.show_graph_info(qgraph)
    # The "graph" request must be marked as handled.
    self.assertEqual(info.handled, {"graph"})
def testShowGraphWorkflow(self):
    """Test the --show workflow and uri options for a quantum graph."""
    num_quanta = 2
    _, qgraph = makeSimpleQGraph(num_quanta, root=self.root)

    info = ShowInfo(["workflow"])
    info.show_graph_info(qgraph)
    self.assertEqual(info.handled, {"workflow"})

    # TODO: cannot test "uri" option presently, it instantiates
    # butler from command line options and there is no way to pass butler
    # mock to that code.
    info = ShowInfo(["uri"])
    with self.assertRaises(ValueError):  # No args given
        info.show_graph_info(qgraph)
def testSimpleQGraphDatastoreRecords(self):
    """Test quantum graph generation with --qgraph-datastore-records.

    Verifies that datastore records are attached to each quantum and
    that only the quantum with a pre-existing input carries file
    records.
    """
    args = _makeArgs(
        butler_config=self.root, input="test", output="output", qgraph_datastore_records=True
    )
    butler = makeSimpleButler(self.root, run=args.input, inMemory=False)
    populateButler(self.pipeline, butler)

    fwk = CmdLineFwk()
    qgraph = fwk.makeGraph(self.pipeline, args)
    self.assertEqual(len(qgraph), self.nQuanta)
    for i, qnode in enumerate(qgraph):
        quantum = qnode.quantum
        # records mapping is present (possibly empty) on every quantum
        self.assertIsNotNone(quantum.datastore_records)
        # only the first quantum has a pre-existing input
        if i == 0:
            datastore_name = "FileDatastore@<butlerRoot>"
            self.assertEqual(set(quantum.datastore_records.keys()), {datastore_name})
            records_data = quantum.datastore_records[datastore_name]
            # presumably keyed by dataset id — exactly one entry here;
            # verify against the datastore-records container docs.
            records = dict(records_data.records)
            self.assertEqual(len(records), 1)
            _, records = records.popitem()
            records = records["file_datastore_records"]
            # single file record pointing at the pre-existing input
            self.assertEqual(
                [record.path for record in records],
                ["test/add_dataset0/add_dataset0_INSTR_det0_test.pickle"],
            )
        else:
            self.assertEqual(quantum.datastore_records, {})
class MyMemoryTestCase(lsst.utils.tests.MemoryTestCase):
    """General file leak detection.

    No test methods of its own; the checks inherited from
    `lsst.utils.tests.MemoryTestCase` do the work.
    """
def setup_module(module):
    """Initialize pytest module.

    Parameters
    ----------
    module : `module`
        Module object being set up (unused here).
    """
    lsst.utils.tests.init()
# Allow running this test file directly with ``python test_cmdLineFwk.py``.
if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()