# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

__all__ = ['CmdLineFwk']

# -------------------------------
#  Imports of standard modules --
# -------------------------------
import argparse
import datetime
import fnmatch
import logging
import pickle
import re
import sys
from typing import List, Optional, Tuple
import warnings

# -----------------------------
#  Imports for other modules --
# -----------------------------
from lsst.daf.butler import (
    Butler,
    CollectionSearch,
    CollectionType,
    DatasetRef,
    DatasetTypeRestriction,
    Registry,
)
from lsst.daf.butler.registry import MissingCollectionError
import lsst.log
import lsst.pex.config as pexConfig
from lsst.pipe.base import GraphBuilder, Pipeline, QuantumGraph
from lsst.utils import doImport
from .cmdLineParser import makeParser
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor
from .taskFactory import TaskFactory
from . import util

# ----------------------------------
#  Local non-exported definitions --
# ----------------------------------

# logging properties
_LOG_PROP = """\
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern={}
"""

_LOG = logging.getLogger(__name__.partition(".")[2])


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = list(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = []
            self.exists = False

    def __str__(self):
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: List[Tuple[str, DatasetTypeRestriction]]
    """The definition of the collection, if it already exists (`list`).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `argparse.Namespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; may be any expression handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable`` is `True` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                runName, _ = self.output.chain[0]
            else:
                runName = "{}/{:%Y%m%dT%Hh%Mm%Ss}".format(self.output, datetime.datetime.now())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        self.inputs = list(CollectionSearch.fromExpression(args.input))

    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            raise ValueError("Cannot use --output with existing collection with --inputs.")
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(f"Cannot --extend-run; output collection "
                             f"'{self.outputRun.name}' does not exist.")
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(f"Output run '{self.outputRun.name}' already exists, but "
                             f"--extend-run was not given.")
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced, _ = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug("Simulating collection search in '%s' after removing '%s'.",
                           self.output.name, replaced)
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self

    @classmethod
    def makeReadButler(cls, args: argparse.Namespace):
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeRegistryAndCollections(cls, args: argparse.Namespace
                                   ) -> Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN` collection
            if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler.registry, inputs, run

    @classmethod
    def makeWriteButler(cls, args: argparse.Namespace) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced, _ = chainDefinition.pop(0)
                if args.prune_replaced:
                    # TODO: DM-23671: need a butler API for pruning an
                    # entire RUN collection, then apply it to 'replaced'
                    # here.
                    raise NotImplementedError("Support for --prune-replaced is not yet implemented.")
            chainDefinition.insert(0, self.outputRun.name)
            chainDefinition = CollectionSearch.fromExpression(chainDefinition)
            _LOG.debug("Preparing butler to write to '%s' and read from '%s'=%s",
                       self.outputRun.name, self.output.name, chainDefinition)
            return Butler(butler=butler, run=self.outputRun.name, collections=self.output.name,
                          chains={self.output.name: chainDefinition})
        else:
            inputs = CollectionSearch.fromExpression([self.outputRun.name] + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            return Butler(butler=butler, run=self.outputRun.name, collections=inputs)
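
    # For example (hypothetical names): if the existing chained collection
    # "u/user/out" is defined as ["u/user/out/run1", "raw", "calib"], then
    # --replace-run pops "u/user/out/run1" and inserts the new output run,
    # leaving ["u/user/out/run2", "raw", "calib"].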

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: List[Tuple[str, DatasetTypeRestriction]]
    """Input collections, including those also used for outputs and any
    restrictions on dataset types (`list`).
    """


class _FilteredStream:
    """A file-like object that filters some config fields.

    Notes
    -----
    This class depends on implementation details of ``Config.saveToStream``
    methods, in particular that that method uses a single call to ``write()``
    to save information about a single config field, and that this call
    combines the comment string(s) for a field with the field path and value.
    This class will not work reliably on the "import" strings, so imports
    should be disabled by passing ``skipImports=True`` to ``saveToStream()``.
    """
    def __init__(self, pattern):
        # obey case if pattern isn't lowercase or requests NOIGNORECASE
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(f"Matching \"{pattern}\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)
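

# A minimal usage sketch: route config output through a _FilteredStream so
# that only fields matching a glob pattern are printed (hypothetical pattern;
# this mirrors how _showConfig below uses the class):
#
#     stream = _FilteredStream("*.doWrite")
#     taskDef.config.saveToStream(stream, root="config", skipImports=True)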


# ------------------------
#  Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 9999  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass
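
    # Typical programmatic use, equivalent to invoking the command-line
    # activator (subcommand names come from makeParser(); the other flags
    # shown here are hypothetical):
    #
    #     fwk = CmdLineFwk()
    #     status = fwk.parseAndRun(["run", "-b", "/path/to/repo",
    #                               "-p", "pipeline.yaml"])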

    def parseAndRun(self, argv=None):
        """Parse the command line and execute all commands.

        This method is the main entry point for this class.

        Parameters
        ----------
        argv : `list` of `str`, optional
            List of command line arguments; if not specified then
            `sys.argv[1:]` is used.
        """

        if argv is None:
            argv = sys.argv[1:]

        # start with parsing command line; only do partial parsing now as
        # the tasks can add more arguments later
        parser = makeParser()
        args = parser.parse_args(argv)

        # First thing to do is to set up logging.
        self.configLog(args.longlog, args.loglevel)

        taskFactory = TaskFactory()

        # make pipeline out of command line arguments (can return empty pipeline)
        try:
            pipeline = self.makePipeline(args)
        except Exception as exc:
            print("Failed to build pipeline: {}".format(exc), file=sys.stderr)
            raise

        if args.subcommand == "build":
            # stop here but process --show option first
            self.showInfo(args, pipeline)
            return 0

        # make quantum graph
        try:
            qgraph = self.makeGraph(pipeline, args)
        except Exception as exc:
            print("Failed to build graph: {}".format(exc), file=sys.stderr)
            raise

        # optionally dump some info
        self.showInfo(args, pipeline, qgraph)

        if qgraph is None:
            # No need to raise an exception here; the code that makes the
            # graph should have printed a warning message already.
            return 2

        if args.subcommand == "qgraph":
            # stop here
            return 0

        # execute
        if args.subcommand == "run":
            return self.runPipeline(qgraph, taskFactory, args)

    @staticmethod
    def configLog(longlog, logLevels):
        """Configure logging system.

        Parameters
        ----------
        longlog : `bool`
            If `True` then make log messages appear in "long format".
        logLevels : `list` of `tuple`
            Per-component logging levels; each item in the list is a tuple
            (component, level), where ``component`` is a logger name or `None`
            for the root logger, and ``level`` is a logging level name
            ('DEBUG', 'INFO', etc.).
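
        Examples
        --------
        A hypothetical call that keeps the root logger at INFO while raising
        one component to DEBUG::

            CmdLineFwk.configLog(longlog=False,
                                 logLevels=[("lsst.daf.butler", "DEBUG")])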
508 """
509 if longlog:
510 message_fmt = "%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n"
511 else:
512 message_fmt = "%c %p: %m%n"
514 # global logging config
515 lsst.log.configure_prop(_LOG_PROP.format(message_fmt))
517 # Forward all Python logging to lsst.log
518 lgr = logging.getLogger()
519 lgr.setLevel(logging.INFO) # same as in log4cxx config above
520 lgr.addHandler(lsst.log.LogHandler())
522 # also capture warnings and send them to logging
523 logging.captureWarnings(True)
525 # configure individual loggers
526 for component, level in logLevels:
527 level = getattr(lsst.log.Log, level.upper(), None)
528 if level is not None:
529 # set logging level for lsst.log
530 logger = lsst.log.Log.getLogger(component or "")
531 logger.setLevel(level)
532 # set logging level for Python logging
533 pyLevel = lsst.log.LevelTranslator.lsstLog2logging(level)
534 logging.getLogger(component).setLevel(pyLevel)

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            Newly-constructed pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.fromFile(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)

            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":
                pipeline.removeTask(action.label)

            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)

            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.toFile(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
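
    # Each entry of ``args.pipeline_actions`` carries an ``action`` name plus
    # ``label``/``value`` payload; for example (hypothetical label and field),
    # a "config" action with label "calibrate" and value "doWrite=False" is
    # applied by the loop above as:
    #
    #     field, _, value = "doWrite=False".partition("=")
    #     pipeline.addConfigOverride("calibrate", field, value)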

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if the graph is read from a
            pickle file.
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
        """

        if args.qgraph:

            with open(args.qgraph, 'rb') as pickleFile:
                qgraph = pickle.load(pickleFile)
                if not isinstance(qgraph, QuantumGraph):
                    raise TypeError("QuantumGraph pickle file has incorrect object type: {}".format(
                        type(qgraph)))

            # pipeline cannot be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")

        else:

            registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)

            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(registry,
                                        skipExisting=args.skip_existing)
            qgraph = graphBuilder.makeGraph(pipeline, collections, run, args.data_query)

        # count quanta in the graph; warn and return None if it is empty
        nQuanta = qgraph.countQuanta()
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info("QuantumGraph contains %d quanta for %d tasks",
                      nQuanta, len(qgraph))

        if args.save_qgraph:
            with open(args.save_qgraph, "wb") as pickleFile:
                pickle.dump(qgraph, pickleFile)

        if args.save_single_quanta:
            for iq, sqgraph in enumerate(qgraph.quantaAsQgraph()):
                filename = args.save_single_quanta.format(iq)
                with open(filename, "wb") as pickleFile:
                    pickle.dump(sqgraph, pickleFile)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        return qgraph
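
    # A minimal sketch of reusing a previously saved graph (hypothetical
    # path; this mirrors the ``args.qgraph`` branch above):
    #
    #     with open("pipeline.qgraph", "rb") as pickleFile:
    #         qgraph = pickle.load(pickleFile)
    #     assert isinstance(qgraph, QuantumGraph)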

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `argparse.Namespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        preExecInit = PreExecInit(butler, taskFactory, args.skip_existing)
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types)

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(taskFactory,
                                                    skipExisting=args.skip_existing,
                                                    enableLsstDebug=args.enableLsstDebug)
            executor = MPGraphExecutor(numProc=args.processes, timeout=self.MP_TIMEOUT,
                                       quantumExecutor=quantumExecutor,
                                       executionGraphFixup=graphFixup)
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler)
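
    # A hypothetical direct call bypassing parseAndRun (``args`` as produced
    # by makeParser().parse_args(...)):
    #
    #     fwk = CmdLineFwk()
    #     qgraph = fwk.makeGraph(pipeline, args)
    #     fwk.runPipeline(qgraph, TaskFactory(), args)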

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what, "', '".join("pipeline config[=XXX] dump-config[=Task] history=XXX"
                                         " tasks graph workflow".split())),
                      file=sys.stderr)
                sys.exit(1)
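
    # Values accepted by ``--show`` take the form "command[=argument]"; for
    # example (hypothetical task label and field names):
    #
    #     --show pipeline
    #     --show config=calibrate::doWrite
    #     --show history=calibrate::astrometry.maxIter
    #     --show graph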

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True` then dump complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        """

        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        cpath, _, cname = pattern.rpartition(".")
        found = False
        for taskDef in tasks:
            try:
                if not cpath:
                    # looking for top-level field
                    hconfig = taskDef.config
                else:
                    hconfig = eval("config." + cpath, {}, {"config": taskDef.config})
            except AttributeError:
                # Means this config object has no such field, but maybe some
                # other task has it.
                continue
            except Exception:
                # Any other exception probably means some error in the
                # expression.
                print(f"ERROR: Failed to evaluate field expression `{pattern}'", file=sys.stderr)
                sys.exit(1)

            if hasattr(hconfig, cname):
                print(f"### Configuration field for task `{taskDef.label}'")
                print(pexConfig.history.format(hconfig, cname))
                found = True

        if not found:
            print(f"None of the tasks has field named {pattern}", file=sys.stderr)
            sys.exit(1)
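
    # For example (hypothetical field path): "history=calibrate::astrometry.wcs"
    # arrives here with showArgs "calibrate::astrometry.wcs"; the regex yields
    # taskName "calibrate" and pattern "astrometry.wcs", which rpartition(".")
    # splits into cpath "astrometry" and cname "wcs".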

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNodes in graph:
            print(taskNodes.taskDef)

            for iq, quantum in enumerate(taskNodes.quanta):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.predictedInputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependencies to stdout

        The input and predicted output URIs based on the Butler repo are
        printed.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `argparse.Namespace`
            Parsed command line.
        """
        butler = _ButlerFactory.makeReadButler(args)
        hashToParent = {}
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            shortname = taskDef.taskName.split('.')[-1]
            print("Quantum {}: {}".format(iq, shortname))
            print("  inputs:")
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    if butler.datastore.exists(ref):
                        print("    {}".format(butler.datastore.getUri(ref)))
                    else:
                        fakeRef = DatasetRef(ref.datasetType, ref.dataId)
                        print("    {}".format(butler.datastore.getUri(fakeRef, predict=True)))
            print("  outputs:")
            for key, refs in quantum.outputs.items():
                for ref in refs:
                    if butler.datastore.exists(ref):
                        print("    {}".format(butler.datastore.getUri(ref)))
                    else:
                        fakeRef = DatasetRef(ref.datasetType, ref.dataId)
                        print("    {}".format(butler.datastore.getUri(fakeRef, predict=True)))
                    # Store hash to figure out dependencies
                    dhash = hash((key, ref.dataId))
                    hashToParent[dhash] = iq

        uses = set()
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    dhash = hash((key, ref.dataId))
                    if dhash in hashToParent and (iq, hashToParent[dhash]) not in uses:
                        parentIq = hashToParent[dhash]
                        uses.add((iq, parentIq))  # iq uses parentIq
                        print("Parent Quantum {} - Child Quantum {}".format(parentIq, iq))

    def _importGraphFixup(self, args):
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`

        Raises
        ------
        ValueError
            Raised if import fails, the factory call raises an exception, or
            the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImport(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
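

# A minimal sketch of a fixup class accepted via --graph-fixup, assuming the
# ExecutionGraphFixup interface's abstract method is ``fixupQuanta`` (see
# executionGraphFixup.py); a real implementation would reorder or add
# dependencies between quanta rather than pass them through unchanged:
#
#     class NoOpFixup(ExecutionGraphFixup):
#         def fixupQuanta(self, quanta):
#             return quanta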