Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py : 12%

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
22"""Module defining CmdLineFwk class and related methods.
23"""
25__all__ = ['CmdLineFwk']

# -------------------------------
#  Imports of standard modules --
# -------------------------------
import argparse
import datetime
import fnmatch
import logging
import pickle
import re
import sys
from typing import List, Optional, Tuple
import warnings

# -----------------------------
#  Imports for other modules --
# -----------------------------
from lsst.daf.butler import (
    Butler,
    CollectionSearch,
    CollectionType,
    DatasetRef,
    DatasetTypeRestriction,
    Registry,
)
from lsst.daf.butler.registry import MissingCollectionError
import lsst.log
import lsst.pex.config as pexConfig
from lsst.pipe.base import GraphBuilder, Pipeline, QuantumGraph
from .cmdLineParser import makeParser
from .dotTools import graph2dot, pipeline2dot
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .taskFactory import TaskFactory
from . import util

# ----------------------------------
#  Local non-exported definitions --
# ----------------------------------

# logging properties
_LOG_PROP = """\
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern={}
"""

_LOG = logging.getLogger(__name__.partition(".")[2])


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = list(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = []
            self.exists = False

    def __str__(self):
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: List[Tuple[str, DatasetTypeRestriction]]
    """The definition of the collection, if it already exists (`list`).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `argparse.Namespace`
        Parsed command-line arguments.  The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; may be any type handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable`` is `True` but there are no output collections.
    """
    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                runName, _ = self.output.chain[0]
            else:
                runName = "{}/{:%Y%m%dT%Hh%Mm%Ss}".format(self.output, datetime.datetime.now())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        self.inputs = list(CollectionSearch.fromExpression(args.input))

    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            raise ValueError("Cannot use --output with existing collection with --inputs.")
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(f"Cannot --extend-run; output collection "
                             f"'{self.outputRun.name}' does not exist.")
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(f"Output run '{self.outputRun.name}' already exists, but "
                             f"--extend-run was not given.")
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")
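
    # Illustrative option combinations for the checks above (collection names
    # are hypothetical; this restates the conditions, adding no behavior):
    #
    #     --output=chain --output-run=run    # ok when 'run' does not exist yet
    #     --output-run=run --extend-run      # ok only when 'run' already exists
    #     --prune-replaced                   # error: requires --replace-run
    #     --replace-run                      # error unless --output names an
    #                                        # existing CHAINED collection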

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced, _ = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug("Simulating collection search in '%s' after removing '%s'.",
                           self.output.name, replaced)
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self
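
    # Sketch of the search-path construction above, assuming --output=out
    # exists with chain [("run2", r), ("run1", r), ("raw", r)] (hypothetical
    # names, r = a DatasetTypeRestriction):
    #
    #     butler, inputs, factory = _ButlerFactory._makeReadParts(args)
    #     # default:       inputs searches ["out"]
    #     # --replace-run: inputs searches [("run1", r), ("raw", r)]
    #     # --extend-run:  the output run name is prepended to the search list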

    @classmethod
    def makeReadButler(cls, args: argparse.Namespace):
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeRegistryAndCollections(cls, args: argparse.Namespace
                                   ) -> Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN` collection
            if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler.registry, inputs, run

    @classmethod
    def makeWriteButler(cls, args: argparse.Namespace) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced, _ = chainDefinition.pop(0)
                if args.prune_replaced:
                    # TODO: DM-23671: need a butler API for pruning an
                    # entire RUN collection, then apply it to 'replaced'
                    # here.
                    raise NotImplementedError("Support for --prune-replaced is not yet implemented.")
            chainDefinition.insert(0, self.outputRun.name)
            chainDefinition = CollectionSearch.fromExpression(chainDefinition)
            _LOG.debug("Preparing butler to write to '%s' and read from '%s'=%s",
                       self.outputRun.name, self.output.name, chainDefinition)
            return Butler(butler=butler, run=self.outputRun.name, collections=self.output.name,
                          chains={self.output.name: chainDefinition})
        else:
            inputs = CollectionSearch.fromExpression([self.outputRun.name] + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            return Butler(butler=butler, run=self.outputRun.name, collections=inputs)
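
    # Sketch of the chain rewriting performed above for --replace-run,
    # assuming an existing chain [("run2", r), ("run1", r)] and a new output
    # run named "run3" (all names hypothetical):
    #
    #     chainDefinition = [("run2", r), ("run1", r)]
    #     chainDefinition.pop(0)             # drop the replaced run "run2"
    #     chainDefinition.insert(0, "run3")  # the new run becomes the head
    #     # the output CHAINED collection is then redefined by passing
    #     # ``chains={output: chainDefinition}`` to `Butler`.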

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: List[Tuple[str, DatasetTypeRestriction]]
    """Input collections, including those also used for outputs and any
    restrictions on dataset types (`list`).
    """


class _FilteredStream:
    """A file-like object that filters some config fields.

    Note
    ----
    This class depends on implementation details of ``Config.saveToStream``
    methods, in particular that that method uses a single call to ``write()``
    to save information about a single config field, and that that call
    combines the comment string(s) for a field with the field path and value.
    This class will not work reliably on the "import" strings, so imports
    should be disabled by passing ``skipImports=True`` to ``saveToStream()``.
    """
    def __init__(self, pattern):
        # obey case if pattern isn't lowercase or requests NOIGNORECASE
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(f"Matching \"{pattern}\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)
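
# A short sketch of how _FilteredStream filters ``saveToStream()`` output;
# the pattern and field names are purely illustrative:
#
#     stream = _FilteredStream("calibrate.do*")
#     stream.write("# doc\nconfig.calibrate.doAstrometry=True\n")
#         # written: the last line matches the glob (case-insensitively)
#     stream.write("# doc\nconfig.isr.doDark=False\n")
#         # suppressed: no match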

# ------------------------
#  Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 9999  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass

    def parseAndRun(self, argv=None):
        """
        This method is the main entry point for this class; it parses the
        command line and executes all commands.

        Parameters
        ----------
        argv : `list` of `str`, optional
            List of command line arguments; if not specified then
            ``sys.argv[1:]`` is used.
        """

        if argv is None:
            argv = sys.argv[1:]

        # start with parsing command line; only do partial parsing now as
        # the tasks can add more arguments later
        parser = makeParser()
        args = parser.parse_args(argv)

        # First thing to do is to set up logging.
        self.configLog(args.longlog, args.loglevel)

        taskFactory = TaskFactory()

        # make pipeline out of command line arguments (can return empty pipeline)
        try:
            pipeline = self.makePipeline(args)
        except Exception as exc:
            print("Failed to build pipeline: {}".format(exc), file=sys.stderr)
            raise

        if args.subcommand == "build":
            # stop here but process --show option first
            self.showInfo(args, pipeline)
            return 0

        # make quantum graph
        try:
            qgraph = self.makeGraph(pipeline, args)
        except Exception as exc:
            print("Failed to build graph: {}".format(exc), file=sys.stderr)
            raise

        # optionally dump some info
        self.showInfo(args, pipeline, qgraph)

        if qgraph is None:
            # No need to raise an exception here; the code that makes the
            # graph should have printed a warning message already.
            return 2

        if args.subcommand == "qgraph":
            # stop here
            return 0

        # execute
        if args.subcommand == "run":
            return self.runPipeline(qgraph, taskFactory, args)
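
    # Typical invocation of the entry point above from a command-line script
    # (a sketch; the surrounding script is hypothetical):
    #
    #     sys.exit(CmdLineFwk().parseAndRun())
    #
    # Return codes follow the logic above: 0 on success, 2 for an empty
    # quantum graph.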

    @staticmethod
    def configLog(longlog, logLevels):
        """Configure logging system.

        Parameters
        ----------
        longlog : `bool`
            If `True` then make log messages appear in "long format".
        logLevels : `list` of `tuple`
            Per-component logging levels; each item in the list is a tuple
            ``(component, level)``, where ``component`` is a logger name or
            `None` for the root logger, and ``level`` is a logging level name
            ('DEBUG', 'INFO', etc.).
        """
        if longlog:
            message_fmt = "%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n"
        else:
            message_fmt = "%c %p: %m%n"

        # global logging config
        lsst.log.configure_prop(_LOG_PROP.format(message_fmt))

        # Forward all Python logging to lsst.log
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        lgr.addHandler(lsst.log.LogHandler())

        # also capture warnings and send them to logging
        logging.captureWarnings(True)

        # configure individual loggers
        for component, level in logLevels:
            level = getattr(lsst.log.Log, level.upper(), None)
            if level is not None:
                # set logging level for lsst.log
                logger = lsst.log.Log.getLogger(component or "")
                logger.setLevel(level)
                # set logging level for Python logging
                pyLevel = lsst.log.LevelTranslator.lsstLog2logging(level)
                logging.getLogger(component).setLevel(pyLevel)
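
    # Example of the ``logLevels`` structure expected above (component names
    # are illustrative):
    #
    #     CmdLineFwk.configLog(longlog=False,
    #                          logLevels=[(None, "INFO"),
    #                                     ("lsst.daf.butler", "DEBUG")])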

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            The constructed pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.fromFile(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)

            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":
                pipeline.removeTask(action.label)

            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)

            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.toFile(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
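
    # Each item of ``args.pipeline_actions`` consumed above carries
    # ``action``, ``label`` and ``value`` attributes; e.g. (hypothetical
    # label/value) an action ("config", label="calibrate",
    # value="doAstrometry=False") results in:
    #
    #     pipeline.addConfigOverride("calibrate", "doAstrometry", "False")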

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline; can be empty or `None` if the graph is read from a
            pickle file.
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
        """

        if args.qgraph:

            with open(args.qgraph, 'rb') as pickleFile:
                qgraph = pickle.load(pickleFile)
                if not isinstance(qgraph, QuantumGraph):
                    raise TypeError("QuantumGraph pickle file has incorrect object type: {}".format(
                        type(qgraph)))

            # pipeline cannot be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")

        else:

            registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)

            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(registry,
                                        skipExisting=args.skip_existing)
            qgraph = graphBuilder.makeGraph(pipeline, collections, run, args.data_query)

        # count quanta in graph; give a warning and return None if it is empty
        nQuanta = qgraph.countQuanta()
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info("QuantumGraph contains %d quanta for %d tasks",
                      nQuanta, len(qgraph))

        if args.save_qgraph:
            with open(args.save_qgraph, "wb") as pickleFile:
                pickle.dump(qgraph, pickleFile)

        if args.save_single_quanta:
            for iq, sqgraph in enumerate(qgraph.quantaAsQgraph()):
                filename = args.save_single_quanta.format(iq)
                with open(filename, "wb") as pickleFile:
                    pickle.dump(sqgraph, pickleFile)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        return qgraph
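
    # Round-trip sketch for the pickle persistence used above (the file name
    # is hypothetical):
    #
    #     with open("pipeline.qgraph", "wb") as f:
    #         pickle.dump(qgraph, f)
    #     with open("pipeline.qgraph", "rb") as f:
    #         qgraph = pickle.load(f)   # later runs may pass pipeline=None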

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `argparse.Namespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        preExecInit = PreExecInit(butler, taskFactory, args.skip_existing)
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types)

        if not args.init_only:
            executor = MPGraphExecutor(numProc=args.processes, timeout=self.MP_TIMEOUT,
                                       skipExisting=args.skip_existing,
                                       enableLsstDebug=args.enableLsstDebug)
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler, taskFactory)

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "dump-config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what, "', '".join("pipeline config[=XXX] dump-config[=Task] history=XXX "
                                         "tasks graph workflow".split())),
                      file=sys.stderr)
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True` then dump complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config\.)?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        """

        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        cpath, _, cname = pattern.rpartition(".")
        found = False
        for taskDef in tasks:
            try:
                if not cpath:
                    # looking for a top-level field
                    hconfig = taskDef.config
                else:
                    hconfig = eval("config." + cpath, {}, {"config": taskDef.config})
            except AttributeError:
                # Means this config object has no such field, but maybe some
                # other task has it.
                continue
            except Exception:
                # Any other exception probably means some error in the
                # expression.
                print(f"ERROR: Failed to evaluate field expression `{pattern}'", file=sys.stderr)
                sys.exit(1)

            if hasattr(hconfig, cname):
                print(f"### Configuration field for task `{taskDef.label}'")
                print(pexConfig.history.format(hconfig, cname))
                found = True

        if not found:
            print(f"None of the tasks has a field named {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNodes in graph:
            print(taskNodes.taskDef)

            for iq, quantum in enumerate(taskNodes.quanta):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.predictedInputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependencies to stdout.

        The input and predicted output URIs based on the Butler repo are
        printed.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `argparse.Namespace`
            Parsed command line.
        """
        butler = _ButlerFactory.makeReadButler(args)
        hashToParent = {}
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            shortname = taskDef.taskName.split('.')[-1]
            print("Quantum {}: {}".format(iq, shortname))
            print("  inputs:")
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    if butler.datastore.exists(ref):
                        print("    {}".format(butler.datastore.getUri(ref)))
                    else:
                        fakeRef = DatasetRef(ref.datasetType, ref.dataId)
                        print("    {}".format(butler.datastore.getUri(fakeRef, predict=True)))
            print("  outputs:")
            for key, refs in quantum.outputs.items():
                for ref in refs:
                    if butler.datastore.exists(ref):
                        print("    {}".format(butler.datastore.getUri(ref)))
                    else:
                        fakeRef = DatasetRef(ref.datasetType, ref.dataId)
                        print("    {}".format(butler.datastore.getUri(fakeRef, predict=True)))
                    # Store hash to figure out dependency
                    dhash = hash((key, ref.dataId))
                    hashToParent[dhash] = iq

        uses = set()
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    dhash = hash((key, ref.dataId))
                    if dhash in hashToParent and (iq, hashToParent[dhash]) not in uses:
                        parentIq = hashToParent[dhash]
                        uses.add((iq, parentIq))  # iq uses parentIq
                        print("Parent Quantum {} - Child Quantum {}".format(parentIq, iq))