# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

__all__ = ['CmdLineFwk']

# -------------------------------
# Imports of standard modules --
# -------------------------------
import argparse
import datetime
import fnmatch
import logging
import pickle
import re
import sys
from typing import List, Optional, Tuple
import warnings

# -----------------------------
# Imports for other modules --
# -----------------------------
from lsst.daf.butler import (
    Butler,
    CollectionSearch,
    CollectionType,
    DatasetRef,
    DatasetTypeRestriction,
    Registry,
)
from lsst.daf.butler.registry import MissingCollectionError
import lsst.log
import lsst.pex.config as pexConfig
from lsst.pipe.base import GraphBuilder, Pipeline, QuantumGraph
from .cmdLineParser import makeParser
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor
from .taskFactory import TaskFactory
from . import util
from lsst.utils import doImport

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

# logging properties
_LOG_PROP = """\
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern={}
"""

_LOG = logging.getLogger(__name__.partition(".")[2])


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = list(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = []
            self.exists = False

    def __str__(self):
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: List[Tuple[str, DatasetTypeRestriction]]
    """The definition of the collection, if it already exists (`list`).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `argparse.Namespace`
        Parsed command-line arguments.  The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``input``
            Input collections of any type; may be any expression handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """
    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                runName, _ = self.output.chain[0]
            else:
                runName = "{}/{:%Y%m%dT%Hh%Mm%Ss}".format(self.output, datetime.datetime.now())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, it is OK to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        self.inputs = list(CollectionSearch.fromExpression(args.input))
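
    # Illustrative note (not part of the original source): when only --output
    # is given and --extend-run is not, the run name is derived from the
    # chained collection name plus a timestamp.  For example, assuming
    # output="u/user/coadd" and a clock reading of 2020-07-23 14:30:05, the
    # format string above would produce "u/user/coadd/20200723T14h30m05s".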

    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            raise ValueError("Cannot use --output with existing collection with --inputs.")
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(f"Cannot --extend-run; output collection "
                             f"'{self.outputRun.name}' does not exist.")
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(f"Output run '{self.outputRun.name}' already exists, but "
                             f"--extend-run was not given.")
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")
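
    # Hedged usage sketch (option combinations are illustrative, not from the
    # original source): the consistency rules above mean, for example, that
    #
    #     --output u/user/chain --extend-run         # reuse newest run in chain
    #     --output u/user/chain --replace-run        # swap in a fresh run
    #     --input raw --output u/user/existingChain  # rejected: chain exists
    #     --prune-replaced                           # rejected without --replace-run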

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced, _ = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug("Simulating collection search in '%s' after removing '%s'.",
                           self.output.name, replaced)
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self

    @classmethod
    def makeReadButler(cls, args: argparse.Namespace):
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeRegistryAndCollections(cls, args: argparse.Namespace
                                   ) -> Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler.registry, inputs, run

    @classmethod
    def makeWriteButler(cls, args: argparse.Namespace) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced, _ = chainDefinition.pop(0)
                if args.prune_replaced:
                    # TODO: DM-23671: need a butler API for pruning an
                    # entire RUN collection, then apply it to 'replaced'
                    # here.
                    raise NotImplementedError("Support for --prune-replaced is not yet implemented.")
            chainDefinition.insert(0, self.outputRun.name)
            chainDefinition = CollectionSearch.fromExpression(chainDefinition)
            _LOG.debug("Preparing butler to write to '%s' and read from '%s'=%s",
                       self.outputRun.name, self.output.name, chainDefinition)
            return Butler(butler=butler, run=self.outputRun.name, collections=self.output.name,
                          chains={self.output.name: chainDefinition})
        else:
            inputs = CollectionSearch.fromExpression([self.outputRun.name] + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            return Butler(butler=butler, run=self.outputRun.name, collections=inputs)
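
    # Illustrative walk-through (assumption, not from the original source):
    # given an existing chain ["run1", "calib", "raw"] and --replace-run,
    # chainDefinition becomes ["calib", "raw"] after the pop above, and then
    # ["run2", "calib", "raw"] once the new output run is inserted at the
    # front, so readers always see the newest run first.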

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: List[Tuple[str, DatasetTypeRestriction]]
    """Input collections, including those also used for outputs and any
    restrictions on dataset types (`list`).
    """


class _FilteredStream:
    """A file-like object that filters some config fields.

    Note
    ----
    This class depends on implementation details of the
    ``Config.saveToStream`` method, in particular that it uses a single call
    to ``write()`` to save information about a single config field, and that
    this call combines the comment string(s) for a field with the field path
    and value.  This class will not work reliably on the "import" strings, so
    imports should be disabled by passing ``skipImports=True`` to
    ``saveToStream()``.
    """
    def __init__(self, pattern):
        # obey case if pattern isn't lowercase or requests NOIGNORECASE
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(f"Matching \"{pattern}\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)
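
# Hedged usage sketch for _FilteredStream (not part of the original source):
# a pattern such as "doWrite*" matches case-insensitively, while
# "doWrite*:NOIGNORECASE" strips the suffix and matches exactly.  For example,
#
#     stream = _FilteredStream("doWrite*")
#     taskDef.config.saveToStream(stream, root="config", skipImports=True)
#
# would print only the config lines whose field path matches "doWrite*".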


# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from the command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 9999  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass

    def parseAndRun(self, argv=None):
        """
        This method is the main entry point for this class; it parses the
        command line and executes all commands.

        Parameters
        ----------
        argv : `list` of `str`, optional
            List of command line arguments; if not specified then
            ``sys.argv[1:]`` is used.
        """

        if argv is None:
            argv = sys.argv[1:]

        # start with parsing command line; only do partial parsing now as
        # the tasks can add more arguments later
        parser = makeParser()
        args = parser.parse_args(argv)

        # First thing to do is to set up logging.
        self.configLog(args.longlog, args.loglevel)

        taskFactory = TaskFactory()

        # make pipeline out of command line arguments (can return empty pipeline)
        try:
            pipeline = self.makePipeline(args)
        except Exception as exc:
            print("Failed to build pipeline: {}".format(exc), file=sys.stderr)
            raise

        if args.subcommand == "build":
            # stop here but process --show option first
            self.showInfo(args, pipeline)
            return 0

        # make quantum graph
        try:
            qgraph = self.makeGraph(pipeline, args)
        except Exception as exc:
            print("Failed to build graph: {}".format(exc), file=sys.stderr)
            raise

        # optionally dump some info
        self.showInfo(args, pipeline, qgraph)

        if qgraph is None:
            # No need to raise an exception here; the code that makes the
            # graph should have printed a warning message already.
            return 2

        if args.subcommand == "qgraph":
            # stop here
            return 0

        # execute
        if args.subcommand == "run":
            return self.runPipeline(qgraph, taskFactory, args)
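
    # Hedged usage sketch (the argument strings are illustrative, not from
    # the original source):
    #
    #     fwk = CmdLineFwk()
    #     status = fwk.parseAndRun(["run", "-b", "/repo/butler.yaml",
    #                               "-i", "raw", "--output", "u/user/demo",
    #                               "-p", "pipeline.yaml"])
    #
    # returns 0 on success, or 2 if the quantum graph came out empty.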

    @staticmethod
    def configLog(longlog, logLevels):
        """Configure logging system.

        Parameters
        ----------
        longlog : `bool`
            If `True` then make log messages appear in "long format".
        logLevels : `list` of `tuple`
            Per-component logging levels; each item in the list is a tuple
            (component, level), where ``component`` is a logger name or `None`
            for the root logger, and ``level`` is a logging level name
            ('DEBUG', 'INFO', etc.).
        """
        if longlog:
            message_fmt = "%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n"
        else:
            message_fmt = "%c %p: %m%n"

        # global logging config
        lsst.log.configure_prop(_LOG_PROP.format(message_fmt))

        # Forward all Python logging to lsst.log
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        lgr.addHandler(lsst.log.LogHandler())

        # also capture warnings and send them to logging
        logging.captureWarnings(True)

        # configure individual loggers
        for component, level in logLevels:
            level = getattr(lsst.log.Log, level.upper(), None)
            if level is not None:
                # set logging level for lsst.log
                logger = lsst.log.Log.getLogger(component or "")
                logger.setLevel(level)
                # set logging level for Python logging
                pyLevel = lsst.log.LevelTranslator.lsstLog2logging(level)
                logging.getLogger(component).setLevel(pyLevel)
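
    # Hedged usage sketch (values are illustrative, not from the original
    # source): logLevels expects (component, level) tuples, e.g.
    #
    #     CmdLineFwk.configLog(longlog=False,
    #                          logLevels=[(None, "WARN"),
    #                                     ("lsst.daf.butler", "DEBUG")])
    #
    # which quiets the root logger while enabling butler debug output.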

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
        """
        if args.pipeline:
            pipeline = Pipeline.fromFile(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)
            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)
            elif action.action == "delete_task":
                pipeline.removeTask(action.label)
            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)
            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)
            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.toFile(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
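
    # Hedged usage sketch (the option-to-action mapping is illustrative, not
    # from the original source): command-line options are translated into
    # ordered pipeline actions, roughly
    #
    #     -t some.module.IsrTask:isr  -> ("new_task", value, label="isr")
    #     -c isr.doWrite=False        -> ("config", "doWrite=False", "isr")
    #     -C isr=overrides.py         -> ("configfile", "overrides.py", "isr")
    #
    # applied in the order they appear on the command line.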

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if the graph is read from a
            pickle file.
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
        """

        if args.qgraph:

            with open(args.qgraph, 'rb') as pickleFile:
                qgraph = pickle.load(pickleFile)
            if not isinstance(qgraph, QuantumGraph):
                raise TypeError("QuantumGraph pickle file has incorrect object type: {}".format(
                    type(qgraph)))

            # pipeline must not be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")

        else:

            registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)

            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(registry,
                                        skipExisting=args.skip_existing)
            qgraph = graphBuilder.makeGraph(pipeline, collections, run, args.data_query)

        # count quanta in graph; give a warning and return None if it is empty
        nQuanta = qgraph.countQuanta()
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info("QuantumGraph contains %d quanta for %d tasks",
                      nQuanta, len(qgraph))

        if args.save_qgraph:
            with open(args.save_qgraph, "wb") as pickleFile:
                pickle.dump(qgraph, pickleFile)

        if args.save_single_quanta:
            for iq, sqgraph in enumerate(qgraph.quantaAsQgraph()):
                filename = args.save_single_quanta.format(iq)
                with open(filename, "wb") as pickleFile:
                    pickle.dump(sqgraph, pickleFile)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        return qgraph
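
    # Hedged usage sketch (file names are illustrative, not from the original
    # source): a graph saved with --save-qgraph can be fed back in later, e.g.
    #
    #     pipetask qgraph ... --save-qgraph graph.pickle
    #     pipetask run --qgraph graph.pickle ...
    #
    # in which case the pipeline argument must be empty, as checked above.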

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute a complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `argparse.Namespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses the fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        preExecInit = PreExecInit(butler, taskFactory, args.skip_existing)
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types,
                               saveVersions=not args.no_versions)

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(taskFactory,
                                                    skipExisting=args.skip_existing,
                                                    enableLsstDebug=args.enableLsstDebug)
            executor = MPGraphExecutor(numProc=args.processes, timeout=self.MP_TIMEOUT,
                                       quantumExecutor=quantumExecutor,
                                       executionGraphFixup=graphFixup)
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler)

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what,
                       "', '".join("pipeline config[=XXX] dump-config[=Task] history=XXX"
                                   " tasks graph workflow".split())),
                      file=sys.stderr)
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True` then dump complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)
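
    # Hedged usage sketch (values are illustrative, not from the original
    # source): --show config accepts an optional task label and glob pattern,
    # e.g.
    #
    #     --show config                       # all fields of all tasks
    #     --show config=isr::doWrite*         # matching fields of task "isr"
    #     --show config=doBias*:NOIGNORECASE  # case-sensitive match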

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        """

        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        cpath, _, cname = pattern.rpartition(".")
        found = False
        for taskDef in tasks:
            try:
                if not cpath:
                    # looking for a top-level field
                    hconfig = taskDef.config
                else:
                    hconfig = eval("config." + cpath, {}, {"config": taskDef.config})
            except AttributeError:
                # Means this config object has no such field, but maybe some
                # other task has it.
                continue
            except Exception:
                # Any other exception probably means some error in the
                # expression.
                print(f"ERROR: Failed to evaluate field expression `{pattern}'", file=sys.stderr)
                sys.exit(1)

            if hasattr(hconfig, cname):
                print(f"### Configuration field for task `{taskDef.label}'")
                print(pexConfig.history.format(hconfig, cname))
                found = True

        if not found:
            print(f"None of the tasks has a field named {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNodes in graph:
            print(taskNodes.taskDef)

            for iq, quantum in enumerate(taskNodes.quanta):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.predictedInputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependencies to stdout.

        The input and predicted output URIs based on the Butler repo are
        printed.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `argparse.Namespace`
            Parsed command line.
        """
        butler = _ButlerFactory.makeReadButler(args)
        hashToParent = {}
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            shortname = taskDef.taskName.split('.')[-1]
            print("Quantum {}: {}".format(iq, shortname))
            print("  inputs:")
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    if butler.datastore.exists(ref):
                        print("    {}".format(butler.datastore.getUri(ref)))
                    else:
                        fakeRef = DatasetRef(ref.datasetType, ref.dataId)
                        print("    {}".format(butler.datastore.getUri(fakeRef, predict=True)))
            print("  outputs:")
            for key, refs in quantum.outputs.items():
                for ref in refs:
                    if butler.datastore.exists(ref):
                        print("    {}".format(butler.datastore.getUri(ref)))
                    else:
                        fakeRef = DatasetRef(ref.datasetType, ref.dataId)
                        print("    {}".format(butler.datastore.getUri(fakeRef, predict=True)))
                    # Store hash to figure out dependencies
                    dhash = hash((key, ref.dataId))
                    hashToParent[dhash] = iq

        uses = set()
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    dhash = hash((key, ref.dataId))
                    if dhash in hashToParent and (iq, hashToParent[dhash]) not in uses:
                        parentIq = hashToParent[dhash]
                        uses.add((iq, parentIq))  # iq uses parentIq
                        print("Parent Quantum {} - Child Quantum {}".format(parentIq, iq))

    def _importGraphFixup(self, args):
        """Import and instantiate a graph fixup object.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`

        Raises
        ------
        ValueError
            Raised if import fails, the factory call raises an exception, or
            the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImport(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup