Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py : 12%

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Module defining CmdLineFwk class and related methods.
23"""
25__all__ = ['CmdLineFwk']
# -------------------------------
# Imports of standard modules --
# -------------------------------
import argparse
import datetime
import fnmatch
import logging
import os
import re
import sys
from typing import List, Optional, Tuple
import warnings

# -----------------------------
# Imports for other modules --
# -----------------------------
from lsst.daf.butler import (
    Butler,
    CollectionSearch,
    CollectionType,
    DatasetTypeRestriction,
    Registry,
)
from lsst.daf.butler.registry import MissingCollectionError
import lsst.log
import lsst.pex.config as pexConfig
from lsst.pipe.base import GraphBuilder, Pipeline, QuantumGraph
from .cmdLineParser import makeParser
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor
from .taskFactory import TaskFactory
from . import util
from lsst.utils import doImport

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

# logging properties
_LOG_PROP = """\
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern={}
"""

_LOG = logging.getLogger(__name__.partition(".")[2])

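# Illustrative note (not part of the original module): the logger name above
# strips the leading package component, e.g.
#
#     >>> "lsst.ctrl.mpexec.cmdLineFwk".partition(".")[2]
#     'ctrl.mpexec.cmdLineFwk'
#
# so messages from this module are logged under "ctrl.mpexec.cmdLineFwk".
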
class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = list(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = []
            self.exists = False

    def __str__(self):
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: List[Tuple[str, DatasetTypeRestriction]]
    """The definition of the collection, if it already exists (`list`).

    Empty if the collection does not already exist.
    """

class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `argparse.Namespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; may be any expression handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """
    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                runName, _ = self.output.chain[0]
            else:
                runName = "{}/{:%Y%m%dT%Hh%Mm%Ss}".format(self.output, datetime.datetime.now())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        self.inputs = list(CollectionSearch.fromExpression(args.input)) if args.input else []

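    # Illustrative note (not part of the original module): when only --output
    # is given, the new RUN collection name is the chained collection name
    # plus a timestamp, e.g.
    #
    #     >>> import datetime
    #     >>> "{}/{:%Y%m%dT%Hh%Mm%Ss}".format(
    #     ...     "u/user/out", datetime.datetime(2020, 5, 1, 12, 0, 0))
    #     'u/user/out/20200501T12h00m00s'
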
    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            raise ValueError("Cannot use --input when the --output collection already exists.")
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(f"Cannot --extend-run; output collection "
                             f"'{self.outputRun.name}' does not exist.")
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(f"Output run '{self.outputRun.name}' already exists, but "
                             f"--extend-run was not given.")
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced, _ = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug("Simulating collection search in '%s' after removing '%s'.",
                           self.output.name, replaced)
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self

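    # Illustrative note (not part of the original module): if the existing
    # output chain is ("u/user/out/run2", "u/user/out/run1", "raw"), then
    # --replace-run drops the leading run and the simulated search path is
    # ("u/user/out/run1", "raw"); without --replace-run the whole chain is
    # searched via its parent name, i.e. ("u/user/out",).
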
    @classmethod
    def makeReadButler(cls, args: argparse.Namespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeRegistryAndCollections(cls, args: argparse.Namespace) -> \
            Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler.registry, inputs, run

    @classmethod
    def makeWriteButler(cls, args: argparse.Namespace) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced, _ = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from the datastore only.
                    with butler.transaction():
                        refs = butler.registry.queryDatasets(..., collections=replaced)
                        butler.pruneDatasets(refs, unstore=True, run=replaced, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all of its datasets; the
                    # collection must be removed from its parent chain first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(
                        f"Unsupported --prune-replaced option '{args.prune_replaced}'."
                    )
            chainDefinition.insert(0, self.outputRun.name)
            chainDefinition = CollectionSearch.fromExpression(chainDefinition)
            _LOG.debug("Preparing butler to write to '%s' and read from '%s'=%s",
                       self.outputRun.name, self.output.name, chainDefinition)
            return Butler(butler=butler, run=self.outputRun.name, collections=self.output.name,
                          chains={self.output.name: chainDefinition})
        else:
            inputs = CollectionSearch.fromExpression([self.outputRun.name] + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            return Butler(butler=butler, run=self.outputRun.name, collections=inputs)

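    # Illustrative note (not part of the original module): with an existing
    # chain ("run1", "calib", "raw") and --replace-run, "run1" is popped and
    # the new output run is prepended, so the CHAINED collection becomes
    # (<new run>, "calib", "raw"). With --prune-replaced the popped run's
    # datasets are additionally unstored ("unstore") or fully deleted
    # ("purge").
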
    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: List[Tuple[str, DatasetTypeRestriction]]
    """Input collections, including those also used for outputs and any
    restrictions on dataset types (`list`).
    """


class _FilteredStream:
    """A file-like object that filters some config fields.

    Note
    ----
    This class depends on implementation details of the
    ``Config.saveToStream`` method, in particular that it uses a single
    ``write()`` call to save the information about a single config field,
    and that the call combines the comment string(s) for a field with the
    field path and value. This class will not work reliably on the "import"
    strings, so imports should be disabled by passing ``skipImports=True``
    to ``saveToStream()``.
    """
    def __init__(self, pattern):
        # obey case if pattern isn't lowercase or requests NOIGNORECASE
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(f"Matching \"{pattern}\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)

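# Illustrative note (not part of the original module): a pattern such as
# "*.doWrite" matches field paths case-insensitively by default, e.g.
#
#     >>> stream = _FilteredStream("*.doWrite")
#     >>> stream.write("# Whether to write output\nconfig.isr.doWrite=True\n")
#     # echoed, because "config.isr.doWrite" matches the pattern
#     >>> stream.write("config.isr.doBias=True\n")
#     # suppressed
#
# Appending ":NOIGNORECASE" (e.g. "*.doWrite:NOIGNORECASE") makes the match
# case-sensitive.
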
# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from the command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 9999  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass

    def parseAndRun(self, argv=None):
        """Parse the command line and execute all commands.

        This method is the main entry point for this class.

        Parameters
        ----------
        argv : `list` of `str`, optional
            List of command line arguments; if not specified then
            ``sys.argv[1:]`` is used.
        """

        if argv is None:
            argv = sys.argv[1:]

        # Start with parsing the command line; only do partial parsing now
        # as the tasks can add more arguments later.
        parser = makeParser()
        args = parser.parse_args(argv)

        # First thing to do is to set up logging.
        self.configLog(args.longlog, args.loglevel)

        taskFactory = TaskFactory()

        # make pipeline out of command line arguments (can return empty pipeline)
        try:
            pipeline = self.makePipeline(args)
        except Exception as exc:
            print("Failed to build pipeline: {}".format(exc), file=sys.stderr)
            raise

        if args.subcommand == "build":
            # stop here but process --show option first
            self.showInfo(args, pipeline)
            return 0

        # make quantum graph
        try:
            qgraph = self.makeGraph(pipeline, args)
        except Exception as exc:
            print("Failed to build graph: {}".format(exc), file=sys.stderr)
            raise

        # optionally dump some info
        self.showInfo(args, pipeline, qgraph)

        if qgraph is None:
            # No need to raise an exception here; the code that makes the
            # graph should have printed a warning message already.
            return 2

        if args.subcommand == "qgraph":
            # stop here
            return 0

        # execute
        if args.subcommand == "run":
            return self.runPipeline(qgraph, taskFactory, args)

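    # Illustrative usage (a sketch; the exact option names are defined by
    # cmdLineParser.makeParser and may differ):
    #
    #     fwk = CmdLineFwk()
    #     status = fwk.parseAndRun(["run", "-b", "/repo/butler.yaml",
    #                               "-p", "pipeline.yaml",
    #                               "--output", "u/user/out"])
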
    @staticmethod
    def configLog(longlog, logLevels):
        """Configure the logging system.

        Parameters
        ----------
        longlog : `bool`
            If `True` then make log messages appear in "long format".
        logLevels : `list` of `tuple`
            Per-component logging levels; each item in the list is a tuple
            (component, level), where ``component`` is a logger name or `None`
            for the root logger, and ``level`` is a logging level name
            ('DEBUG', 'INFO', etc.).
        """
        if longlog:
            message_fmt = "%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n"
        else:
            message_fmt = "%c %p: %m%n"

        # Initialize global logging config. Skip if the env var
        # LSST_LOG_CONFIG exists; the file it points to would already
        # configure lsst.log.
        if not os.path.isfile(os.environ.get("LSST_LOG_CONFIG", "")):
            lsst.log.configure_prop(_LOG_PROP.format(message_fmt))

        # Forward all Python logging to lsst.log
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        lgr.addHandler(lsst.log.LogHandler())

        # also capture warnings and send them to logging
        logging.captureWarnings(True)

        # configure individual loggers
        for component, level in logLevels:
            level = getattr(lsst.log.Log, level.upper(), None)
            if level is not None:
                # set logging level for lsst.log
                logger = lsst.log.Log.getLogger(component or "")
                logger.setLevel(level)
                # set logging level for Python logging
                pyLevel = lsst.log.LevelTranslator.lsstLog2logging(level)
                logging.getLogger(component).setLevel(pyLevel)

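    # Illustrative note (not part of the original module): logLevels pairs
    # logger names with level names, e.g.
    #
    #     CmdLineFwk.configLog(longlog=False,
    #                          logLevels=[(None, "WARN"),
    #                                     ("lsst.daf.butler", "DEBUG")])
    #
    # sets the root logger to WARN and the butler logger to DEBUG in both
    # lsst.log and Python logging.
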
    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            The constructed pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.fromFile(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":

                pipeline.addInstrument(action.value)

            elif action.action == "new_task":

                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":

                pipeline.removeTask(action.label)

            elif action.action == "config":

                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":

                pipeline.addConfigFile(action.label, action.value)

            else:

                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.toFile(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline

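    # Illustrative note (not part of the original module): a pipeline action
    # with action="config", label="isr", value="doWrite=False" results in
    # pipeline.addConfigOverride("isr", "doWrite", "False"); the value stays
    # a string and is interpreted later by the config override machinery.
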
    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline; can be empty or ``None`` if the graph is read from a
            file.
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
        """

        registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)

        if args.qgraph:

            with open(args.qgraph, 'rb') as pickleFile:
                qgraph = QuantumGraph.load(pickleFile, registry.dimensions)

            # pipeline cannot be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")

        else:

            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(registry,
                                        skipExisting=args.skip_existing)
            qgraph = graphBuilder.makeGraph(pipeline, collections, run, args.data_query)

        # Count quanta in the graph; give a warning and return None if it is
        # empty.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info("QuantumGraph contains %d quanta for %d tasks",
                      nQuanta, len(qgraph.taskGraph))

        if args.save_qgraph:
            with open(args.save_qgraph, "wb") as pickleFile:
                qgraph.save(pickleFile)

        if args.save_single_quanta:
            for iq, quantumNode in enumerate(qgraph):
                sqgraph = qgraph.subset(quantumNode)
                filename = args.save_single_quanta.format(iq)
                with open(filename, "wb") as pickleFile:
                    sqgraph.save(pickleFile)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        return qgraph

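    # Illustrative note (not part of the original module): the
    # --save-single-quanta value is a format string expanded with the
    # quantum index, e.g. "quantum-{}.pickle" produces quantum-0.pickle,
    # quantum-1.pickle, and so on.
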
    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute a complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `argparse.Namespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        preExecInit = PreExecInit(butler, taskFactory, args.skip_existing)
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types,
                               saveVersions=not args.no_versions)

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(taskFactory,
                                                    skipExisting=args.skip_existing,
                                                    clobberPartialOutputs=args.clobber_partial_outputs,
                                                    enableLsstDebug=args.enableLsstDebug)
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(numProc=args.processes, timeout=timeout,
                                       quantumExecutor=quantumExecutor,
                                       failFast=args.fail_fast,
                                       executionGraphFixup=graphFixup)
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler)

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what,
                       "', '".join("pipeline config[=XXX] dump-config[=Task] history=XXX "
                                   "tasks graph workflow".split())),
                      file=sys.stderr)
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True` then dump the complete task configuration with all
            imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)

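    # Illustrative note (not part of the original module): for
    # --show config=isr::doWrite the regular expression above yields
    # taskName="isr" and pattern="doWrite", so only matching fields of the
    # "isr" task configuration are echoed through _FilteredStream.
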
    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        """

        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        cpath, _, cname = pattern.rpartition(".")
        found = False
        for taskDef in tasks:
            try:
                if not cpath:
                    # looking for a top-level field
                    hconfig = taskDef.config
                else:
                    hconfig = eval("config." + cpath, {}, {"config": taskDef.config})
            except AttributeError:
                # This config object has no such field, but maybe some other
                # task has it.
                continue
            except Exception:
                # Any other exception probably means some error in the
                # expression.
                print(f"ERROR: Failed to evaluate field expression `{pattern}'", file=sys.stderr)
                sys.exit(1)

            if hasattr(hconfig, cname):
                print(f"### Configuration field for task `{taskDef.label}'")
                print(pexConfig.history.format(hconfig, cname))
                found = True

        if not found:
            print(f"None of the tasks has a field named {pattern}", file=sys.stderr)
            sys.exit(1)

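    # Illustrative note (not part of the original module): for
    # --show history=isr::overscan.fitType the pattern splits at the last
    # dot, so cpath="overscan" is evaluated against the task config and the
    # history of its "fitType" field is printed.
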
    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNode in graph.taskGraph:
            print(taskNode)

            for iq, quantum in enumerate(graph.getQuantaForTask(taskNode)):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.inputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependencies to stdout.

        The input and predicted output URIs based on the Butler repo are
        printed.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `argparse.Namespace`
            Parsed command line.
        """
        def dumpURIs(thisRef):
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}")
            else:
                print("    (disassembled artifact)")
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}")

        butler = _ButlerFactory.makeReadButler(args)
        for qdata in graph.traverse():
            shortname = qdata.taskDef.taskName.split('.')[-1]
            print("Quantum {}: {}".format(qdata.index, shortname))
            print("  inputs:")
            for key, refs in qdata.quantum.inputs.items():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:")
            for key, refs in qdata.quantum.outputs.items():
                for ref in refs:
                    dumpURIs(ref)
            for parent in qdata.dependencies:
                print("Parent Quantum {} - Child Quantum {}".format(parent, qdata.index))

    def _importGraphFixup(self, args):
        """Import and instantiate a graph fixup object.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            `None` is returned if ``--graph-fixup`` was not given.

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImport(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
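
    # Illustrative note (not part of the original module): --graph-fixup
    # takes a dotted name importable by lsst.utils.doImport, e.g. the
    # hypothetical "mypackage.fixups.MyFixup", naming an ExecutionGraphFixup
    # subclass or a no-argument callable that returns an instance of one.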