Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py : 12%

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

__all__ = ['CmdLineFwk']

# -------------------------------
#  Imports of standard modules --
# -------------------------------
import argparse
import datetime
import fnmatch
import logging
import re
import sys
from typing import List, Optional, Tuple
import warnings

# -----------------------------
#  Imports for other modules --
# -----------------------------
from lsst.daf.butler import (
    Butler,
    CollectionSearch,
    CollectionType,
    DatasetTypeRestriction,
    Registry,
)
from lsst.daf.butler.registry import MissingCollectionError
import lsst.log
import lsst.pex.config as pexConfig
from lsst.pipe.base import GraphBuilder, Pipeline, QuantumGraph
from .cmdLineParser import makeParser
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor
from .taskFactory import TaskFactory
from . import util
from lsst.utils import doImport

# ----------------------------------
#  Local non-exported definitions --
# ----------------------------------

# logging properties
_LOG_PROP = """\
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern={}
"""

_LOG = logging.getLogger(__name__.partition(".")[2])


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = list(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = []
            self.exists = False

    def __str__(self):
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: List[Tuple[str, DatasetTypeRestriction]]
    """The definition of the collection, if it already exists (`list`).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `argparse.Namespace`
        Parsed command-line arguments.  The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; may be any expression handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
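
    Examples
    --------
    A minimal sketch of typical use; the repository path and collection
    names below are hypothetical, and a real repo is required, so the
    snippet is not executed as a doctest:

    >>> import argparse
    >>> args = argparse.Namespace(
    ...     butler_config="/path/to/repo",
    ...     input=["raw/all"],
    ...     output="u/someone/processing",
    ...     output_run=None,
    ...     extend_run=False,
    ...     replace_run=False,
    ...     prune_replaced=False,
    ... )
    >>> butler = _ButlerFactory.makeReadButler(args)  # doctest: +SKIP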
196 """
197 def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
198 if args.output is not None:
199 self.output = _OutputChainedCollectionInfo(registry, args.output)
200 else:
201 self.output = None
202 if args.output_run is not None:
203 self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
204 elif self.output is not None:
205 if args.extend_run:
206 runName, _ = self.output.chain[0]
207 else:
208 runName = "{}/{:%Y%m%dT%Hh%Mm%Ss}".format(self.output, datetime.datetime.now())
209 self.outputRun = _OutputRunCollectionInfo(registry, runName)
210 elif not writeable:
211 # If we're not writing yet, ok to have no output run.
212 self.outputRun = None
213 else:
214 raise ValueError("Cannot write without at least one of (--output, --output-run).")
215 self.inputs = list(CollectionSearch.fromExpression(args.input))
217 def check(self, args: argparse.Namespace):
218 """Check command-line options for consistency with each other and the
219 data repository.
221 Parameters
222 ----------
223 args : `argparse.Namespace`
224 Parsed command-line arguments. See class documentation for the
225 construction parameter of the same name.
226 """
227 assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
228 if self.inputs and self.output is not None and self.output.exists:
229 raise ValueError("Cannot use --output with existing collection with --inputs.")
230 if args.extend_run and self.outputRun is None:
231 raise ValueError("Cannot --extend-run when no output collection is given.")
232 if args.extend_run and not self.outputRun.exists:
233 raise ValueError(f"Cannot --extend-run; output collection "
234 f"'{self.outputRun.name}' does not exist.")
235 if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
236 raise ValueError(f"Output run '{self.outputRun.name}' already exists, but "
237 f"--extend-run was not given.")
238 if args.prune_replaced and not args.replace_run:
239 raise ValueError(f"--prune-replaced requires --replace-run.")
240 if args.replace_run and (self.output is None or not self.output.exists):
241 raise ValueError(f"--output must point to an existing CHAINED collection for --replace-run.")
    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced, _ = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug("Simulating collection search in '%s' after removing '%s'.",
                           self.output.name, replaced)
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self
    @classmethod
    def makeReadButler(cls, args: argparse.Namespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)
    @classmethod
    def makeRegistryAndCollections(cls, args: argparse.Namespace) -> \
            Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler.registry, inputs, run
    @classmethod
    def makeWriteButler(cls, args: argparse.Namespace) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced, _ = chainDefinition.pop(0)
                if args.prune_replaced:
                    # TODO: DM-23671: need a butler API for pruning an
                    # entire RUN collection, then apply it to 'replaced'
                    # here.
                    raise NotImplementedError("Support for --prune-replaced is not yet implemented.")
            chainDefinition.insert(0, self.outputRun.name)
            chainDefinition = CollectionSearch.fromExpression(chainDefinition)
            _LOG.debug("Preparing butler to write to '%s' and read from '%s'=%s",
                       self.outputRun.name, self.output.name, chainDefinition)
            return Butler(butler=butler, run=self.outputRun.name, collections=self.output.name,
                          chains={self.output.name: chainDefinition})
        else:
            inputs = CollectionSearch.fromExpression([self.outputRun.name] + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            return Butler(butler=butler, run=self.outputRun.name, collections=inputs)
    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: List[Tuple[str, DatasetTypeRestriction]]
    """Input collections, including those also used for outputs, and any
    restrictions on dataset types (`list`).
    """


class _FilteredStream:
    """A file-like object that filters some config fields.

    Note
    ----
    This class depends on implementation details of ``Config.saveToStream``
    methods, in particular that that method uses a single call to ``write()``
    to save information about a single config field, and that the call
    combines the comment string(s) for a field with the field path and value.
    This class will not work reliably on the "import" strings, so imports
    should be disabled by passing ``skipImports=True`` to ``saveToStream()``.
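
    Examples
    --------
    A small illustration of the filtering (the pattern and field name here
    are made up):

    >>> stream = _FilteredStream("calibrate*")
    >>> stream.write("config.calibrate.doWrite=True")
    config.calibrate.doWrite=True
    >>> stream.write("config.isr.doWrite=True")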
398 """
399 def __init__(self, pattern):
400 # obey case if pattern isn't lowercase or requests NOIGNORECASE
401 mat = re.search(r"(.*):NOIGNORECASE$", pattern)
403 if mat:
404 pattern = mat.group(1)
405 self._pattern = re.compile(fnmatch.translate(pattern))
406 else:
407 if pattern != pattern.lower():
408 print(f"Matching \"{pattern}\" without regard to case "
409 "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
410 self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)
412 def write(self, showStr):
413 # Strip off doc string line(s) and cut off at "=" for string matching
414 matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
415 if self._pattern.search(matchStr):
416 sys.stdout.write(showStr)


# ------------------------
#  Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 9999  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass

    def parseAndRun(self, argv=None):
        """This method is the main entry point for this class; it parses the
        command line and executes all commands.

        Parameters
        ----------
        argv : `list` of `str`, optional
            List of command line arguments; if not specified then
            ``sys.argv[1:]`` is used.
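
        Examples
        --------
        A hypothetical invocation (the subcommand, option names, and paths
        below are illustrative, not a tested recipe):

        >>> CmdLineFwk().parseAndRun(
        ...     ["qgraph", "-b", "/path/to/repo", "-p", "pipeline.yaml",
        ...      "-i", "raw/all", "-o", "u/someone/processing"]
        ... )  # doctest: +SKIP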
445 """
447 if argv is None:
448 argv = sys.argv[1:]
450 # start with parsing command line, only do partial parsing now as
451 # the tasks can add more arguments later
452 parser = makeParser()
453 args = parser.parse_args(argv)
455 # First thing to do is to setup logging.
456 self.configLog(args.longlog, args.loglevel)
458 taskFactory = TaskFactory()
460 # make pipeline out of command line arguments (can return empty pipeline)
461 try:
462 pipeline = self.makePipeline(args)
463 except Exception as exc:
464 print("Failed to build pipeline: {}".format(exc), file=sys.stderr)
465 raise
467 if args.subcommand == "build":
468 # stop here but process --show option first
469 self.showInfo(args, pipeline)
470 return 0
472 # make quantum graph
473 try:
474 qgraph = self.makeGraph(pipeline, args)
475 except Exception as exc:
476 print("Failed to build graph: {}".format(exc), file=sys.stderr)
477 raise
479 # optionally dump some info
480 self.showInfo(args, pipeline, qgraph)
482 if qgraph is None:
483 # No need to raise an exception here, code that makes graph
484 # should have printed warning message already.
485 return 2
487 if args.subcommand == "qgraph":
488 # stop here
489 return 0
491 # execute
492 if args.subcommand == "run":
493 return self.runPipeline(qgraph, taskFactory, args)
495 @staticmethod
496 def configLog(longlog, logLevels):
497 """Configure logging system.
499 Parameters
500 ----------
501 longlog : `bool`
502 If True then make log messages appear in "long format"
503 logLevels : `list` of `tuple`
504 per-component logging levels, each item in the list is a tuple
505 (component, level), `component` is a logger name or `None` for root
506 logger, `level` is a logging level name ('DEBUG', 'INFO', etc.)
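
        Examples
        --------
        A sketch of a direct call (the component name is illustrative):

        >>> CmdLineFwk.configLog(
        ...     longlog=False,
        ...     logLevels=[(None, "INFO"), ("lsst.ctrl.mpexec", "DEBUG")],
        ... )  # doctest: +SKIP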
507 """
508 if longlog:
509 message_fmt = "%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n"
510 else:
511 message_fmt = "%c %p: %m%n"
513 # global logging config
514 lsst.log.configure_prop(_LOG_PROP.format(message_fmt))
516 # Forward all Python logging to lsst.log
517 lgr = logging.getLogger()
518 lgr.setLevel(logging.INFO) # same as in log4cxx config above
519 lgr.addHandler(lsst.log.LogHandler())
521 # also capture warnings and send them to logging
522 logging.captureWarnings(True)
524 # configure individual loggers
525 for component, level in logLevels:
526 level = getattr(lsst.log.Log, level.upper(), None)
527 if level is not None:
528 # set logging level for lsst.log
529 logger = lsst.log.Log.getLogger(component or "")
530 logger.setLevel(level)
531 # set logging level for Python logging
532 pyLevel = lsst.log.LevelTranslator.lsstLog2logging(level)
533 logging.getLogger(component).setLevel(pyLevel)
    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            The constructed pipeline.
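
        Examples
        --------
        The pipeline actions parsed from the command line map onto `Pipeline`
        method calls; the task name and config override below are
        hypothetical:

        >>> from lsst.pipe.base import Pipeline
        >>> pipeline = Pipeline("anonymous")
        >>> pipeline.addTask("lsst.some.package.SomeTask", "someLabel")  # doctest: +SKIP
        >>> pipeline.addConfigOverride("someLabel", "someField", "value")  # doctest: +SKIP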
546 """
547 if args.pipeline:
548 pipeline = Pipeline.fromFile(args.pipeline)
549 else:
550 pipeline = Pipeline("anonymous")
552 # loop over all pipeline actions and apply them in order
553 for action in args.pipeline_actions:
554 if action.action == "add_instrument":
556 pipeline.addInstrument(action.value)
558 elif action.action == "new_task":
560 pipeline.addTask(action.value, action.label)
562 elif action.action == "delete_task":
564 pipeline.removeTask(action.label)
566 elif action.action == "config":
568 # action value string is "field=value", split it at '='
569 field, _, value = action.value.partition("=")
570 pipeline.addConfigOverride(action.label, field, value)
572 elif action.action == "configfile":
574 pipeline.addConfigFile(action.label, action.value)
576 else:
578 raise ValueError(f"Unexpected pipeline action: {action.action}")
580 if args.save_pipeline:
581 pipeline.toFile(args.save_pipeline)
583 if args.pipeline_dot:
584 pipeline2dot(pipeline, args.pipeline_dot)
586 return pipeline
    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline; can be empty or `None` if the graph is read from
            a file.
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
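
        Examples
        --------
        A sketch of how this method is used from `parseAndRun` (``args`` must
        come from the parser returned by `makeParser`; paths are
        hypothetical):

        >>> fwk = CmdLineFwk()
        >>> args = makeParser().parse_args(
        ...     ["qgraph", "-b", "/path/to/repo", "-p", "pipeline.yaml"]
        ... )  # doctest: +SKIP
        >>> pipeline = fwk.makePipeline(args)  # doctest: +SKIP
        >>> qgraph = fwk.makeGraph(pipeline, args)  # doctest: +SKIP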
602 """
604 registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)
606 if args.qgraph:
608 with open(args.qgraph, 'rb') as pickleFile:
609 qgraph = QuantumGraph.load(pickleFile, registry.dimensions)
611 # pipeline can not be provided in this case
612 if pipeline:
613 raise ValueError("Pipeline must not be given when quantum graph is read from file.")
615 else:
617 # make execution plan (a.k.a. DAG) for pipeline
618 graphBuilder = GraphBuilder(registry,
619 skipExisting=args.skip_existing)
620 qgraph = graphBuilder.makeGraph(pipeline, collections, run, args.data_query)
622 # count quanta in graph and give a warning if it's empty and return None
623 nQuanta = qgraph.countQuanta()
624 if nQuanta == 0:
625 warnings.warn("QuantumGraph is empty", stacklevel=2)
626 return None
627 else:
628 _LOG.info("QuantumGraph contains %d quanta for %d tasks",
629 nQuanta, len(qgraph))
631 if args.save_qgraph:
632 with open(args.save_qgraph, "wb") as pickleFile:
633 qgraph.save(pickleFile)
635 if args.save_single_quanta:
636 for iq, sqgraph in enumerate(qgraph.quantaAsQgraph()):
637 filename = args.save_single_quanta.format(iq)
638 with open(filename, "wb") as pickleFile:
639 sqgraph.save(pickleFile)
641 if args.qgraph_dot:
642 graph2dot(qgraph, args.qgraph_dot)
644 return qgraph
    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `argparse.Namespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        preExecInit = PreExecInit(butler, taskFactory, args.skip_existing)
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types,
                               saveVersions=not args.no_versions)

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(taskFactory,
                                                    skipExisting=args.skip_existing,
                                                    enableLsstDebug=args.enableLsstDebug)
            executor = MPGraphExecutor(numProc=args.processes, timeout=self.MP_TIMEOUT,
                                       quantumExecutor=quantumExecutor,
                                       executionGraphFixup=graphFixup)
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler)
    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what, "', '".join("pipeline config[=XXX] dump-config[=Task] history=XXX "
                                         "tasks graph workflow".split())),
                      file=sys.stderr)
                sys.exit(1)
    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True` then dump complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)
    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
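
        Examples
        --------
        ``showArgs`` takes the form ``[TaskLabel::]fieldPath``; both calls
        below are illustrative and assume a ``pipeline`` built elsewhere:

        >>> fwk = CmdLineFwk()
        >>> fwk._showConfigHistory(pipeline, "someLabel::someField")  # doctest: +SKIP
        >>> fwk._showConfigHistory(pipeline, "config.someField")  # doctest: +SKIP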
783 """
785 taskName = None
786 pattern = None
787 matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
788 if matHistory:
789 taskName = matHistory.group(1)
790 pattern = matHistory.group(2)
791 if not pattern:
792 print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
793 sys.exit(1)
795 tasks = util.filterTasks(pipeline, taskName)
796 if not tasks:
797 print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
798 sys.exit(1)
800 cpath, _, cname = pattern.rpartition(".")
801 found = False
802 for taskDef in tasks:
803 try:
804 if not cpath:
805 # looking for top-level field
806 hconfig = taskDef.config
807 else:
808 hconfig = eval("config." + cpath, {}, {"config": taskDef.config})
809 except AttributeError:
810 # Means this config object has no such field, but maybe some other task has it.
811 continue
812 except Exception:
813 # Any other exception probably means some error in the expression.
814 print(f"ERROR: Failed to evaluate field expression `{pattern}'", file=sys.stderr)
815 sys.exit(1)
817 if hasattr(hconfig, cname):
818 print(f"### Configuration field for task `{taskDef.label}'")
819 print(pexConfig.history.format(hconfig, cname))
820 found = True
822 if not found:
823 print(f"None of the tasks has field named {pattern}", file=sys.stderr)
824 sys.exit(1)
    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNodes in graph:
            print(taskNodes.taskDef)

            for iq, quantum in enumerate(taskNodes.quanta):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.predictedInputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
    def _showWorkflow(self, graph, args):
        """Print quanta information and dependencies to stdout

        The input and predicted output URIs based on the Butler repo are
        printed.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `argparse.Namespace`
            Parsed command line.
        """
        def dumpURIs(thisRef):
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}")
            else:
                print("    (disassembled artifact)")
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}")

        butler = _ButlerFactory.makeReadButler(args)
        hashToParent = {}
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            shortname = taskDef.taskName.split('.')[-1]
            print("Quantum {}: {}".format(iq, shortname))
            print("  inputs:")
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:")
            for key, refs in quantum.outputs.items():
                for ref in refs:
                    dumpURIs(ref)
                    # Store hash to figure out dependencies between quanta
                    dhash = hash((key, ref.dataId))
                    hashToParent[dhash] = iq

        uses = set()
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    dhash = hash((key, ref.dataId))
                    if dhash in hashToParent and (iq, hashToParent[dhash]) not in uses:
                        parentIq = hashToParent[dhash]
                        uses.add((iq, parentIq))  # iq uses parentIq
                        print("Parent Quantum {} - Child Quantum {}".format(parentIq, iq))
    def _importGraphFixup(self, args):
        """Import and instantiate a graph fixup object.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            The fixup instance, or `None` if ``--graph-fixup`` was not given.

        Raises
        ------
        ValueError
            Raised if import fails, the factory call raises an exception, or
            the returned instance has an unexpected type.
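
        Examples
        --------
        ``--graph-fixup`` names an importable factory; a minimal sketch of a
        conforming class, assuming the abstract ``fixupQuanta`` interface of
        `ExecutionGraphFixup` (the body here is a no-op placeholder):

        >>> from lsst.ctrl.mpexec import ExecutionGraphFixup  # doctest: +SKIP
        >>> class NoOpFixup(ExecutionGraphFixup):  # doctest: +SKIP
        ...     def fixupQuanta(self, quanta):
        ...         # return the quanta unchanged
        ...         return quanta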
926 """
927 if args.graph_fixup:
928 try:
929 factory = doImport(args.graph_fixup)
930 except Exception as exc:
931 raise ValueError("Failed to import graph fixup class/method") from exc
932 try:
933 fixup = factory()
934 except Exception as exc:
935 raise ValueError("Failed to make instance of graph fixup") from exc
936 if not isinstance(fixup, ExecutionGraphFixup):
937 raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
938 return fixup