Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py : 12%

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

__all__ = ['CmdLineFwk']

# -------------------------------
#  Imports of standard modules --
# -------------------------------
import argparse
import copy
import datetime
import fnmatch
import getpass
import logging
import re
import sys
from typing import Iterable, Optional, Tuple
import warnings

# -----------------------------
#  Imports for other modules --
# -----------------------------
from lsst.daf.butler import (
    Butler,
    CollectionSearch,
    CollectionType,
    Registry,
)
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
import lsst.pex.config as pexConfig
from lsst.pipe.base import (buildExecutionButler, GraphBuilder, Pipeline,
                            PipelineDatasetTypes, QuantumGraph, TaskDef)
from lsst.obs.base import Instrument
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor
from . import util
from lsst.utils import doImport

# ----------------------------------
#  Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__.partition(".")[2])
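
# Note: ``__name__.partition(".")[2]`` drops everything up to and including
# the first ".", so this module logs as "ctrl.mpexec.cmdLineFwk" rather than
# "lsst.ctrl.mpexec.cmdLineFwk".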


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self):
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: Tuple[str, ...]
    """The definition of the collection, if it already exists (`tuple` [`str`]).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should already
            exist but will be removed from the output chained collection and
            replaced with a new one.

        ``prune_replaced``
            How to prune the replaced run, if at all: `None`, "unstore", or
            "purge" (requires ``replace_run``).

        ``input``
            Input collections of any type; may be any expression handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """
    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                runName = self.output.chain[0]
            else:
                runName = "{}/{}".format(self.output, Instrument.makeCollectionTimestamp())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections. We do this up
        # front so we can tell if the user passes the same inputs on subsequent
        # calls, even though we also flatten when we define the output CHAINED
        # collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()
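        # Example of the default run naming (hypothetical values): with
        # ``--output u/someone/coadd`` and no ``--output-run``, the new run is
        # named "u/someone/coadd/<timestamp>", where the suffix comes from
        # Instrument.makeCollectionTimestamp().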

    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1]):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(f"Cannot --extend-run; output collection "
                             f"'{self.outputRun.name}' does not exist.")
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(f"Output run '{self.outputRun.name}' already exists, but "
                             f"--extend-run was not given.")
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug("Simulating collection search in '%s' after removing '%s'.",
                           self.output.name, replaced)
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
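        # Resulting search order: the output RUN first when --extend-run is
        # given; then the existing output CHAIN (or, with --replace-run, that
        # chain minus its first entry); otherwise the flattened --input
        # collections when no output chain exists yet.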
        return butler, inputs, self

    @classmethod
    def makeReadButler(cls, args: argparse.Namespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeRegistryAndCollections(cls, args: argparse.Namespace) -> \
            Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN` collection
            if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
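        # Graph building (CmdLineFwk.makeGraph) needs only this read-only
        # registry and search path; ``run`` is non-None only with
        # --extend-run, when existing outputs in that run must be considered.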
        return butler.registry, inputs, run

    @classmethod
    def makeWriteButler(cls, args: argparse.Namespace,
                        taskDefs: Optional[Iterable[TaskDef]] = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline. This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore
                    with butler.transaction():
                        refs = butler.registry.queryDatasets(..., collections=replaced)
                        # we want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, run=replaced, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase entire collection and all datasets; the collection
                    # must be removed from its parent chain first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(
                        f"Unsupported --prune-replaced option '{args.prune_replaced}'."
                    )
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug("Preparing butler to write to '%s' and read from '%s'=%s",
                       self.outputRun.name, self.output.name, chainDefinition)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = CollectionSearch.fromExpression((self.outputRun.name,) + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: Tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


class _FilteredStream:
    """A file-like object that filters some config fields.

    Note
    ----
    This class depends on implementation details of the ``Config.saveToStream``
    method, in particular that it uses a single call to ``write()`` to save the
    information for each config field, and that this call combines the comment
    string(s) for a field with the field path and value. The class does not
    work reliably on "import" strings, so imports should be disabled by
    passing ``skipImports=True`` to ``saveToStream()``.
    """
    def __init__(self, pattern):
        # obey case if pattern isn't lowercase or requests NOIGNORECASE
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(f"Matching \"{pattern}\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)

# ------------------------
#  Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks, this activator provides additional methods
    for task management, like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 9999  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":

                pipeline.addInstrument(action.value)

            elif action.action == "new_task":

                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":

                pipeline.removeTask(action.label)

            elif action.action == "config":

                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":

                pipeline.addConfigFile(action.label, action.value)

            else:

                raise ValueError(f"Unexpected pipeline action: {action.action}")
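
        # Example (hypothetical action sequence): a "new_task" action with
        # value "lsst.some.module.SomeTask" and label "someLabel", followed by
        # a "config" action with label "someLabel" and value "someField=True",
        # adds the task and then overrides its config field someField.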

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If resulting graph is empty then `None` is returned.
        """

        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes empty tuple as default value for qgraph_node_id
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(args.qgraph, registry.dimensions,
                                          nodes=nodes, graphID=args.qgraph_id)

            # pipeline cannot be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")

        else:

            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(registry,
                                        skipExistingIn=args.skip_existing_in,
                                        clobberOutputs=args.clobber_outputs)
            # accumulate metadata
            metadata = {"input": args.input, "output": args.output, "butler_argument": args.butler_config,
                        "output_run": args.output_run, "extend_run": args.extend_run,
                        "skip_existing_in": args.skip_existing_in, "skip_existing": args.skip_existing,
                        "data_query": args.data_query, "user": getpass.getuser(),
                        "time": f"{datetime.datetime.now()}"}
            qgraph = graphBuilder.makeGraph(pipeline, collections, run, args.data_query, metadata=metadata)

        # Count quanta in graph; give a warning and return None if it's empty.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info("QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
                      nQuanta, len(qgraph.taskGraph), qgraph.graphID)

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode.nodeId.number)
                sqgraph.saveUri(uri)
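            # args.save_single_quanta is expected to contain a str.format
            # placeholder for the node number, e.g. (hypothetical)
            # "quantum-{}.qgraph", producing one single-quantum graph file
            # per node.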

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler):
                newArgs.butler_config = butler._config
                # Calling makeWriteButler is done for the side effects of
                # calling that method, mainly parsing all the args into
                # collection names, creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            buildExecutionButler(butler, qgraph, args.execution_butler_location, run,
                                 butlerModifier=builderShim, collections=collections,
                                 clobber=args.clobber_execution_butler)

        return qgraph

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory
        args : `types.SimpleNamespace`
            Parsed command line
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run)
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types,
                               saveVersions=not args.no_versions)

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(taskFactory,
                                                    skipExistingIn=args.skip_existing_in,
                                                    clobberOutputs=args.clobber_outputs,
                                                    enableLsstDebug=args.enableLsstDebug,
                                                    exitOnKnownError=args.fail_fast)
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(numProc=args.processes, timeout=timeout,
                                       startMethod=args.start_method,
                                       quantumExecutor=quantumExecutor,
                                       failFast=args.fail_fast,
                                       executionGraphFixup=graphFixup)
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler)

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line
        pipeline : `Pipeline`
            Pipeline definition
        graph : `QuantumGraph`, optional
            Execution graph
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "dump-config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow", "uri"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "uri":
                if graph:
                    self._showUri(graph, args)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what, "', '".join("pipeline config[=XXX] dump-config[=XXX] history=XXX "
                                         "tasks graph workflow uri".split())),
                      file=sys.stderr)
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show
        dumpFullConfig : `bool`
            If true then dump complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)
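            # _FilteredStream sees exactly one write() per field, which is why
            # imports are stripped (skipImports=True) whenever the filtered
            # stream may be in use; with dumpFullConfig the unfiltered stdout
            # stream gets the complete config, imports included.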

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show
        """
        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        found = False
        for taskDef in tasks:

            config = taskDef.config

            # Look for any matches in the config hierarchy for this name
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    print("")

                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # looking for top-level field
                        hconfig = taskDef.config
                    else:
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                          file=sys.stderr)
                    hconfig = None

                # Sometimes we end up with a non-Config so skip those
                if isinstance(hconfig, (pexConfig.Config, pexConfig.ConfigurableInstance)) and \
                        hasattr(hconfig, cname):
                    print(f"### Configuration field for task `{taskDef.label}'")
                    print(pexConfig.history.format(hconfig, cname))
                    found = True

        if not found:
            print(f"None of the tasks has a field matching {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNode in graph.taskGraph:
            print(taskNode)

            for iq, quantum in enumerate(graph.getQuantaForTask(taskNode)):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.inputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependencies to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line
        """
        for node in graph:
            print(f"Quantum {node.nodeId.number}: {node.taskDef.taskName}")
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId.number} - Child Quantum {node.nodeId.number}")

    def _showUri(self, graph, args):
        """Print input and predicted output URIs to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph
        args : `types.SimpleNamespace`
            Parsed command line
        """
        def dumpURIs(thisRef):
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}")
            else:
                print("    (disassembled artifact)")
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}")

        butler = _ButlerFactory.makeReadButler(args)
        for node in graph:
            print(f"Quantum {node.nodeId.number}: {node.taskDef.taskName}")
            print("  inputs:")
            for key, refs in node.quantum.inputs.items():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:")
            for key, refs in node.quantum.outputs.items():
                for ref in refs:
                    dumpURIs(ref)

    def _importGraphFixup(self, args):
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`

        Raises
        ------
        ValueError
            Raised if import fails, method call raises exception, or returned
            instance has unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImport(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup