# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

__all__ = ['CmdLineFwk']

# -------------------------------
#  Imports of standard modules --
# -------------------------------
import argparse
import copy
import datetime
import fnmatch
import getpass
import logging
import re
import sys
from typing import Iterable, Optional, Tuple
import warnings

# -----------------------------
#  Imports for other modules --
# -----------------------------
from lsst.daf.butler import (
    Butler,
    CollectionSearch,
    CollectionType,
    Registry,
)
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
import lsst.pex.config as pexConfig
from lsst.pipe.base import (buildExecutionButler, GraphBuilder, Pipeline,
                            PipelineDatasetTypes, QuantumGraph, TaskDef)
from lsst.obs.base import Instrument
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor
from . import util
from lsst.utils import doImport

# ----------------------------------
#  Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self):
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: Tuple[str, ...]
    """The definition of the collection, if it already exists
    (`tuple` [`str`]).

    Empty if the collection does not already exist.
    """
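
# A minimal sketch of how the helper above behaves (hypothetical collection
# names; ``butler`` is assumed to be an existing `lsst.daf.butler.Butler`):
#
#     info = _OutputChainedCollectionInfo(butler.registry, "u/user/output")
#     if info.exists:
#         # e.g. ("u/user/output/20210615T120000Z", "HSC/raw/all")
#         print(info.chain)
#     else:
#         print(info.chain)  # () -- new collections start with an empty chain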


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments.  The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; may be any expression handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable`` is `True` but there are no output collections.
    """
    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = "{}/{}".format(self.output, Instrument.makeCollectionTimestamp())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, it is OK to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections.  We do this up
        # front so we can tell if the user passes the same inputs on
        # subsequent calls, even though we also flatten when we define the
        # output CHAINED collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()
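
    # For illustration: with ``--output u/user/out`` and no ``--output-run``,
    # the constructor above derives a timestamped run name (hypothetical
    # values):
    #
    #     runName = "{}/{}".format("u/user/out", Instrument.makeCollectionTimestamp())
    #     # e.g. "u/user/out/20210615T120000Z"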

    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1]):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(f"Cannot --extend-run; output collection "
                             f"'{self.outputRun.name}' does not exist.")
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(f"Output run '{self.outputRun.name}' already exists, but "
                             f"--extend-run was not given.")
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug("Simulating collection search in '%s' after removing '%s'.",
                           self.output.name, replaced)
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self

    @classmethod
    def makeReadButler(cls, args: argparse.Namespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)
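
    # A minimal usage sketch (hypothetical repository path and collection
    # names; any object with the attributes documented on `_ButlerFactory`,
    # such as a `types.SimpleNamespace`, works for ``args``):
    #
    #     import types
    #     args = types.SimpleNamespace(
    #         butler_config="repo", input=("HSC/raw/all",), output="u/user/out",
    #         output_run=None, extend_run=False, replace_run=False,
    #         prune_replaced=None)
    #     butler = _ButlerFactory.makeReadButler(args)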

    @classmethod
    def makeRegistryAndCollections(cls, args: argparse.Namespace) -> \
            Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler.registry, inputs, run

    @classmethod
    def makeWriteButler(cls, args: argparse.Namespace,
                        taskDefs: Optional[Iterable[TaskDef]] = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline.  This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from the datastore.
                    with butler.transaction():
                        refs = butler.registry.queryDatasets(..., collections=replaced)
                        # We want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, run=replaced, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all of its datasets; the
                    # collection must be removed from its parent chain first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(
                        f"Unsupported --prune-replaced option '{args.prune_replaced}'."
                    )
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug("Preparing butler to write to '%s' and read from '%s'=%s",
                       self.outputRun.name, self.output.name, chainDefinition)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = CollectionSearch.fromExpression((self.outputRun.name,) + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: Tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [`str`]).
    """
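
# For illustration, `_ButlerFactory.makeWriteButler` evolves an existing
# output chain roughly like this (hypothetical collection names):
#
#     chain before --replace-run: ("u/user/out/run1", "HSC/raw/all")
#     "u/user/out/run1" is popped and a new timestamped run is pushed to
#     the front, giving:
#     chain after:                ("u/user/out/run2", "HSC/raw/all")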


class _FilteredStream:
    """A file-like object that filters some config fields.

    Note
    ----
    This class depends on implementation details of the
    ``Config.saveToStream`` method, in particular on that method using a
    single call to ``write()`` to save information about a single config
    field, with that call combining the comment string(s) for the field with
    the field path and value.  This class will not work reliably on "import"
    strings, so imports should be disabled by passing ``skipImports=True`` to
    ``saveToStream()``.
    """
    def __init__(self, pattern):
        # Obey case if pattern isn't lowercase or requests NOIGNORECASE.
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(f"Matching \"{pattern}\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        # Strip off doc string line(s) and cut off at "=" for string matching.
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)
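
# A minimal sketch of the filtering behavior (hypothetical config fields):
#
#     stream = _FilteredStream("*.doBias")
#     # Printed, because the pattern matches "config.isr.doBias":
#     stream.write("# Apply bias frame correction?\nconfig.isr.doBias=True\n")
#     # Suppressed, no match:
#     stream.write("config.isr.doDark=True\n")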


# ------------------------
#  Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from the command line.

    In addition to executing tasks this activator provides additional methods
    for task management, such as dumping the configuration or the execution
    chain.
    """

    MP_TIMEOUT = 9999  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # Loop over all pipeline actions and apply them in order.
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)

            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":
                pipeline.removeTask(action.label)

            elif action.action == "config":
                # Action value string is "field=value", split it at '='.
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)

            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
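
    # A minimal programmatic sketch of the same operations the action loop
    # above performs (hypothetical task and label names):
    #
    #     pipeline = Pipeline("anonymous")
    #     pipeline.addTask("lsst.ip.isr.IsrTask", "isr")
    #     pipeline.addConfigOverride("isr", "doBias", "False")
    #     pipeline.addConfigFile("isr", "isrConfig.py")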

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
        """

        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes an empty tuple as the default value for
            # qgraph_node_id.
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(args.qgraph, registry.dimensions,
                                          nodes=nodes, graphID=args.qgraph_id)

            # A pipeline cannot be provided in this case.
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")

        else:
            # Make the execution plan (a.k.a. DAG) for the pipeline.
            graphBuilder = GraphBuilder(registry,
                                        skipExistingIn=args.skip_existing_in,
                                        clobberOutputs=args.clobber_outputs)
            # Accumulate metadata.
            metadata = {"input": args.input, "output": args.output, "butler_argument": args.butler_config,
                        "output_run": args.output_run, "extend_run": args.extend_run,
                        "skip_existing_in": args.skip_existing_in, "skip_existing": args.skip_existing,
                        "data_query": args.data_query, "user": getpass.getuser(),
                        "time": f"{datetime.datetime.now()}"}
            qgraph = graphBuilder.makeGraph(pipeline, collections, run, args.data_query, metadata=metadata,
                                            datasetQueryConstraint=args.dataset_query_constraint)

        # Count quanta in the graph; give a warning and return None if it is
        # empty.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info("QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
                      nQuanta, len(qgraph.taskGraph), qgraph.graphID)

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode.nodeId.number)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler):
                newArgs.butler_config = butler._config
                # makeWriteButler is called for its side effects, mainly
                # parsing all the args into collection names, creating
                # collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include the output collection in the collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                try:
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))), )
                except MissingCollectionError:
                    pass

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(butler, qgraph, args.execution_butler_location, run,
                                 butlerModifier=builderShim, collections=all_inputs,
                                 clobber=args.clobber_execution_butler)

        return qgraph
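
    # A minimal sketch of the save/load round trip used above (hypothetical
    # path; ``registry`` as constructed in makeGraph):
    #
    #     qgraph.saveUri("pipeline.qgraph")
    #     restored = QuantumGraph.loadUri("pipeline.qgraph", registry.dimensions)
    #     assert restored.graphID == qgraph.graphID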

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute a complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        # Make butler instance.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run, )

        # Enable lsstDebug debugging.  Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses the fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run)
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types,
                               saveVersions=not args.no_versions)

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(taskFactory,
                                                    skipExistingIn=args.skip_existing_in,
                                                    clobberOutputs=args.clobber_outputs,
                                                    enableLsstDebug=args.enableLsstDebug,
                                                    exitOnKnownError=args.fail_fast)
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(numProc=args.processes, timeout=timeout,
                                       startMethod=args.start_method,
                                       quantumExecutor=quantumExecutor,
                                       failFast=args.fail_fast,
                                       executionGraphFixup=graphFixup)
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler)

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow", "uri"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "uri":
                if graph:
                    self._showUri(graph, args)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what, "', '".join("pipeline config[=XXX] history=XXX tasks graph".split())),
                      file=sys.stderr)
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True` then dump the complete task configuration with all
            imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option.
            taskName = showArgs
        else:
            # The argument can have the form
            # [TaskLabel::][pattern:NOIGNORECASE].
            matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)
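
    # For illustration, the regular expression above splits show arguments
    # like this (hypothetical labels and fields):
    #
    #     "isr::doBias"     -> taskName="isr",  pattern="doBias"
    #     "config.doBias"   -> taskName=None,   pattern="doBias"
    #     "*.connections.*" -> taskName=None,   pattern="*.connections.*"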

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        """
        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        found = False
        for taskDef in tasks:

            config = taskDef.config

            # Look for any matches in the config hierarchy for this name.
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    print("")

                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # Looking for a top-level field.
                        hconfig = taskDef.config
                    else:
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                          file=sys.stderr)
                    hconfig = None

                # Sometimes we end up with a non-Config, so skip those.
                if isinstance(hconfig, (pexConfig.Config, pexConfig.ConfigurableInstance)) and \
                        hasattr(hconfig, cname):
                    print(f"### Configuration field for task `{taskDef.label}'")
                    print(pexConfig.history.format(hconfig, cname))
                    found = True

        if not found:
            print(f"None of the tasks has a field matching {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNode in graph.taskGraph:
            print(taskNode)

            for iq, quantum in enumerate(graph.getQuantaForTask(taskNode)):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.inputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependencies to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line.
        """
        for node in graph:
            print(f"Quantum {node.nodeId.number}: {node.taskDef.taskName}")
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId.number} - Child Quantum {node.nodeId.number}")

    def _showUri(self, graph, args):
        """Print input and predicted output URIs to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line.
        """
        def dumpURIs(thisRef):
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}")
            else:
                print("    (disassembled artifact)")
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}")

        butler = _ButlerFactory.makeReadButler(args)
        for node in graph:
            print(f"Quantum {node.nodeId.number}: {node.taskDef.taskName}")
            print("  inputs:")
            for key, refs in node.quantum.inputs.items():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:")
            for key, refs in node.quantum.outputs.items():
                for ref in refs:
                    dumpURIs(ref)

    def _importGraphFixup(self, args):
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImport(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
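
# A minimal sketch of a fixup class accepted by the method above, based on
# the `ExecutionGraphFixup` interface imported at the top of this module
# (hypothetical module path, passed on the command line as
# ``--graph-fixup mypackage.fixups.MyFixup``; the exact abstract hook name
# is defined by `ExecutionGraphFixup` itself):
#
#     from lsst.ctrl.mpexec.executionGraphFixup import ExecutionGraphFixup
#
#     class MyFixup(ExecutionGraphFixup):
#         def fixupQuanta(self, graph):
#             # Adjust quantum dependencies/ordering here, then return the
#             # (possibly updated) graph.
#             return graph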