Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 12%
# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

__all__ = ["CmdLineFwk"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
import argparse
import copy
import datetime
import fnmatch
import getpass
import logging
import re
import sys
import warnings
from typing import Iterable, Optional, Tuple

import lsst.pex.config as pexConfig

# -----------------------------
# Imports for other modules --
# -----------------------------
from lsst.daf.butler import Butler, CollectionSearch, CollectionType, Registry
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.obs.base import Instrument
from lsst.pipe.base import (
    GraphBuilder,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    TaskDef,
    buildExecutionButler,
)
from lsst.utils import doImport

from . import util
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
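
    Examples
    --------
    A minimal sketch of the intended use, assuming an existing butler
    ``registry`` (the collection name below is hypothetical)::

        info = _OutputChainedCollectionInfo(registry, "u/user/analysis")
        if info.exists:
            print(info.chain)  # existing chain definition, newest run first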
83 """
85 def __init__(self, registry: Registry, name: str):
86 self.name = name
87 try:
88 self.chain = tuple(registry.getCollectionChain(name))
89 self.exists = True
90 except MissingCollectionError:
91 self.chain = ()
92 self.exists = False
94 def __str__(self):
95 return self.name
97 name: str
98 """Name of the collection provided on the command line (`str`).
99 """
101 exists: bool
102 """Whether this collection already exists in the registry (`bool`).
103 """
105 chain: Tuple[str, ...]
106 """The definition of the collection, if it already exists (`tuple`[`str`]).
108 Empty if the collection does not already exist.
109 """
112class _OutputRunCollectionInfo:
113 """A helper class for handling command-line arguments related to an output
114 `~lsst.daf.butler.CollectionType.RUN` collection.
116 Parameters
117 ----------
118 registry : `lsst.daf.butler.Registry`
119 Butler registry that collections will be added to and/or queried from.
120 name : `str`
121 Name of the collection given on the command line.
122 """
124 def __init__(self, registry: Registry, name: str):
125 self.name = name
126 try:
127 actualType = registry.getCollectionType(name)
128 if actualType is not CollectionType.RUN:
129 raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
130 self.exists = True
131 except MissingCollectionError:
132 self.exists = False
134 name: str
135 """Name of the collection provided on the command line (`str`).
136 """
138 exists: bool
139 """Whether this collection already exists in the registry (`bool`).
140 """
143class _ButlerFactory:
144 """A helper class for processing command-line arguments related to input
145 and output collections.
147 Parameters
148 ----------
149 registry : `lsst.daf.butler.Registry`
150 Butler registry that collections will be added to and/or queried from.
152 args : `types.SimpleNamespace`
153 Parsed command-line arguments. The following attributes are used,
154 either at construction or in later methods.
156 ``output``
157 The name of a `~lsst.daf.butler.CollectionType.CHAINED`
158 input/output collection.
160 ``output_run``
161 The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
162 collection.
164 ``extend_run``
165 A boolean indicating whether ``output_run`` should already exist
166 and be extended.
168 ``replace_run``
169 A boolean indicating that (if `True`) ``output_run`` should already
170 exist but will be removed from the output chained collection and
171 replaced with a new one.
173 ``prune_replaced``
174 A boolean indicating whether to prune the replaced run (requires
175 ``replace_run``).
        ``input``
            Input collections of any type; may be any expression handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = "{}/{}".format(self.output, Instrument.makeCollectionTimestamp())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections. We do this up
        # front so we can tell if the user passes the same inputs on subsequent
        # calls, even though we also flatten when we define the output CHAINED
        # collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
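            # For example, an existing chain ["run2", "run1", "inputA",
            # "inputB"] is consistent with inputs ("inputA", "inputB"):
            # zipping the reversed sequences compares inputB/inputB then
            # inputA/inputA before either sequence is exhausted.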
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1]):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(
                f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
            )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self

    @classmethod
    def makeReadButler(cls, args: argparse.Namespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
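
        Examples
        --------
        A minimal sketch; the repository path and collection names are
        hypothetical::

            import types

            args = types.SimpleNamespace(
                butler_config="/repo",
                input=("HSC/defaults",),
                output="u/user/analysis",
                output_run=None,
                extend_run=False,
                replace_run=False,
                prune_replaced=None,
            )
            butler = _ButlerFactory.makeReadButler(args)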
321 """
322 butler, inputs, _ = cls._makeReadParts(args)
323 _LOG.debug("Preparing butler to read from %s.", inputs)
324 return Butler(butler=butler, collections=inputs)
326 @classmethod
327 def makeRegistryAndCollections(
328 cls, args: argparse.Namespace
329 ) -> Tuple[Registry, CollectionSearch, Optional[str]]:
330 """Return a read-only registry, a collection search path, and the name
331 of the run to be used for future writes.
333 Parameters
334 ----------
335 args : `types.SimpleNamespace`
336 Parsed command-line arguments. See class documentation for the
337 construction parameter of the same name.
339 Returns
340 -------
341 registry : `lsst.daf.butler.Registry`
342 Butler registry that collections will be added to and/or queried
343 from.
344 inputs : `lsst.daf.butler.registry.CollectionSearch`
345 Collections to search for datasets.
346 run : `str` or `None`
347 Name of the output `~lsst.daf.butler.CollectionType.RUN` collection
348 if it already exists, or `None` if it does not.
349 """
350 butler, inputs, self = cls._makeReadParts(args)
351 run = self.outputRun.name if args.extend_run else None
352 _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
353 return butler.registry, inputs, run
355 @classmethod
356 def makeWriteButler(
357 cls, args: argparse.Namespace, taskDefs: Optional[Iterable[TaskDef]] = None
358 ) -> Butler:
359 """Return a read-write butler initialized to write to and read from
360 the collections specified by the given command-line arguments.
362 Parameters
363 ----------
364 args : `types.SimpleNamespace`
365 Parsed command-line arguments. See class documentation for the
366 construction parameter of the same name.
367 taskDefs : iterable of `TaskDef`, optional
368 Definitions for tasks in a pipeline. This argument is only needed
369 if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
370 "unstore".
372 Returns
373 -------
374 butler : `lsst.daf.butler.Butler`
375 A read-write butler initialized according to the given arguments.
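
        Examples
        --------
        A sketch reusing the hypothetical ``args`` namespace shown for
        `makeReadButler`, this time replacing the previous run and removing
        its regular outputs from the datastore (``taskDefs`` is the
        pipeline's task definitions)::

            args.replace_run = True
            args.prune_replaced = "unstore"
            butler = _ButlerFactory.makeWriteButler(args, taskDefs)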
376 """
377 butler = Butler(args.butler_config, writeable=True)
378 self = cls(butler.registry, args, writeable=True)
379 self.check(args)
380 if self.output is not None:
381 chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
382 if args.replace_run:
383 replaced = chainDefinition.pop(0)
384 if args.prune_replaced == "unstore":
385 # Remove datasets from datastore
386 with butler.transaction():
387 refs = butler.registry.queryDatasets(..., collections=replaced)
388 # we want to remove regular outputs but keep
389 # initOutputs, configs, and versions.
390 if taskDefs is not None:
391 initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
392 refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
393 butler.pruneDatasets(refs, unstore=True, run=replaced, disassociate=False)
394 elif args.prune_replaced == "purge":
395 # Erase entire collection and all datasets, need to remove
396 # collection from its chain collection first.
397 with butler.transaction():
398 butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
399 butler.pruneCollection(replaced, purge=True, unstore=True)
400 elif args.prune_replaced is not None:
401 raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
402 if not self.output.exists:
403 butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
404 if not args.extend_run:
405 butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
406 chainDefinition.insert(0, self.outputRun.name)
407 butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
408 _LOG.debug(
409 "Preparing butler to write to '%s' and read from '%s'=%s",
410 self.outputRun.name,
411 self.output.name,
412 chainDefinition,
413 )
414 butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
415 else:
416 inputs = CollectionSearch.fromExpression((self.outputRun.name,) + self.inputs)
417 _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
418 butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
419 return butler
421 output: Optional[_OutputChainedCollectionInfo]
422 """Information about the output chained collection, if there is or will be
423 one (`_OutputChainedCollectionInfo` or `None`).
424 """
426 outputRun: Optional[_OutputRunCollectionInfo]
427 """Information about the output run collection, if there is or will be
428 one (`_OutputRunCollectionInfo` or `None`).
429 """
431 inputs: Tuple[str, ...]
432 """Input collections provided directly by the user (`tuple` [ `str` ]).
433 """


class _FilteredStream:
    """A file-like object that filters some config fields.

    Note
    ----
    This class depends on implementation details of the ``Config.saveToStream``
    method, in particular that it uses a single call to ``write()`` to save
    information about a single config field, and that the call combines the
    comment string(s) for a field with the field path and value. This class
    will not work reliably on the "import" strings, so imports should be
    disabled by passing ``skipImports=True`` to ``saveToStream()``.
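
    Examples
    --------
    A sketch of the intended use, assuming a ``config`` object from one of
    the pipeline's task definitions (the pattern is hypothetical)::

        stream = _FilteredStream("doWrite*")
        config.saveToStream(stream, root="config", skipImports=True)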
447 """
449 def __init__(self, pattern):
450 # obey case if pattern isn't lowercase or requests NOIGNORECASE
451 mat = re.search(r"(.*):NOIGNORECASE$", pattern)
453 if mat:
454 pattern = mat.group(1)
455 self._pattern = re.compile(fnmatch.translate(pattern))
456 else:
457 if pattern != pattern.lower():
458 print(
459 f'Matching "{pattern}" without regard to case ' "(append :NOIGNORECASE to prevent this)",
460 file=sys.stdout,
461 )
462 self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)
464 def write(self, showStr):
465 # Strip off doc string line(s) and cut off at "=" for string matching
466 matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
467 if self._pattern.search(matchStr):
468 sys.stdout.write(showStr)
471# ------------------------
472# Exported definitions --
473# ------------------------
476class CmdLineFwk:
477 """PipelineTask framework which executes tasks from command line.
479 In addition to executing tasks this activator provides additional methods
480 for task management like dumping configuration or execution chain.
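
    Examples
    --------
    A sketch of the typical call sequence, assuming a parsed command-line
    namespace ``args`` and a task factory (both hypothetical)::

        fwk = CmdLineFwk()
        pipeline = fwk.makePipeline(args)
        qgraph = fwk.makeGraph(pipeline, args)
        if qgraph is not None:
            fwk.runPipeline(qgraph, taskFactory, args)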
481 """
483 MP_TIMEOUT = 3600 * 24 * 30 # Default timeout (sec) for multiprocessing
485 def __init__(self):
486 pass
488 def makePipeline(self, args):
489 """Build a pipeline from command line arguments.
491 Parameters
492 ----------
493 args : `types.SimpleNamespace`
494 Parsed command line
496 Returns
497 -------
498 pipeline : `~lsst.pipe.base.Pipeline`
499 """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":

                pipeline.addInstrument(action.value)

            elif action.action == "new_task":

                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":

                pipeline.removeTask(action.label)

            elif action.action == "config":

                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":

                pipeline.addConfigFile(action.label, action.value)

            else:

                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If resulting graph is empty then `None` is returned.
        """

        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes empty tuple as default value for qgraph_node_id
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(
                args.qgraph, registry.dimensions, nodes=nodes, graphID=args.qgraph_id
            )

            # a pipeline cannot also be given in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(
                registry, skipExistingIn=args.skip_existing_in, clobberOutputs=args.clobber_outputs
            )
            # accumulate metadata
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": args.output_run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            qgraph = graphBuilder.makeGraph(
                pipeline,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
            )
            if args.show_qgraph_header:
                print(qgraph.buildAndPrintHeader())

        # Count quanta in the graph; give a warning and return None if the
        # graph is empty.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info(
                "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
                nQuanta,
                len(qgraph.taskGraph),
                qgraph.graphID,
            )

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler):
                newArgs.butler_config = butler._config
                # Calling makeWriteButler is done for its side effects, mainly
                # parsing all the args into collection names, creating
                # collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include output collection in collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                try:
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)
                except MissingCollectionError:
                    pass

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
            )

        return qgraph

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory
        args : `types.SimpleNamespace`
            Parsed command line
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run, mock=args.mock)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                mock=args.mock,
                mock_configs=args.mock_configs,
            )
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                executionGraphFixup=graphFixup,
            )
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph, butler)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line
        pipeline : `Pipeline`
            Pipeline definition
        graph : `QuantumGraph`, optional
            Execution graph
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow", "uri"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "uri":
                if graph:
                    self._showUri(graph, args)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                allowed = "pipeline config[=XXX] dump-config[=Task] history=XXX tasks graph workflow uri"
                print(
                    "Unknown value for show: %s (choose from '%s')" % (what, "', '".join(allowed.split())),
                    file=sys.stderr,
                )
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show
        dumpFullConfig : `bool`
            If true then dump complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show
        """

        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        found = False
        for taskDef in tasks:

            config = taskDef.config

            # Look for any matches in the config hierarchy for this name
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    print("")

                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # looking for top-level field
                        hconfig = taskDef.config
                    else:
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(
                        f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                        file=sys.stderr,
                    )
                    hconfig = None

                # Sometimes we end up with a non-Config so skip those
                if isinstance(hconfig, (pexConfig.Config, pexConfig.ConfigurableInstance)) and hasattr(
                    hconfig, cname
                ):
                    print(f"### Configuration field for task `{taskDef.label}'")
                    print(pexConfig.history.format(hconfig, cname))
                    found = True

        if not found:
            print(f"None of the tasks has a field matching {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNode in graph.taskGraph:
            print(taskNode)

            for iq, quantum in enumerate(graph.getQuantaForTask(taskNode)):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.inputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependency information to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line
        """
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}")
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId} - Child Quantum {node.nodeId}")

    def _showUri(self, graph, args):
        """Print input and predicted output URIs to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph
        args : `types.SimpleNamespace`
            Parsed command line
        """

        def dumpURIs(thisRef):
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}")
            else:
                print("    (disassembled artifact)")
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}")

        butler = _ButlerFactory.makeReadButler(args)
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}")
            print("  inputs:")
            for key, refs in node.quantum.inputs.items():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:")
            for key, refs in node.quantum.outputs.items():
                for ref in refs:
                    dumpURIs(ref)

    def _importGraphFixup(self, args):
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`

        Raises
        ------
        ValueError
            Raised if import fails, method call raises exception, or returned
            instance has unexpected type.
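
        Notes
        -----
        ``args.graph_fixup`` is expected to be a dotted import path to a
        class or factory method, e.g. a hypothetical
        ``"mypackage.fixups.MyFixup"``; the imported object is called with no
        arguments and must return an `ExecutionGraphFixup` instance.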
991 """
992 if args.graph_fixup:
993 try:
994 factory = doImport(args.graph_fixup)
995 except Exception as exc:
996 raise ValueError("Failed to import graph fixup class/method") from exc
997 try:
998 fixup = factory()
999 except Exception as exc:
1000 raise ValueError("Failed to make instance of graph fixup") from exc
1001 if not isinstance(fixup, ExecutionGraphFixup):
1002 raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
1003 return fixup