Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 12%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Module defining CmdLineFwk class and related methods.
23"""
25__all__ = ["CmdLineFwk"]
27# -------------------------------
28# Imports of standard modules --
29# -------------------------------
30import argparse
31import copy
32import datetime
33import fnmatch
34import getpass
35import logging
36import re
37import sys
38import warnings
39from typing import Iterable, Optional, Tuple
41import lsst.pex.config as pexConfig
42import lsst.pex.config.history as pexConfigHistory
44# -----------------------------
45# Imports for other modules --
46# -----------------------------
47from lsst.daf.butler import Butler, CollectionSearch, CollectionType, Registry
48from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
49from lsst.pipe.base import (
50 GraphBuilder,
51 Instrument,
52 Pipeline,
53 PipelineDatasetTypes,
54 QuantumGraph,
55 TaskDef,
56 buildExecutionButler,
57)
58from lsst.utils import doImport
60from . import util
61from .dotTools import graph2dot, pipeline2dot
62from .executionGraphFixup import ExecutionGraphFixup
63from .mpGraphExecutor import MPGraphExecutor
64from .preExecInit import PreExecInit
65from .singleQuantumExecutor import SingleQuantumExecutor
67# ----------------------------------
68# Local non-exported definitions --
69# ----------------------------------
71_LOG = logging.getLogger(__name__)
class _OutputChainedCollectionInfo:
    """Snapshot of the output `~lsst.daf.butler.CollectionType.CHAINED`
    collection named on the command line.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Registry that is asked for the current definition of the chain.
    name : `str`
        Collection name as given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            members = tuple(registry.getCollectionChain(name))
        except MissingCollectionError:
            # An unknown collection is not an error here; it is simply
            # recorded as "does not exist yet" with an empty definition.
            members, found = (), False
        else:
            found = True
        self.chain = members
        self.exists = found

    def __str__(self):
        return self.name

    name: str
    """Collection name as given on the command line (`str`).
    """

    exists: bool
    """`True` if the registry already knows this collection (`bool`).
    """

    chain: Tuple[str, ...]
    """Child collections of the chain as reported by the registry
    (`tuple` [`str`]).

    Empty when the collection does not exist.
    """
class _OutputRunCollectionInfo:
    """Snapshot of the output `~lsst.daf.butler.CollectionType.RUN`
    collection named on the command line.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Registry that is asked for the type of the collection.
    name : `str`
        Collection name as given on the command line.

    Raises
    ------
    TypeError
        Raised if the collection exists but is not a RUN collection.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            foundType = registry.getCollectionType(name)
        except MissingCollectionError:
            # Unknown collection: nothing to validate, just record it.
            self.exists = False
            return
        if foundType is not CollectionType.RUN:
            raise TypeError(f"Collection '{name}' exists but has type {foundType.name}, not RUN.")
        self.exists = True

    name: str
    """Collection name as given on the command line (`str`).
    """

    exists: bool
    """`True` if the registry already knows this collection (`bool`).
    """
class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments.  The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection, or `None`.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection, or `None`.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should already
            exist but will be removed from the output chained collection and
            replaced with a new one.

        ``prune_replaced``
            How to prune the replaced run (requires ``replace_run``): the
            string ``"unstore"`` or ``"purge"``, or `None` for no pruning.

        ``input``
            Input collections of any type; passed to
            ``registry.queryCollections`` with ``flattenChains=True``.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, the factory is being used in a context where actual writes
        will happen, and hence an output run is required.  If `False` it is
        acceptable to have no output run at all.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections,
        or if ``extend_run`` is requested and the output chain is missing or
        empty.
    """

    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                # New runs are pushed to the front of the chain, so the run
                # being extended is the first element.
                runName = self.output.chain[0]
            else:
                runName = "{}/{}".format(self.output, Instrument.makeCollectionTimestamp())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections.  We do this up
        # front so we can tell if the user passes the same inputs on subsequent
        # calls, even though we also flatten when we define the output CHAINED
        # collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Raises
        ------
        ValueError
            Raised if the options contradict each other or the state of the
            collections found at construction.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1]):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(
                f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
            )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")
        if args.replace_run and not self.output.chain:
            # Both the read and the write paths take the first element of the
            # chain as the run to replace; fail with a clear message instead
            # of an IndexError when there is nothing to replace.
            raise ValueError(
                f"Cannot --replace-run; output CHAINED collection '{self.output.name}' is empty."
            )

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self

    @classmethod
    def makeReadButler(cls, args: argparse.Namespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeRegistryAndCollections(
        cls, args: argparse.Namespace
    ) -> Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN` collection
            when ``args.extend_run`` is set, otherwise `None`.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler.registry, inputs, run

    @classmethod
    def makeWriteButler(
        cls, args: argparse.Namespace, taskDefs: Optional[Iterable[TaskDef]] = None
    ) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline.  This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.

        Raises
        ------
        NotImplementedError
            Raised if ``args.prune_replaced`` has an unsupported value.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore
                    with butler.transaction():
                        refs = butler.registry.queryDatasets(..., collections=replaced)
                        # we want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, run=replaced, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase entire collection and all datasets, need to remove
                    # collection from its chain collection first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = CollectionSearch.fromExpression((self.outputRun.name,) + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: Tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """
class _FilteredStream:
    """Write-only stream that forwards to standard output only those config
    fields whose path matches a glob pattern.

    Note
    ----
    This class relies on ``Config.saveToStream`` emitting everything about
    one field (its comment lines followed by ``path=value``) in a single
    ``write()`` call.  It is not reliable for "import" lines, so callers
    should pass ``skipImports=True`` to ``saveToStream()``.
    """

    def __init__(self, pattern):
        # Matching ignores case unless the pattern carries an explicit
        # ":NOIGNORECASE" suffix, which is stripped before compiling.
        flags = 0
        suffixMatch = re.search(r"(.*):NOIGNORECASE$", pattern)
        if suffixMatch is not None:
            pattern = suffixMatch.group(1)
        else:
            flags = re.IGNORECASE
            if pattern.lower() != pattern:
                # Tell the user that the mixed-case pattern is being
                # matched case-insensitively.
                print(
                    f'Matching "{pattern}" without regard to case (append :NOIGNORECASE to prevent this)',
                    file=sys.stdout,
                )
        self._pattern = re.compile(fnmatch.translate(pattern), flags)

    def write(self, showStr):
        # Only the last line holds "path=value"; earlier lines are the
        # field's documentation.  Match against the path part alone.
        fieldLine = showStr.rstrip().rsplit("\n", 1)[-1]
        fieldPath = fieldLine.partition("=")[0]
        if self._pattern.search(fieldPath) is not None:
            sys.stdout.write(showStr)
472# ------------------------
473# Exported definitions --
474# ------------------------
class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing

    # Parses "[TaskLabel::][config.]pattern" for --show config.  The dot after
    # "config" is escaped so that names merely starting with "config" (e.g.
    # "configFile") are not truncated.
    _SHOW_CONFIG_RE = re.compile(r"^(?:(\w+)::)?(?:config\.)?(.+)?")

    def __init__(self):
        pass

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`

        Raises
        ------
        ValueError
            Raised if ``args.pipeline_actions`` contains an unknown action.
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":

                pipeline.addInstrument(action.value)

            elif action.action == "new_task":

                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":

                pipeline.removeTask(action.label)

            elif action.action == "config":

                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":

                pipeline.addConfigFile(action.label, action.value)

            else:

                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If resulting graph is empty then `None` is returned.

        Raises
        ------
        ValueError
            Raised if both a pipeline and ``args.qgraph`` are given.
        """

        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes empty tuple as default value for qgraph_node_id
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(
                args.qgraph, registry.dimensions, nodes=nodes, graphID=args.qgraph_id
            )

            # pipeline can not be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(
                registry, skipExistingIn=args.skip_existing_in, clobberOutputs=args.clobber_outputs
            )
            # accumulate metadata
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": args.output_run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            qgraph = graphBuilder.makeGraph(
                pipeline,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
            )
            if args.show_qgraph_header:
                print(qgraph.buildAndPrintHeader())

        # Count quanta in graph and give a warning if it's empty and return
        # None.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info(
                "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
                nQuanta,
                len(qgraph.taskGraph),
                qgraph.graphID,
            )

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler):
                newArgs.butler_config = butler._config
                # Calling makeWriteButler is done for the side effects of
                # calling that method, mainly parsing all the args into
                # collection names, creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include output collection in collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                try:
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)
                except MissingCollectionError:
                    pass

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
            )

        return qgraph

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory
        args : `types.SimpleNamespace`
            Parsed command line
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance, if not defined then new instance is made
            using command line options.
        """
        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                # Logger.warn is a deprecated alias; use warning().
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run, mock=args.mock)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                mock=args.mock,
                mock_configs=args.mock_configs,
            )
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                executionGraphFixup=graphFixup,
            )
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph, butler)
            finally:
                # The summary is written even if execution raised.
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Unknown ``--show`` values print a message to standard error and
        terminate the process with exit status 1.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line
        pipeline : `Pipeline`
            Pipeline definition
        graph : `QuantumGraph`, optional
            Execution graph
        """
        pipelineCommands = ("pipeline", "config", "dump-config", "history", "tasks")
        graphCommands = ("graph", "workflow", "uri")
        choices = (
            "pipeline",
            "config[=XXX]",
            "dump-config[=XXX]",
            "history=XXX",
            "tasks",
            "graph",
            "workflow",
            "uri",
        )

        for what in args.show:
            showCommand, _, showArgs = what.partition("=")

            # Skip (with a warning) commands whose required input is missing.
            if showCommand in pipelineCommands and not pipeline:
                _LOG.warning("Pipeline is required for --show=%s", showCommand)
                continue

            if showCommand in graphCommands and not graph:
                _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                self._showGraph(graph)
            elif showCommand == "uri":
                self._showUri(graph, args)
            elif showCommand == "workflow":
                self._showWorkflow(graph, args)
            else:
                print(
                    "Unknown value for show: %s (choose from '%s')" % (what, "', '".join(choices)),
                    file=sys.stderr,
                )
                sys.exit(1)

    @staticmethod
    def _parseShowConfigArgs(showArgs):
        """Split a ``--show config`` argument into task label and pattern.

        Parameters
        ----------
        showArgs : `str`
            Argument of the form ``[TaskLabel::][config.][pattern]``.

        Returns
        -------
        taskName : `str` or `None`
            Task label, `None` if no ``TaskLabel::`` prefix is present.
        pattern : `str` or `None`
            Field pattern with any leading ``config.`` removed, `None` if
            nothing remains.
        """
        matConfig = CmdLineFwk._SHOW_CONFIG_RE.search(showArgs)
        return matConfig.group(1), matConfig.group(2)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration

        Exits the process with status 1 if no task matches.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show
        dumpFullConfig : `bool`
            If true then dump complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            taskName, pattern = self._parseShowConfigArgs(showArgs)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print(f"### Configuration for task `{taskDef.label}'")
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration

        Exits the process with status 1 if no pattern is given, no task
        matches, or no field matches the pattern.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show
        """

        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        found = False
        for taskDef in tasks:

            config = taskDef.config

            # Look for any matches in the config hierarchy for this name
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    print("")

                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # looking for top-level field
                        hconfig = taskDef.config
                    else:
                        # NOTE(review): eval() is applied to a path taken from
                        # config.names(), i.e. names generated by the config
                        # object itself rather than raw user input; confirm
                        # that stays true before widening its use.
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(
                        f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                        file=sys.stderr,
                    )
                    hconfig = None

                # Sometimes we end up with a non-Config so skip those
                if isinstance(hconfig, (pexConfig.Config, pexConfig.ConfigurableInstance)) and hasattr(
                    hconfig, cname
                ):
                    print(f"### Configuration field for task `{taskDef.label}'")
                    print(pexConfigHistory.format(hconfig, cname))
                    found = True

        if not found:
            print(f"None of the tasks has field matching {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout

        Parameters
        ----------
        pipeline: `Pipeline`
        """
        for taskDef in pipeline.toExpandedPipeline():
            print(f"### Subtasks for task `{taskDef.taskName}'")

            for configName, taskName in util.subTaskIter(taskDef.config):
                print(f"{configName}: {taskName}")

    def _showGraph(self, graph):
        """Print quanta information to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNode in graph.taskGraph:
            print(taskNode)

            for iq, quantum in enumerate(graph.getQuantaForTask(taskNode)):
                print(" Quantum {}:".format(iq))
                print(" inputs:")
                for key, refs in quantum.inputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print(" {}: [{}]".format(key, ", ".join(dataIds)))
                print(" outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print(" {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependency to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line (not used by this method).
        """
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}")
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId} - Child Quantum {node.nodeId}")

    def _showUri(self, graph, args):
        """Print input and predicted output URIs to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph
        args : `types.SimpleNamespace`
            Parsed command line
        """
        butler = _ButlerFactory.makeReadButler(args)

        def dumpURIs(thisRef):
            # predict=True asks the butler for a URI even if the dataset has
            # not been written; "TBD" is passed as the run name.
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f" {primary}")
            else:
                print(" (disassembled artifact)")
                for compName, compUri in components.items():
                    print(f" {compName}: {compUri}")

        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}")
            print(" inputs:")
            for key, refs in node.quantum.inputs.items():
                for ref in refs:
                    dumpURIs(ref)
            print(" outputs:")
            for key, refs in node.quantum.outputs.items():
                for ref in refs:
                    dumpURIs(ref)

    def _importGraphFixup(self, args):
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            `None` if ``args.graph_fixup`` is not set.

        Raises
        ------
        ValueError
            Raised if import fails, method call raises exception, or returned
            instance has unexpected type.
        """
        if not args.graph_fixup:
            return None
        try:
            factory = doImport(args.graph_fixup)
        except Exception as exc:
            raise ValueError("Failed to import graph fixup class/method") from exc
        try:
            fixup = factory()
        except Exception as exc:
            raise ValueError("Failed to make instance of graph fixup") from exc
        if not isinstance(fixup, ExecutionGraphFixup):
            raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
        return fixup