Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py : 9%

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

# -------------------------------
#  Imports of standard modules --
# -------------------------------
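# The import statements themselves did not survive extraction; the
# standard-library names below are reconstructed from their uses later in
# this module.
import fnmatch
import functools
import logging
import pickle
import re
import sys
import warnings
from collections import defaultdict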

# -----------------------------
#  Imports for other modules --
# -----------------------------
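# Likewise reconstructed from usage below.  The LSST package imports are
# standard Butler/pipe_base/pex_config/log APIs; the relative module paths
# for the ctrl_mpexec helpers (cmdLineParser, dotTools, mpGraphExecutor,
# preExecInit, taskFactory, util) are assumptions based on the names used
# in this file.
import lsst.log
import lsst.pex.config as pexConfig
from lsst.daf.butler import Butler, DatasetRef
from lsst.pipe.base import GraphBuilder, Pipeline, QuantumGraph

from . import util
from .cmdLineParser import makeParser
from .dotTools import graph2dot, pipeline2dot
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .taskFactory import TaskFactory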

# ----------------------------------
#  Local non-exported definitions --
# ----------------------------------
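# A module-level logger named ``_LOG`` is used throughout this file; its
# definition was lost in extraction, so a plain Python logger is assumed
# here (the original may use ``lsst.log`` instead).
_LOG = logging.getLogger(__name__)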
# logging properties
_LOG_PROP = """\
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern={}
"""
"""A file-like object that filters some config fields.
Note ---- This class depends on implementation details of ``Config.saveToStream`` methods, in particular that that method uses single call to write() method to save information about single config field, and that call combines comments string(s) for a field and field path and value. This class will not work reliably on the "import" strings, so imports should be disabled by passing ``skipImports=True`` to ``saveToStream()``. """ # obey case if pattern isn't lowercase or requests NOIGNORECASE mat = re.search(r"(.*):NOIGNORECASE$", pattern)
if mat: pattern = mat.group(1) self._pattern = re.compile(fnmatch.translate(pattern)) else: if pattern != pattern.lower(): print(f"Matching \"{pattern}\" without regard to case " "(append :NOIGNORECASE to prevent this)", file=sys.stdout) self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)
# Strip off doc string line(s) and cut off at "=" for string matching matchStr = showStr.rstrip().split("\n")[-1].split("=")[0] if self._pattern.search(matchStr): sys.stdout.write(showStr)

# ------------------------
#  Exported definitions --
# ------------------------
"""PipelineTask framework which executes tasks from command line.
In addition to executing tasks this activator provides additional methods for task management like dumping configuration or execution chain. """
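
    # ``self.MP_TIMEOUT`` is passed to MPGraphExecutor in runPipeline() below,
    # but its original value was not preserved; the number here is a
    # placeholder assumption, not the project's actual default.
    MP_TIMEOUT = 3600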

    def __init__(self):
        pass
""" This method is a main entry point for this class, it parses command line and executes all commands.

        Parameters
        ----------
        argv : `list` of `str`, optional
            List of command line arguments; if not specified then
            `sys.argv[1:]` is used.
        """
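        # Usage sketch (not from the original source): a thin command-line
        # wrapper would typically do
        #     fwk = CmdLineFwk()
        #     sys.exit(fwk.parseAndRun())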
        if argv is None:
            argv = sys.argv[1:]

        # start with parsing command line, only do partial parsing now as
        # the tasks can add more arguments later
        parser = makeParser()
        args = parser.parse_args(argv)

        # First thing to do is to set up logging.
        self.configLog(args.longlog, args.loglevel)

        taskFactory = TaskFactory()
        # make pipeline out of command line arguments (can return empty pipeline)
        try:
            pipeline = self.makePipeline(args)
        except Exception as exc:
            print("Failed to build pipeline: {}".format(exc), file=sys.stderr)
            raise

        if args.subcommand == "build":
            # stop here but process --show option first
            self.showInfo(args, pipeline)
            return 0

        # make quantum graph
        try:
            qgraph = self.makeGraph(pipeline, args)
        except Exception as exc:
            print("Failed to build graph: {}".format(exc), file=sys.stderr)
            raise

        # optionally dump some info
        self.showInfo(args, pipeline, qgraph)

        if qgraph is None:
            # No need to raise an exception here, code that makes graph
            # should have printed warning message already.
            return 2

        if args.subcommand == "qgraph":
            # stop here
            return 0

        # execute
        if args.subcommand == "run":
            return self.runPipeline(qgraph, taskFactory, args)

    # No ``self`` in the signature while being called as ``self.configLog(...)``
    # above, so this must be a static method.
    @staticmethod
    def configLog(longlog, logLevels):
        """Configure logging system.

        Parameters
        ----------
        longlog : `bool`
            If True then make log messages appear in "long format".
        logLevels : `list` of `tuple`
            Per-component logging levels; each item in the list is a tuple
            (component, level), where ``component`` is a logger name or `None`
            for the root logger and ``level`` is a logging level name
            ('DEBUG', 'INFO', etc.).
        """
        if longlog:
            message_fmt = "%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n"
        else:
            message_fmt = "%c %p: %m%n"
        # global logging config
        lsst.log.configure_prop(_LOG_PROP.format(message_fmt))

        # Forward all Python logging to lsst.log
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        lgr.addHandler(lsst.log.LogHandler())

        # also capture warnings and send them to logging
        logging.captureWarnings(True)

        # configure individual loggers
        for component, level in logLevels:
            level = getattr(lsst.log.Log, level.upper(), None)
            if level is not None:
                # set logging level for lsst.log
                logger = lsst.log.Log.getLogger(component or "")
                logger.setLevel(level)
                # set logging level for Python logging
                pyLevel = lsst.log.LevelTranslator.lsstLog2logging(level)
                logging.getLogger(component).setLevel(pyLevel)
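
        # Example call (illustrative only): configure the root logger at INFO
        # and one component at DEBUG:
        #     CmdLineFwk.configLog(False, [(None, "INFO"), ("lsst.daf.butler", "DEBUG")])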
"""Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            The constructed pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.fromFile(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")
        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:

            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)

            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":
                pipeline.removeTask(action.label)

            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)

            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.toFile(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
"""Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if graph is read from pickle
            file.
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If resulting graph is empty then `None` is returned.
        """
        if args.qgraph:
            # Un-pickling QGraph needs a dimensions universe defined in
            # registry. Easiest way to do it now is to initialize whole data
            # butler. Butler requires run or collection provided in
            # constructor but in this case we do not care about (or do not
            # know) what collection to use so give it an empty name.
            butler = Butler(config=args.butler_config, collection="")

            with open(args.qgraph, 'rb') as pickleFile:
                qgraph = pickle.load(pickleFile)
                if not isinstance(qgraph, QuantumGraph):
                    raise TypeError("QuantumGraph pickle file has incorrect object type: {}".format(
                        type(qgraph)))

            # pipeline cannot be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
        else:

            if not pipeline:
                raise ValueError("Pipeline must be given for quantum graph construction.")

            # build collection names
            inputs = args.input.copy()
            defaultInputs = inputs.pop("", None)
            outputs = args.output.copy()
            defaultOutputs = outputs.pop("", None)

            # Make butler instance. From this Butler we only need Registry
            # instance. Input/output collections are handled by pre-flight
            # and we don't want to be constrained here by Butler's
            # restrictions on collection names.
            collection = defaultInputs[0] if defaultInputs else None
            butler = Butler(config=args.butler_config, collection=collection)

            # if default input collections are not given on command line then
            # use one from Butler (has to be configured in butler config)
            if not defaultInputs:
                defaultInputs = [butler.collection]
            inputCollections = defaultdict(functools.partial(list, defaultInputs))
            inputCollections.update(inputs)
            outputCollection = defaultOutputs
            if outputs:
                # TODO: this may never be supported; maybe we should just
                # remove the command-line option?
                raise NotImplementedError("Different output collections for different dataset "
                                          "types is not currently supported.")

            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(butler.registry,
                                        skipExisting=args.skip_existing,
                                        clobberExisting=args.clobber_output)
            qgraph = graphBuilder.makeGraph(pipeline, inputCollections, outputCollection,
                                            args.data_query)
        # count quanta in graph, give a warning if it's empty and return None
        nQuanta = qgraph.countQuanta()
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info("QuantumGraph contains %d quanta for %d tasks", nQuanta, len(qgraph))

        if args.save_qgraph:
            with open(args.save_qgraph, "wb") as pickleFile:
                pickle.dump(qgraph, pickleFile)

        if args.save_single_quanta:
            for iq, sqgraph in enumerate(qgraph.quantaAsQgraph()):
                filename = args.save_single_quanta.format(iq)
                with open(filename, "wb") as pickleFile:
                    pickle.dump(sqgraph, pickleFile)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        return qgraph
"""Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `argparse.Namespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # If default output collection is given then use it to override
        # butler-configured one.
        run = args.output.get("", None)

        # make butler instance
        if butler is None:
            butler = Butler(config=args.butler_config, run=run)

        # at this point we require that output collection was defined
        if not butler.run:
            raise ValueError("no output collection defined in data butler")
        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warn("No 'debug' module found.")

        preExecInit = PreExecInit(butler, taskFactory, args.skip_existing, args.clobber_output)
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types)

        if not args.init_only:
            executor = MPGraphExecutor(numProc=args.processes, timeout=self.MP_TIMEOUT,
                                       skipExisting=args.skip_existing,
                                       clobberOutput=args.clobber_output,
                                       enableLsstDebug=args.enableLsstDebug)
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler, taskFactory)
"""Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what, "', '".join("pipeline config[=XXX] dump-config[=XXX] history=XXX "
                                         "tasks graph workflow".split())),
                      file=sys.stderr)
                sys.exit(1)
"""Show task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If true then dump complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)
"""Show history for task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        """
        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)",
                  file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        cpath, _, cname = pattern.rpartition(".")
        found = False
        for taskDef in tasks:
            try:
                if not cpath:
                    # looking for top-level field
                    hconfig = taskDef.config
                else:
                    hconfig = eval("config." + cpath, {}, {"config": taskDef.config})
            except AttributeError:
                # Means this config object has no such field, but maybe some
                # other task has it.
                continue
            except Exception:
                # Any other exception probably means some error in the
                # expression.
                print(f"ERROR: Failed to evaluate field expression `{pattern}'", file=sys.stderr)
                sys.exit(1)

            if hasattr(hconfig, cname):
                print(f"### Configuration field for task `{taskDef.label}'")
                print(pexConfig.history.format(hconfig, cname))
                found = True

        if not found:
            print(f"None of the tasks has field named {pattern}", file=sys.stderr)
            sys.exit(1)
"""Print task hierarchy to stdout

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))
"""Print quanta information to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNodes in graph:
            print(taskNodes.taskDef)

            for iq, quantum in enumerate(taskNodes.quanta):
                print(" Quantum {}:".format(iq))
                print(" inputs:")
                for key, refs in quantum.predictedInputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print(" {}: [{}]".format(key, ", ".join(dataIds)))
                print(" outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print(" {}: [{}]".format(key, ", ".join(dataIds)))
"""Print quanta information and dependency to stdout
The input and predicted output URIs based on the Butler repo are printed.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `argparse.Namespace`
            Parsed command line.
        """
        run = args.output.get("", None)
        butler = Butler(config=args.butler_config, run=run)
        hashToParent = {}
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            shortname = taskDef.taskName.split('.')[-1]
            print("Quantum {}: {}".format(iq, shortname))
            print(" inputs:")
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    if butler.datastore.exists(ref):
                        print(" {}".format(butler.datastore.getUri(ref)))
                    else:
                        fakeRef = DatasetRef(ref.datasetType, ref.dataId, run=run)
                        print(" {}".format(butler.datastore.getUri(fakeRef, predict=True)))
            print(" outputs:")
            for key, refs in quantum.outputs.items():
                for ref in refs:
                    if butler.datastore.exists(ref):
                        print(" {}".format(butler.datastore.getUri(ref)))
                    else:
                        fakeRef = DatasetRef(ref.datasetType, ref.dataId, run=run)
                        print(" {}".format(butler.datastore.getUri(fakeRef, predict=True)))
                    # Store hash to figure out dependency
                    dhash = hash((key, ref.dataId))
                    hashToParent[dhash] = iq

        uses = set()
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    dhash = hash((key, ref.dataId))
                    if dhash in hashToParent and (iq, hashToParent[dhash]) not in uses:
                        parentIq = hashToParent[dhash]
                        uses.add((iq, parentIq))  # iq uses parentIq
                        print("Parent Quantum {} - Child Quantum {}".format(parentIq, iq))