Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py : 9%

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
22"""Module defining CmdLineFwk class and related methods.
23"""
25__all__ = ['CmdLineFwk']

# -------------------------------
# Imports of standard modules --
# -------------------------------
import fnmatch
import logging
import pickle
import re
import sys
import warnings
import functools
from collections import defaultdict

# -----------------------------
# Imports for other modules --
# -----------------------------
from lsst.daf.butler import Butler, DatasetRef
import lsst.log
import lsst.pex.config as pexConfig
from lsst.pipe.base import GraphBuilder, Pipeline, QuantumGraph
from .cmdLineParser import makeParser
from .dotTools import graph2dot, pipeline2dot
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .taskFactory import TaskFactory
from . import util

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

# logging properties
_LOG_PROP = """\
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern={}
"""
_LOG = logging.getLogger(__name__.partition(".")[2])


class _FilteredStream:
    """A file-like object that filters some config fields.

    Notes
    -----
    This class depends on implementation details of the
    ``Config.saveToStream`` methods, in particular that that method uses a
    single call to ``write()`` to save information about a single config
    field, and that that call combines the comment string(s) for a field
    with the field path and value. This class will not work reliably on
    "import" strings, so imports should be disabled by passing
    ``skipImports=True`` to ``saveToStream()``.
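
    Examples
    --------
    A minimal sketch of the intended use; ``cfg`` stands in for any
    `lsst.pex.config.Config` instance::

        stream = _FilteredStream("astrom*")
        cfg.saveToStream(stream, root="config", skipImports=True)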
80 """

    def __init__(self, pattern):
        # obey case if pattern isn't lowercase or requests NOIGNORECASE
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(f"Matching \"{pattern}\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)

# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
106 """PipelineTask framework which executes tasks from command line.
108 In addition to executing tasks this activator provides additional methods
109 for task management like dumping configuration or execution chain.
110 """

    MP_TIMEOUT = 9999  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass

    def parseAndRun(self, argv=None):
        """Parse command line and execute all commands.

        This method is the main entry point for this class.

        Parameters
        ----------
        argv : `list` of `str`, optional
            List of command line arguments; if not specified then
            ``sys.argv[1:]`` is used.
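
        Examples
        --------
        A minimal sketch; the subcommand name matches the checks below, but
        the ``-p pipeline.yaml`` option is illustrative and depends on the
        arguments ``makeParser`` actually defines::

            fwk = CmdLineFwk()
            status = fwk.parseAndRun(["build", "-p", "pipeline.yaml"])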
127 """
        if argv is None:
            argv = sys.argv[1:]

        # start with parsing command line; only do partial parsing now as
        # the tasks can add more arguments later
        parser = makeParser()
        args = parser.parse_args(argv)

        # First thing to do is to set up logging.
        self.configLog(args.longlog, args.loglevel)

        taskFactory = TaskFactory()

        # make pipeline out of command line arguments (can return empty pipeline)
        try:
            pipeline = self.makePipeline(args)
        except Exception as exc:
            print("Failed to build pipeline: {}".format(exc), file=sys.stderr)
            raise

        if args.subcommand == "build":
            # stop here but process --show option first
            self.showInfo(args, pipeline)
            return 0

        # make quantum graph
        try:
            qgraph = self.makeGraph(pipeline, args)
        except Exception as exc:
            print("Failed to build graph: {}".format(exc), file=sys.stderr)
            raise

        # optionally dump some info
        self.showInfo(args, pipeline, qgraph)

        if qgraph is None:
            # No need to raise an exception here; the code that makes the
            # graph should have printed a warning message already.
            return 2

        if args.subcommand == "qgraph":
            # stop here
            return 0

        # execute
        if args.subcommand == "run":
            return self.runPipeline(qgraph, taskFactory, args)

    @staticmethod
    def configLog(longlog, logLevels):
        """Configure logging system.

        Parameters
        ----------
        longlog : `bool`
            If True then make log messages appear in "long format".
        logLevels : `list` of `tuple`
            Per-component logging levels; each item in the list is a tuple
            (component, level), where ``component`` is a logger name or
            `None` for the root logger, and ``level`` is a logging level
            name ('DEBUG', 'INFO', etc.).
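
        Examples
        --------
        A sketch of the expected argument shapes; the component name is
        illustrative::

            CmdLineFwk.configLog(longlog=False,
                                 logLevels=[(None, "INFO"),
                                            ("lsst.daf.butler", "DEBUG")])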
189 """
        if longlog:
            message_fmt = "%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n"
        else:
            message_fmt = "%c %p: %m%n"

        # global logging config
        lsst.log.configure_prop(_LOG_PROP.format(message_fmt))

        # Forward all Python logging to lsst.log
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        lgr.addHandler(lsst.log.LogHandler())

        # also capture warnings and send them to logging
        logging.captureWarnings(True)

        # configure individual loggers
        for component, level in logLevels:
            level = getattr(lsst.log.Log, level.upper(), None)
            if level is not None:
                # set logging level for lsst.log
                logger = lsst.log.Log.getLogger(component or "")
                logger.setLevel(level)
                # set logging level for Python logging
                pyLevel = lsst.log.LevelTranslator.lsstLog2logging(level)
                logging.getLogger(component).setLevel(pyLevel)

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
        """
        if args.pipeline:
            pipeline = Pipeline.fromFile(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":

                pipeline.addInstrument(action.value)

            elif action.action == "new_task":

                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":

                pipeline.removeTask(action.label)

            elif action.action == "config":

                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":

                pipeline.addConfigFile(action.label, action.value)

            else:

                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.toFile(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if the graph is read from a
            pickle file.
        args : `argparse.Namespace`
            Parsed command line

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
        """
        if args.qgraph:

            # Un-pickling a QuantumGraph needs a dimensions universe defined
            # in the registry. The easiest way to do that now is to
            # initialize a whole data butler. Butler requires a run or
            # collection in its constructor, but in this case we do not care
            # about (or do not know) which collection to use, so give it an
            # empty name.
            butler = Butler(config=args.butler_config, collection="")

            with open(args.qgraph, 'rb') as pickleFile:
                qgraph = pickle.load(pickleFile)
            if not isinstance(qgraph, QuantumGraph):
                raise TypeError("QuantumGraph pickle file has incorrect object type: {}".format(
                    type(qgraph)))

            # pipeline cannot be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")

        else:

            if not pipeline:
                raise ValueError("Pipeline must be given for quantum graph construction.")

            # build collection names
            inputs = args.input.copy()
            defaultInputs = inputs.pop("", None)
            outputs = args.output.copy()
            defaultOutputs = outputs.pop("", None)

            # Make butler instance. From this Butler we only need the
            # Registry instance. Input/output collections are handled by
            # pre-flight and we don't want to be constrained here by Butler's
            # restrictions on collection names.
            collection = defaultInputs[0] if defaultInputs else None
            butler = Butler(config=args.butler_config, collection=collection)

            # if default input collections are not given on command line
            # then use one from Butler (has to be configured in butler
            # config)
            if not defaultInputs:
                defaultInputs = [butler.collection]
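            # Map each dataset type name to its list of input collections;
            # any name without an explicit entry falls back to a fresh copy
            # of defaultInputs (functools.partial(list, defaultInputs)
            # builds a new list on every missing key).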
            inputCollections = defaultdict(functools.partial(list, defaultInputs))
            inputCollections.update(inputs)
            outputCollection = defaultOutputs
            if outputs:
                # TODO: this may never be supported; maybe we should just
                # remove the command-line option?
                raise NotImplementedError("Different output collections for different dataset "
                                          "types are not currently supported.")

            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(butler.registry,
                                        skipExisting=args.skip_existing,
                                        clobberExisting=args.clobber_output)
            qgraph = graphBuilder.makeGraph(pipeline, inputCollections, outputCollection, args.data_query)

        # count quanta in the graph; warn and return None if it is empty
        nQuanta = qgraph.countQuanta()
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info("QuantumGraph contains %d quanta for %d tasks",
                      nQuanta, len(qgraph))

        if args.save_qgraph:
            with open(args.save_qgraph, "wb") as pickleFile:
                pickle.dump(qgraph, pickleFile)

        if args.save_single_quanta:
            for iq, sqgraph in enumerate(qgraph.quantaAsQgraph()):
                filename = args.save_single_quanta.format(iq)
                with open(filename, "wb") as pickleFile:
                    pickle.dump(sqgraph, pickleFile)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        return qgraph

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory
        args : `argparse.Namespace`
            Parsed command line
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # If a default output collection is given then use it to override
        # the butler-configured one.
        run = args.output.get("", None)

        # make butler instance
        if butler is None:
            butler = Butler(config=args.butler_config, run=run)

        # at this point we require that an output collection is defined
        if not butler.run:
            raise ValueError("no output collection defined in data butler")

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses the fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        preExecInit = PreExecInit(butler, taskFactory, args.skip_existing, args.clobber_output)
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types)

        if not args.init_only:
            executor = MPGraphExecutor(numProc=args.processes, timeout=self.MP_TIMEOUT,
                                       skipExisting=args.skip_existing,
                                       clobberOutput=args.clobber_output,
                                       enableLsstDebug=args.enableLsstDebug)
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler, taskFactory)

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line
        pipeline : `Pipeline`
            Pipeline definition
        graph : `QuantumGraph`, optional
            Execution graph
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "dump-config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what, "', '".join("pipeline config[=XXX] dump-config[=Task] history=XXX "
                                         "tasks graph workflow".split())),
                      file=sys.stderr)
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show
        dumpFullConfig : `bool`
            If true then dump complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
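            # e.g. "calibrate::astrom*" yields taskName="calibrate" and
            # pattern="astrom*", while a bare "astrom*" leaves taskName as
            # None (the label and pattern here are purely illustrative)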
            matConfig = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show
        """

        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        cpath, _, cname = pattern.rpartition(".")
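        # split e.g. "a.b.c" into the containing path ("a.b") and the field
        # name ("c"); for a top-level field cpath is empty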
        found = False
        for taskDef in tasks:
            try:
                if not cpath:
                    # looking for top-level field
                    hconfig = taskDef.config
                else:
                    hconfig = eval("config." + cpath, {}, {"config": taskDef.config})
            except AttributeError:
                # Means this config object has no such field, but maybe
                # some other task has it.
                continue
            except Exception:
                # Any other exception probably means some error in the
                # expression.
                print(f"ERROR: Failed to evaluate field expression `{pattern}'", file=sys.stderr)
                sys.exit(1)

            if hasattr(hconfig, cname):
                print(f"### Configuration field for task `{taskDef.label}'")
                print(pexConfig.history.format(hconfig, cname))
                found = True

        if not found:
            print(f"None of the tasks has a field named {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNodes in graph:
            print(taskNodes.taskDef)

            for iq, quantum in enumerate(taskNodes.quanta):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.predictedInputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependencies to stdout

        For each quantum the input and predicted output URIs, based on the
        Butler repo, are printed.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `argparse.Namespace`
            Parsed command line
        """
        run = args.output.get("", None)
        butler = Butler(config=args.butler_config, run=run)
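        # Maps hash((dataset type name, dataId)) of every predicted output
        # to the index of the quantum that produces it, so that dependencies
        # can be recovered in the second pass below.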
        hashToParent = {}
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            shortname = taskDef.taskName.split('.')[-1]
            print("Quantum {}: {}".format(iq, shortname))
            print("  inputs:")
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    if butler.datastore.exists(ref):
                        print("    {}".format(butler.datastore.getUri(ref)))
                    else:
                        fakeRef = DatasetRef(ref.datasetType, ref.dataId, run=run)
                        print("    {}".format(butler.datastore.getUri(fakeRef, predict=True)))
            print("  outputs:")
            for key, refs in quantum.outputs.items():
                for ref in refs:
                    if butler.datastore.exists(ref):
                        print("    {}".format(butler.datastore.getUri(ref)))
                    else:
                        fakeRef = DatasetRef(ref.datasetType, ref.dataId, run=run)
                        print("    {}".format(butler.datastore.getUri(fakeRef, predict=True)))
                    # Store hash to figure out dependency
                    dhash = hash((key, ref.dataId))
                    hashToParent[dhash] = iq
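
        # Second pass: for every predicted input, look up the quantum that
        # produces it and print each parent/child edge exactly once.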
        uses = set()
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    dhash = hash((key, ref.dataId))
                    if dhash in hashToParent and (iq, hashToParent[dhash]) not in uses:
                        parentIq = hashToParent[dhash]
                        uses.add((iq, parentIq))  # iq uses parentIq
                        print("Parent Quantum {} - Child Quantum {}".format(parentIq, iq))