# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Module defining `makeParser` factory method.
23"""
25__all__ = ["makeParser"]
27# -------------------------------
28# Imports of standard modules --
29# -------------------------------
30from argparse import Action, ArgumentParser, RawDescriptionHelpFormatter
31import collections
32import copy
33import re
34import textwrap

# -----------------------------
# Imports for other modules --
# -----------------------------

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

# Named tuple describing an action that needs to be performed when
# building a pipeline; its attributes are:
#   action: the name of the action, e.g. "new_task", "delete_task"
#   label: task label, can be None if the action does not require a label
#   value: argument value excluding task label.
_PipelineAction = collections.namedtuple("_PipelineAction", "action,label,value")


class _PipelineActionType:
    """Class defining a callable type which converts strings into
    _PipelineAction instances.

    Parameters
    ----------
    action : str
        Name of the action; becomes the `action` attribute of the returned
        instance.
    regex : str
        Regular expression for the argument value; it can define groups
        'label' and 'value' which will become corresponding attributes of
        a returned instance.
    valueType : callable, optional
        Type conversion applied to the 'value' string (default: `str`).
    """

    def __init__(self, action, regex='.*', valueType=str):
        self.action = action
        self.regex = re.compile(regex)
        self.valueType = valueType

    def __call__(self, value):
        match = self.regex.match(value)
        if not match:
            raise TypeError("Unrecognized option syntax: " + value)
        # get "label" group or use None as label
        try:
            label = match.group("label")
        except IndexError:
            label = None
        # if "value" group is not defined use whole string
        try:
            value = match.group("value")
        except IndexError:
            pass
        value = self.valueType(value)
        return _PipelineAction(self.action, label, value)

    def __repr__(self):
        """String representation of this class.

        argparse can use this for some error messages; the default
        implementation makes those messages incomprehensible.
        """
        return f"_PipelineActionType(action={self.action})"


_ACTION_ADD_TASK = _PipelineActionType("new_task", "(?P<value>[^:]+)(:(?P<label>.+))?")
_ACTION_DELETE_TASK = _PipelineActionType("delete_task", "(?P<value>)(?P<label>.+)")
_ACTION_CONFIG = _PipelineActionType("config", "(?P<label>.+):(?P<value>.+=.+)")
_ACTION_CONFIG_FILE = _PipelineActionType("configfile", "(?P<label>.+):(?P<value>.+)")
_ACTION_ADD_INSTRUMENT = _PipelineActionType("add_instrument", "(?P<value>[^:]+)")
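
# For illustration only (a sketch; the task path below is a made-up
# placeholder, not a real task): each action type above maps an option
# string to a _PipelineAction tuple, e.g.
#
#     >>> _ACTION_ADD_TASK("lsst.pipe.tasks.example.ExampleTask:myLabel")
#     _PipelineAction(action='new_task', label='myLabel', value='lsst.pipe.tasks.example.ExampleTask')
#     >>> _ACTION_CONFIG("myLabel:threshold=5.0")
#     _PipelineAction(action='config', label='myLabel', value='threshold=5.0')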


class _LogLevelAction(Action):
    """Action class which collects logging levels.

    This action class collects arguments in the form "LEVEL" or
    "COMPONENT=LEVEL" where LEVEL is the name of the logging level
    (case-insensitive). It converts the series of arguments into a list of
    (COMPONENT, LEVEL) tuples. If the component name is missing, the first
    item of the tuple is set to `None`. The second item is converted to
    upper case.
    """

    permittedLevels = set(['TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL'])

    def __call__(self, parser, namespace, values, option_string=None):
        """Re-implementation of the base class method.

        See `argparse.Action` documentation for parameter description.
        """
        dest = getattr(namespace, self.dest)
        if dest is None:
            dest = []
            setattr(namespace, self.dest, dest)

        component, _, levelStr = values.partition("=")
        if not levelStr:
            levelStr, component = component, None
        logLevelUpr = levelStr.upper()
        if logLevelUpr not in self.permittedLevels:
            parser.error("loglevel=%s not one of %s" % (levelStr, tuple(self.permittedLevels)))
        dest.append((component, logLevelUpr))
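
# For illustration only (a sketch): with the -L option defined below, the
# arguments ``-L info -L lsst.ctrl.mpexec=DEBUG`` accumulate into
#
#     [(None, 'INFO'), ('lsst.ctrl.mpexec', 'DEBUG')]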


class _InputCollectionAction(Action):
    """Action class which collects input collection names.

    This action type accepts string values in the format:

        value :== collection[,collection[...]]
        collection :== [dataset_type:]collection_name

    Converts `values` into a list of 2-item tuples; the first item is the
    collection_name, and the second item is the dataset_type if one is
    provided or an ellipsis (`...`) if it is not:
    `[(collection_name, dataset_type), (collection_name, ...)]`.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Re-implementation of the base class method.

        See `argparse.Action` documentation for parameter description.
        """
        dest = getattr(namespace, self.dest, [])
        # In case default is set to a list (empty or not) we want to use
        # a new copy of that list as the initial value to avoid
        # modifying the default value.
        if dest is self.default:
            dest = copy.copy(dest)

        # split on commas, collection can be preceded by dataset type
        for collstr in values.split(","):
            dsType, sep, collection = collstr.partition(':')
            if not sep:
                dsType, collection = ..., dsType
            dest.append((collection, dsType))

        setattr(namespace, self.dest, dest)
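
# For illustration only (a sketch; the collection names are made-up
# placeholders): with the -i option defined below, the argument
# ``-i raw,calib:HSC/calib`` accumulates into
#
#     [('raw', Ellipsis), ('HSC/calib', 'calib')]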
172_EPILOG = """\
173Notes:
174 * many options can appear multiple times; all values are used, in order
175 left to right
176 * @file reads command-line options from the specified file:
177 * data may be distributed among multiple lines (e.g. one option per line)
178 * data after # is treated as a comment and ignored
179 * blank lines and lines starting with # are ignored
180"""


def _makeButlerOptions(parser):
    """Add a set of options for data butler to a parser.

    Parameters
    ----------
    parser : `argparse.ArgumentParser`
    """
    group = parser.add_argument_group("Data repository and selection options")
    group.add_argument(
        "-b", "--butler-config", dest="butler_config", default=None, metavar="PATH",
        help="Location of the gen3 butler/registry config file."
    )
    group.add_argument(
        "-i", "--input", dest="input", action=_InputCollectionAction,
        metavar="COLL,DSTYPE:COLL", default=[],
        help=(
            "Comma-separated names of the input collection(s). If any entry "
            "includes a colon (:), the first string is a dataset type name "
            "that restricts the search in that collection. "
            "May be passed multiple times (all arguments are concatenated)."
        )
    )
    group.add_argument(
        "-o", "--output", dest="output", default=None, metavar="COLL",
        help=(
            "Name of the output CHAINED collection. This may either be an "
            "existing CHAINED collection to use as both input and output "
            "(incompatible with --input), or a new CHAINED collection created "
            "to include all inputs (requires --input). "
            "In both cases, the collection's children will start with an "
            "output RUN collection that directly holds all new datasets (see "
            "--output-run)."
        )
    )
    group.add_argument(
        "--output-run", dest="output_run", default=None, metavar="COLL",
        help=(
            "Name of the new output RUN collection. If not provided, "
            "--output must be, and a new RUN collection will be created by "
            "appending a timestamp to the value passed with --output. "
            "If this collection already exists, --extend-run must be passed."
        )
    )
    groupex = group.add_mutually_exclusive_group()
    groupex.add_argument(
        "--extend-run", dest="extend_run", default=False, action="store_true",
        help=(
            "Instead of creating a new RUN collection, insert datasets into "
            "either the one given by --output-run (if provided) or the first "
            "child collection of --output (which must be of type RUN)."
        )
    )
    groupex.add_argument(
        "--replace-run", dest="replace_run", default=False, action="store_true",
        help=(
            "Before creating a new RUN collection in an existing CHAINED "
            "collection, remove the first child collection (which must be of "
            "type RUN). "
            "This can be used to repeatedly write to the same (parent) "
            "collection during development, but it does not delete the "
            "datasets associated with the replaced run unless "
            "--prune-replaced is also passed. "
            "Requires --output, and is incompatible with --extend-run."
        )
    )
    group.add_argument(
        "--prune-replaced", dest="prune_replaced", choices=("unstore", "purge"), default=None,
        help=(
            "Delete the datasets in the collection replaced by --replace-run, "
            "either just from the datastore ('unstore') or by removing them "
            "and the RUN completely ('purge'). Requires --replace-run."
        )
    )
    group.add_argument("-d", "--data-query", dest="data_query", default="", metavar="QUERY",
                       help="User data selection expression.")


def _makeMetaOutputOptions(parser):
    """Add a set of options describing output metadata.

    Parameters
    ----------
    parser : `argparse.ArgumentParser`
    """
    group = parser.add_argument_group("Meta-information output options")
    group.add_argument("--skip-init-writes", dest="skip_init_writes", default=False,
                       action="store_true",
                       help="Do not write collection-wide 'init output' datasets (e.g. schemas).")
    group.add_argument("--init-only", dest="init_only", default=False,
                       action="store_true",
                       help="Do not actually run; just register dataset types and/or save init outputs.")
    group.add_argument("--register-dataset-types", dest="register_dataset_types", default=False,
                       action="store_true",
                       help="Register DatasetTypes that do not already exist in the Registry.")
    group.add_argument("--no-versions", dest="no_versions", default=False,
                       action="store_true",
                       help="Do not save or check package versions.")


def _makeLoggingOptions(parser):
    """Add a set of options for logging configuration.

    Parameters
    ----------
    parser : `argparse.ArgumentParser`
    """
    group = parser.add_argument_group("Logging options")
    group.add_argument("-L", "--loglevel", action=_LogLevelAction, default=[],
                       help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
                       metavar="LEVEL|COMPONENT=LEVEL")
    group.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
    group.add_argument("--debug", action="store_true", dest="enableLsstDebug",
                       help="enable debugging output using lsstDebug facility (imports debug.py)")


def _makePipelineOptions(parser):
    """Add a set of options for building a pipeline.

    Parameters
    ----------
    parser : `argparse.ArgumentParser`
    """
    group = parser.add_argument_group("Pipeline building options")
    group.add_argument("-p", "--pipeline", dest="pipeline",
                       help="Location of a pipeline definition file in YAML format.",
                       metavar="PATH")
    group.add_argument("-t", "--task", metavar="TASK[:LABEL]",
                       dest="pipeline_actions", action='append', type=_ACTION_ADD_TASK,
                       help="Task name to add to pipeline; must be a fully qualified task name. "
                            "The task name can be followed by a colon and a label name; if no "
                            "label is given then the task base name (class name) is used as "
                            "the label.")
    group.add_argument("--delete", metavar="LABEL",
                       dest="pipeline_actions", action='append', type=_ACTION_DELETE_TASK,
                       help="Delete task with given label from pipeline.")
    group.add_argument("-c", "--config", metavar="LABEL:NAME=VALUE",
                       dest="pipeline_actions", action='append', type=_ACTION_CONFIG,
                       help="Configuration override(s) for a task with specified label, "
                            "e.g. -c task:foo=newfoo -c task:bar.baz=3.")
    group.add_argument("-C", "--configfile", metavar="LABEL:PATH",
                       dest="pipeline_actions", action='append', type=_ACTION_CONFIG_FILE,
                       help="Configuration override file(s); applies to a task with the given label.")
    group.add_argument("--order-pipeline", dest="order_pipeline",
                       default=False, action="store_true",
                       help="Order tasks in pipeline based on their data dependencies; "
                            "ordering is performed as the last step before saving or "
                            "executing the pipeline.")
    group.add_argument("-s", "--save-pipeline", dest="save_pipeline",
                       help="Location for storing resulting pipeline definition in YAML format.",
                       metavar="PATH")
    group.add_argument("--pipeline-dot", dest="pipeline_dot",
                       help="Location for storing GraphViz DOT representation of a pipeline.",
                       metavar="PATH")
    group.add_argument("--instrument", metavar="instrument",
                       dest="pipeline_actions", action="append", type=_ACTION_ADD_INSTRUMENT,
                       help="Add an instrument which will be used to load config overrides when "
                            "defining a pipeline. This must be the fully qualified class name.")


def _makeQuantumGraphOptions(parser):
    """Add a set of options controlling quantum graph generation.

    Parameters
    ----------
    parser : `argparse.ArgumentParser`
    """
    group = parser.add_argument_group("Quantum graph building options")
    group.add_argument("-g", "--qgraph", dest="qgraph",
                       help="Location for a serialized quantum graph definition "
                            "(pickle file). If this option is given then all input data "
                            "options and pipeline-building options cannot be used.",
                       metavar="PATH")
    # TODO: I've made --skip-existing apply to _just_ the output run (which
    # means that it requires --extend-run), but a variant where we can also
    # skip anything in the input chained collection may also be useful; need
    # to think about whether that should be a separate argument or a
    # conversion to make this one take a value.
    group.add_argument("--skip-existing", dest="skip_existing",
                       default=False, action="store_true",
                       help="If all outputs of a Quantum already exist in the output RUN "
                            "collection then that Quantum will be excluded from the "
                            "QuantumGraph. Requires --extend-run. When this option is used "
                            "with the 'run' command, execution of a quantum is skipped if "
                            "all of its outputs exist.")
    group.add_argument("-q", "--save-qgraph", dest="save_qgraph",
                       help="Location for storing a serialized quantum graph definition "
                            "(pickle file).",
                       metavar="PATH")
    group.add_argument("--save-single-quanta", dest="save_single_quanta",
                       help="Format string of locations for storing individual quantum graph "
                            "definitions (pickle files). The curly braces {} in the input "
                            "string will be replaced by a quantum number.",
                       metavar="PATH")
    group.add_argument("--qgraph-dot", dest="qgraph_dot",
                       help="Location for storing GraphViz DOT representation of a "
                            "quantum graph.",
                       metavar="PATH")


def _makeExecOptions(parser):
    """Add options controlling how tasks are executed.

    Parameters
    ----------
    parser : `argparse.ArgumentParser`
    """
    group = parser.add_argument_group("Execution options")
    group.add_argument("--clobber-partial-outputs", action="store_true", default=False,
                       help="Remove incomplete outputs from previous execution of the same "
                            "quantum before new execution.")
    group.add_argument("--doraise", action="store_true",
                       help="raise an exception on error (else log a message and continue)")
    group.add_argument("--profile", metavar="PATH", help="Dump cProfile statistics to filename")

    # parallelism options
    group.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
    group.add_argument("--timeout", type=float,
                       help="Timeout for multiprocessing; maximum wall time (sec)")
    group.add_argument("--fail-fast", action="store_true", default=False,
                       help="Stop processing at first error; default is to process as many "
                            "tasks as possible.")

    # run-time graph fixup option
    group.add_argument("--graph-fixup", type=str, default=None,
                       help="Name of the class or factory method which makes an instance "
                            "used for execution graph fixup.")


# ------------------------
# Exported definitions --
# ------------------------


def makeParser(fromfile_prefix_chars='@', parser_class=ArgumentParser, **kwargs):
    """Make an instance of the command line parser for `CmdLineFwk`.

    Creates an instance of a parser populated with all options that are
    supported by the command line activator. There is no additional logic
    here; all semantics are handled by the activator class.

    Parameters
    ----------
    fromfile_prefix_chars : `str`, optional
        Prefix for arguments to be used as options files (default: `@`).
    parser_class : `type`, optional
        Specifies the class of the argument parser; by default
        `ArgumentParser` is used.
    kwargs : extra keyword arguments
        Passed directly to `parser_class` constructor.

    Returns
    -------
    instance of `parser_class`
    """
    parser = parser_class(usage="%(prog)s subcommand [options]",
                          fromfile_prefix_chars=fromfile_prefix_chars,
                          epilog=_EPILOG,
                          formatter_class=RawDescriptionHelpFormatter,
                          **kwargs)

    # define sub-commands
    subparsers = parser.add_subparsers(dest="subcommand",
                                       title="commands",
                                       description=("Valid commands, use `<command> --help' to get "
                                                    "more info about each command:"),
                                       prog=parser.prog)
    # Python3 workaround, see http://bugs.python.org/issue9253#msg186387
    # The issue was fixed in Python 3.6; the workaround is not needed
    # starting with that version.
    subparsers.required = True

    for subcommand in ("build", "qgraph", "run"):
        # the build/qgraph/run sub-commands are all identical except for
        # the command itself and its description

        if subcommand == "build":
            description = textwrap.dedent("""\
                Build and optionally save pipeline definition.
                This does not require input data to be specified.""")
        elif subcommand == "qgraph":
            description = textwrap.dedent("""\
                Build and optionally save pipeline and quantum graph.""")
        else:
            description = textwrap.dedent("""\
                Build and execute pipeline and quantum graph.""")

        subparser = subparsers.add_parser(subcommand,
                                          description=description,
                                          epilog=_EPILOG,
                                          formatter_class=RawDescriptionHelpFormatter)
        subparser.set_defaults(subparser=subparser,
                               pipeline_actions=[])
        _makeLoggingOptions(subparser)
        _makePipelineOptions(subparser)

        if subcommand in ("qgraph", "run"):
            _makeQuantumGraphOptions(subparser)
            _makeButlerOptions(subparser)

        if subcommand == "run":
            _makeExecOptions(subparser)
            _makeMetaOutputOptions(subparser)

        subparser.add_argument("--show", metavar="ITEM|ITEM=VALUE", action="append", default=[],
                               help="Dump various info to standard output. Possible items are: "
                                    "`config', `config=[Task::]<PATTERN>' or "
                                    "`config=[Task::]<PATTERN>:NOIGNORECASE' to dump configuration "
                                    "fields possibly matching given pattern and/or task label; "
                                    "`history=<FIELD>' to dump configuration history for a field, "
                                    "field name is specified as [Task::][SubTask.]Field; "
                                    "`dump-config', `dump-config=Task' to dump complete configuration "
                                    "for a task given its label or all tasks; "
                                    "`pipeline' to show pipeline composition; "
                                    "`graph' to show information about quanta; "
                                    "`workflow' to show information about quanta and their dependency; "
                                    "`tasks' to show task composition.")

    return parser
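
# For illustration only (a sketch; the repository path and task path are
# made-up placeholders): building a parser and parsing a 'run' command line.
#
#     >>> parser = makeParser()
#     >>> args = parser.parse_args(
#     ...     ["run", "-b", "/repo/butler.yaml", "-i", "raw",
#     ...      "-t", "lsst.pipe.tasks.example.ExampleTask:ex"])
#     >>> args.subcommand
#     'run'
#     >>> args.input
#     [('raw', Ellipsis)]
#     >>> args.pipeline_actions
#     [_PipelineAction(action='new_task', label='ex', value='lsst.pipe.tasks.example.ExampleTask')]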