lsst.pipe.base  15.0-7-g598c41d+2
argumentParser.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2015 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division, print_function
23 import abc
24 import argparse
25 import collections
26 import fnmatch
27 import itertools
28 import os
29 import re
30 import shlex
31 import sys
32 import shutil
33 import textwrap
34 
35 from builtins import zip
36 from builtins import str
37 from builtins import range
38 from builtins import object
39 
40 import lsst.utils
41 import lsst.pex.config as pexConfig
43 import lsst.log as lsstLog
44 import lsst.daf.persistence as dafPersist
45 from future.utils import with_metaclass
46 
47 __all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
48  "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]
49 
50 DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
51 DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
52 DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
53 
54 
55 def _fixPath(defName, path):
56  """Apply environment variable as default root, if present, and abspath.
57 
58  Parameters
59  ----------
60  defName : `str`
61  Name of environment variable containing default root path; if the environment variable does not exist
62  then the path is relative to the current working directory
63  path : `str`
64  Path relative to default root path.
65 
66  Returns
67  -------
68  abspath : `str`
69  Path that has been expanded, or `None` if the environment variable does not exist and path is `None`.
70  """
71  defRoot = os.environ.get(defName)
72  if defRoot is None:
73  if path is None:
74  return None
75  return os.path.abspath(path)
76  return os.path.abspath(os.path.join(defRoot, path or ""))
77 
78 
class DataIdContainer(object):
    """Container for data IDs and associated data references.

    Parameters
    ----------
    level
        Unknown.

    Notes
    -----
    Override for data IDs that require special handling to be converted to ``data references``, and
    specify the override class as ``ContainerClass`` for ``add_id_argument``. (If you don't want the
    argument parser to compute data references, you may use this class and specify
    ``doMakeDataRefList=False`` in ``add_id_argument``.)
    """

    def __init__(self, level=None):
        # Actual dataset type, as specified on the command line (if dynamic);
        # None until setDatasetType is called.
        self.datasetType = None
        self.level = level
        self.idList = []
        self.refList = []

    def setDatasetType(self, datasetType):
        """Set actual dataset type, once it is known.

        Parameters
        ----------
        datasetType : `str`
            Dataset type.
        """
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """Validate data IDs and cast them to the correct type (modify idList in place).

        Parameters
        ----------
        butler : `lsst.daf.persistence.Butler`
            Data butler.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            keyTypes = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError:
            raise KeyError("Cannot get keys for datasetType %s at level %s" % (self.datasetType, self.level))

        for idDict in self.idList:
            for key, rawValue in idDict.items():
                try:
                    valueType = keyTypes[key]
                except KeyError:
                    # Unknown key: assume it is valid and guess it is a string,
                    # remembering the guess so we only warn once per key.
                    valueType = str
                    lsstLog.Log.getDefaultLogger().warn(
                        "Unexpected ID %s; guessing type is \"%s\"" %
                        (key, 'str' if valueType == str else valueType))
                    keyTypes[key] = valueType

                if valueType == str:
                    continue  # values arrive as strings; nothing to cast
                try:
                    idDict[key] = valueType(rawValue)
                except Exception:
                    raise TypeError("Cannot cast value %r to %s for ID key %r" % (rawValue, valueType, key,))

    def makeDataRefList(self, namespace):
        """Compute refList based on idList.

        Parameters
        ----------
        namespace
            Results of parsing command-line (with ``butler`` and ``log`` elements).

        Notes
        -----
        Not called if ``add_id_argument`` called with ``doMakeDataRefList=False``.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        for dataId in self.idList:
            refs = dafPersist.searchDataRefs(namespace.butler, datasetType=self.datasetType,
                                             level=self.level, dataId=dataId)
            if refs:
                self.refList += refs
            else:
                namespace.log.warn("No data found for dataId=%s", dataId)
168 
169 
class DataIdArgument(object):
    """data ID argument, used by `ArgumentParser.add_id_argument`.

    Parameters
    ----------
    name : `str`
        Name of identifier (argument name without dashes).
    datasetType : `str`
        Type of dataset; specify a string for a fixed dataset type or a `DatasetArgument` for a dynamic
        dataset type (e.g. one specified by a command-line argument).
    level
        Level of dataset, for `~lsst.daf.persistence.Butler`.
    doMakeDataRefList : `bool`, optional
        If `True` (default), construct data references.
    ContainerClass : class, optional
        Class to contain data IDs and data references; the default class will work for many kinds of
        data, but you may have to override to compute some kinds of data references. Default is
        `DataIdContainer`.
    """

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        if name.startswith("-"):
            raise RuntimeError("Name %s must not start with -" % (name,))
        self.name = name
        self.datasetType = datasetType
        self.level = level
        self.doMakeDataRefList = bool(doMakeDataRefList)
        self.ContainerClass = ContainerClass
        self.argName = name.lstrip("-")

    @property
    def isDynamicDatasetType(self):
        # Restored missing "def" line: the extraction dropped it, leaving a bare
        # @property followed by this docstring.
        """`True` if the dataset type is dynamic (that is, specified on the command line)."""
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """Get the dataset type as a string.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        if self.isDynamicDatasetType:
            return self.datasetType.getDatasetType(namespace)
        else:
            return self.datasetType
221 
222 
class DynamicDatasetType(with_metaclass(abc.ABCMeta, object)):
    """Abstract base class for a dataset type determined from parsed command-line arguments.
    """

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify dataset type name, if wanted.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser to add the argument to.
        idName : `str`
            Name of data ID argument, without the leading ``"--"``, e.g. ``"id"``.

        Notes
        -----
        The default implementation does nothing; subclasses that need an extra
        command-line argument override this method.
        """
        pass

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Get the dataset type as a string, based on parsed command-line arguments.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        raise NotImplementedError("Subclasses must override")
253 
254 
class DatasetArgument(DynamicDatasetType):
    # Restored missing "class" statement: the extraction dropped it, leaving a
    # bare docstring; the base class is established by the DynamicDatasetType.__init__
    # call below and by __all__.
    """Dataset type specified by a command-line argument.

    Parameters
    ----------
    name : `str`, optional
        Name of command-line argument (including leading "--", if appropriate) whose value is the
        dataset type. If `None`, uses ``--idName_dstype`` where idName is the name of the data ID
        argument (e.g. "id").
    help : `str`, optional
        Help string for the command-line argument.
    default : obj, optional
        Default value. If `None`, then the command-line option is required. This argument is ignored
        if the command-line argument is positional (name does not start with "-") because positional
        arguments do not support default values.
    """

    def __init__(self,
                 name=None,
                 help="dataset type to process from input data repository",
                 default=None,
                 ):
        DynamicDatasetType.__init__(self)
        self.name = name
        self.help = help
        self.default = default

    def getDatasetType(self, namespace):
        """Get the dataset type as a string, from the appropriate command-line argument.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        argName = self.name.lstrip("-")
        return getattr(namespace, argName)

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify the dataset type name.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        idName : `str`
            Data ID.

        Notes
        -----
        Also sets the `name` attribute if it is currently `None`.
        """
        help = self.help if self.help else "dataset type for %s" % (idName,)
        if self.name is None:
            self.name = "--%s_dstype" % (idName,)
        requiredDict = dict()
        if self.name.startswith("-"):
            requiredDict = dict(required=self.default is None)
        parser.add_argument(
            self.name,
            default=self.default,
            help=help,
            **requiredDict)  # cannot specify required=None for positional arguments
323 
324 
class ConfigDatasetType(DynamicDatasetType):
    # Restored missing "class" statement: the extraction dropped it, leaving a
    # bare docstring; the name and base class follow from __all__ and the
    # DynamicDatasetType.__init__ call below.
    """Dataset type specified by a config parameter.

    Parameters
    ----------
    name : `str`
        Name of config option whose value is the dataset type.
    """

    def __init__(self, name):
        DynamicDatasetType.__init__(self)
        self.name = name

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate config field.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command.
        """
        # getattr does not work reliably if the config field name is dotted,
        # so step through one level at a time
        keyList = self.name.split(".")
        value = namespace.config
        for key in keyList:
            try:
                value = getattr(value, key)
            except KeyError:
                raise RuntimeError("Cannot find config parameter %r" % (self.name,))
        return value
356 
357 
class ArgumentParser(argparse.ArgumentParser):
    """Argument parser for command-line tasks that is based on `argparse.ArgumentParser`.

    Parameters
    ----------
    name : `str`
        Name of top-level task; used to identify camera-specific override files.
    usage : `str`, optional
        Command-line usage signature.
    **kwargs
        Additional keyword arguments for `argparse.ArgumentParser`.

    Notes
    -----
    Users may wish to add additional arguments before calling `parse_args`.
    """
    # I would prefer to check data ID keys and values as they are parsed,
    # but the required information comes from the butler, so I have to construct a butler
    # before I do this checking. Constructing a butler is slow, so I only want do it once,
    # after parsing the command line, so as to catch syntax errors quickly.

    requireOutput = True
    """Require an output directory to be specified (`bool`)."""

    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        argparse.ArgumentParser.__init__(self,
                                         usage=usage,
                                         fromfile_prefix_chars='@',
                                         epilog=textwrap.dedent("""Notes:
            * --config, --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --configfile foo bar
                * wrong: --configfile=foo bar
            """),
                                         formatter_class=argparse.RawDescriptionHelpFormatter,
                                         **kwargs)
        # Positional input repository argument; stored raw and resolved later
        # by _parseDirectories (as are --calib, --output and --rerun below).
        self.add_argument(metavar='input', dest="rawInput",
                          help="path to input data repository, relative to $%s" % (DEFAULT_INPUT_NAME,))
        self.add_argument("--calib", dest="rawCalib",
                          help="path to input calibration repository, relative to $%s" %
                          (DEFAULT_CALIB_NAME,))
        self.add_argument("--output", dest="rawOutput",
                          help="path to output data repository (need not exist), relative to $%s" %
                          (DEFAULT_OUTPUT_NAME,))
        self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
                          help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
                               "optionally sets ROOT to ROOT/rerun/INPUT")
        # Config overrides are applied by custom argparse actions as they are parsed.
        self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
                          help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
        self.add_argument("-C", "--configfile", dest="configfile", nargs="*", action=ConfigFileAction,
                          help="config override file(s)")
        self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
                          help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
                          metavar="LEVEL|COMPONENT=LEVEL")
        self.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument("--doraise", action="store_true",
                          help="raise an exception on error (else log a message and continue)?")
        self.add_argument("--noExit", action="store_true",
                          help="Do not exit even upon failure (i.e. return a struct to the calling script)")
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument("--show", nargs="+", default=(),
                          help="display the specified information to stdout and quit "
                               "(unless run is specified).")
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument("-t", "--timeout", type=float,
                          help="Timeout for multiprocessing; maximum wall time (sec)")
        self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
                          help=("remove and re-create the output directory if it already exists "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
                          help=("backup and then overwrite existing config files instead of checking them "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
                          help="Don't copy config to file~N backup.")
        self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
                          help=("backup and then overwrite existing package versions instead of checking"
                                "them (safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
                          help="don't check package versions; useful for development")
        # Install the default (non---longlog) log output format; parse_args
        # replaces it with a more verbose pattern when --longlog is given.
        lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
""")
452 
453  def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
454  ContainerClass=DataIdContainer):
455  """Add a data ID argument.
456 
457 
458  Parameters
459  ----------
460  name : `str`
461  Data ID argument (including leading dashes, if wanted).
462  datasetType : `str` or `DynamicDatasetType`-type
463  Type of dataset. Supply a string for a fixed dataset type. For a dynamically determined dataset
464  type, supply a `DynamicDatasetType`, such a `DatasetArgument`.
465  help : `str`
466  Help string for the argument.
467  level : object, optional
468  Level of dataset, for the `~lsst.daf.persistence.Butler`.
469  doMakeDataRefList : bool, optional
470  If `True` (default), construct data references.
471  ContainerClass : class, optional
472  Data ID container class to use to contain results; override the default if you need a special
473  means of computing data references from data IDs
474 
475  Notes
476  -----
477  If ``datasetType`` is an instance of `DatasetArgument`, then add a second argument to specify the
478  dataset type.
479 
480  The associated data is put into ``namespace.<dataIdArgument.name>`` as an instance of ContainerClass;
481  the container includes fields:
482 
483  - ``idList``: a list of data ID dicts.
484  - ``refList``: a list of `~lsst.daf.persistence.Butler` data references (empty if
485  ``doMakeDataRefList`` is `False`).
486  """
487  argName = name.lstrip("-")
488 
489  if argName in self._dataIdArgDict:
490  raise RuntimeError("Data ID argument %s already exists" % (name,))
491  if argName in set(("camera", "config", "butler", "log", "obsPkg")):
492  raise RuntimeError("Data ID argument %s is a reserved name" % (name,))
493 
494  self.add_argument(name, nargs="*", action=IdValueAction, help=help,
495  metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")
496 
497  dataIdArgument = DataIdArgument(
498  name=argName,
499  datasetType=datasetType,
500  level=level,
501  doMakeDataRefList=doMakeDataRefList,
502  ContainerClass=ContainerClass,
503  )
504 
505  if dataIdArgument.isDynamicDatasetType:
506  datasetType.addArgument(parser=self, idName=argName)
507 
508  self._dataIdArgDict[argName] = dataIdArgument
509 
    def parse_args(self, config, args=None, log=None, override=None):
        """Parse arguments for a command-line task.

        Parameters
        ----------
        config : `lsst.pex.config.Config`
            Config for the task being run.
        args : `list`, optional
            Argument list; if `None` then ``sys.argv[1:]`` is used.
        log : `lsst.log.Log`, optional
            `~lsst.log.Log` instance; if `None` use the default log.
        override : callable, optional
            A config override function. It must take the root config object as its only argument and
            must modify the config in place. This function is called after camera-specific overrides
            files are applied, and before command-line config overrides are applied (thus allowing
            the user the final word).

        Returns
        -------
        namespace : `argparse.Namespace`
            A `~argparse.Namespace` instance containing fields:

            - ``camera``: camera name.
            - ``config``: the supplied config with all overrides applied, validated and frozen.
            - ``butler``: a `lsst.daf.persistence.Butler` for the data.
            - An entry for each of the data ID arguments registered by `add_id_argument`,
              the value of which is a `~lsst.pipe.base.DataIdArgument` that includes public elements
              ``idList`` and ``refList``.
            - ``log``: a `lsst.log` Log.
            - An entry for each command-line argument, with the following exceptions:

              - config is the supplied config, suitably updated.
              - configfile, id and loglevel are all missing.
            - ``obsPkg``: name of the ``obs_`` package for this camera.
        """
        if args is None:
            args = sys.argv[1:]

        # The input repository must be the first argument; bail out with help
        # text if it is missing or looks like an option or @file instead.
        if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                self.exit("%s: error: Must specify input as first argument" % self.prog)

        # Note that --rerun may change namespace.input, but if it does we verify that the
        # new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error("Error: input=%r not found" % (namespace.input,))

        namespace.config = config
        namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        # Camera-specific subclass hook, run before any config overrides are applied.
        self.handleCamera(namespace)

        # Override order: obs-package/camera files, then the override callable,
        # then (during the parse below) command-line -c/-C overrides.
        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        # configfile has been consumed by ConfigFileAction; drop the raw attribute.
        del namespace.configfile

        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)

        # Handle --show options other than "data" (which needs the butler, below).
        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        butlerArgs = {}  # common arguments for butler elements
        if namespace.calib:
            butlerArgs = {'mapperArgs': {'calibRoot': namespace.calib}}
        if namespace.output:
            outputs = {'root': namespace.output, 'mode': 'rw'}
            inputs = {'root': namespace.input}
            inputs.update(butlerArgs)
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            # No separate output repo: open the input read-write.
            outputs = {'root': namespace.input, 'mode': 'rw'}
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(outputs=outputs)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler, hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict.keys():
                for dataRef in getattr(namespace, dataIdName).refList:
                    print("%s dataRef.dataId = %s" % (dataIdName, dataRef.dataId))

        # --show without "run" means: display the requested info and stop.
        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                sys.stderr.write("Warning: no 'debug' module found\n")
                namespace.debug = False

        # loglevel has been consumed by LogLevelAction; drop the raw attribute.
        del namespace.loglevel

        if namespace.longlog:
            # Switch to a more verbose log line format.
            lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddThh:mm:ss.sss} %c (%X{LABEL})(%F:%L)- %m%n
""")
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace
653 
    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories

        This allows for hacking the directories, e.g., to include a "rerun".
        Modifications are made to the 'namespace' object in-place.
        """
        # Record the mapper of the original (raw) input so we can verify that a
        # --rerun-modified input still uses the same mapper.
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument, if rerun is specified as a colon separated
        # value, it will be parsed as an input and output. The input value will be overridden if
        # previously specified (but a check is made to make sure both inputs use the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                # INPUT:OUTPUT form — both are reruns under the original input root.
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                namespace.output = rerunDir[0]
                # If the output rerun already has a _parent link, chain the input to it.
                if os.path.exists(os.path.join(namespace.output, "_parent")):
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error("Error: invalid argument for --rerun: %s" % namespace.rerun)
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        # The raw command-line values are no longer needed once resolved.
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun
696 
697  def _processDataIds(self, namespace):
698  """Process the parsed data for each data ID argument in a `~argparse.Namespace`.
699 
700  Processing includes:
701 
702  - Validate data ID keys.
703  - Cast the data ID values to the correct type.
704  - Compute data references from data IDs.
705 
706  Parameters
707  ----------
708  namespace : parsed namespace (an argparse.Namespace);
709  Parsed namespace. These attributes are read:
710 
711  - ``butler``
712  - ``log``
713  - ``config``, if any dynamic dataset types are set by a config parameter.
714  - Dataset type arguments (e.g. ``id_dstype``), if any dynamic dataset types are specified by such
715  and modifies these attributes:
716  - ``<name>`` for each data ID argument registered using `add_id_argument`.
717  """
718  for dataIdArgument in self._dataIdArgDict.values():
719  dataIdContainer = getattr(namespace, dataIdArgument.name)
720  dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
721  if dataIdArgument.doMakeDataRefList:
722  try:
723  dataIdContainer.castDataIds(butler=namespace.butler)
724  except (KeyError, TypeError) as e:
725  # failure of castDataIds indicates invalid command args
726  self.error(e)
727 
728  # failure of makeDataRefList indicates a bug that wants a traceback
729  dataIdContainer.makeDataRefList(namespace)
730 
731  def _applyInitialOverrides(self, namespace):
732  """Apply obs-package-specific and camera-specific config override files, if found
733 
734  Parameters
735  ----------
736  namespace : `argparse.Namespace`
737  Parsed namespace. These attributes are read:
738 
739  - ``obsPkg``
740 
741  Look in the package namespace.obsPkg for files:
742 
743  - ``config/<task_name>.py``
744  - ``config/<camera_name>/<task_name>.py`` and load if found.
745  """
746  obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
747  fileName = self._name + ".py"
748  for filePath in (
749  os.path.join(obsPkgDir, "config", fileName),
750  os.path.join(obsPkgDir, "config", namespace.camera, fileName),
751  ):
752  if os.path.exists(filePath):
753  namespace.log.info("Loading config overrride file %r", filePath)
754  namespace.config.load(filePath)
755  else:
756  namespace.log.debug("Config override file does not exist: %r", filePath)
757 
758  def handleCamera(self, namespace):
759  """Perform camera-specific operations before parsing the command-line.
760 
761  Parameters
762  ----------
763  namespace : `argparse.Namespace`
764  Namespace (an ) with the following fields:
765 
766  - ``camera``: the camera name.
767  - ``config``: the config passed to parse_args, with no overrides applied.
768  - ``obsPkg``: the ``obs_`` package for this camera.
769  - ``log``: a `lsst.log` Log.
770 
771  Notes
772  -----
773  The default implementation does nothing.
774  """
775  pass
776 
777  def convert_arg_line_to_args(self, arg_line):
778  """Allow files of arguments referenced by ``@<path>`` to contain multiple values on each line.
779 
780  Parameters
781  ----------
782  arg_line : `str`
783  Line of text read from an argument file.
784  """
785  arg_line = arg_line.strip()
786  if not arg_line or arg_line.startswith("#"):
787  return
788  for arg in shlex.split(arg_line, comments=True, posix=True):
789  if not arg.strip():
790  continue
791  yield arg
792 
793  def addReuseOption(self, choices):
794  """Add a "--reuse-outputs-from SUBTASK" option to the argument parser.
795 
796  CmdLineTasks that can be restarted at an intermediate step using outputs
797  from earlier (but still internal) steps should use this method to allow
798  the user to control whether that happens when outputs from earlier steps
799  are present.
800 
801  Parameters
802  ----------
803  choices : sequence
804  A sequence of string names (by convention, top-level subtasks) that
805  identify the steps that could be skipped when their outputs are
806  already present. The list is ordered, so when the user specifies
807  one step on the command line, all previous steps may be skipped as
808  well. In addition to the choices provided, users may pass "all"
809  to indicate that all steps may be thus skipped.
810 
811  When this method is called, the ``namespace`` object returned by
812  ``parse_args`` will contain a ``reuse`` attribute containing a list of
813  all steps that should be skipped if their outputs are already present.
814  If no steps should be skipped, the ``reuse`` will be an empty list.
815  """
816  choices = list(choices)
817  choices.append("all")
818  self.add_argument("--reuse-outputs-from", dest="reuse", choices=choices,
819  default=[], action=ReuseAction,
820  help=("Skip the given subtask and its predecessors and reuse their outputs "
821  "if those outputs already exist. Use 'all' to specify all subtasks."))
822 
823 
class InputOnlyArgumentParser(ArgumentParser):
    # Restored missing "class" statement: the extraction dropped it, leaving a
    # bare docstring; the name and base class follow from __all__.
    """`ArgumentParser` for command-line tasks that don't write any output.
    """

    requireOutput = False  # We're not going to write anything
829 
830 
def getTaskDict(config, taskDict=None, baseName=""):
    """Get a dictionary of task info for all subtasks in a config

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    taskDict : `dict`, optional
        Users should not specify this argument. Supports recursion; if provided, taskDict is updated
        in place, else a new `dict` is started).
    baseName : `str`, optional
        Users should not specify this argument. It is only used for recursion: if a non-empty string
        then a period is appended and the result is used as a prefix for additional entries in
        taskDict; otherwise no prefix is used.

    Returns
    -------
    taskDict : `dict`
        Keys are config field names, values are task names.

    Notes
    -----
    This function is designed to be called recursively. The user should call with only a config
    (leaving taskDict and baseName at their default values).
    """
    if taskDict is None:
        taskDict = dict()
    for fieldName, field in config.items():
        # Only fields exposing both "value" and "target" (ConfigurableField-like
        # entries) describe subtasks; skip everything else.
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue
        subBaseName = "%s.%s" % (baseName, fieldName) if baseName else fieldName
        try:
            taskName = "%s.%s" % (field.target.__module__, field.target.__name__)
        except Exception:
            taskName = repr(field.target)
        taskDict[subBaseName] = taskName
        # Recurse into the subtask's own config to pick up nested subtasks.
        getTaskDict(config=subConfig, taskDict=taskDict, baseName=subBaseName)
    return taskDict
870 
871 
def obeyShowArgument(showOpts, config=None, exit=False):
    """Process arguments specified with ``--show`` (but ignores ``"data"``).

    Parameters
    ----------
    showOpts : `list` of `str`
        List of options passed to ``--show``.
    config : optional
        The provided config.
    exit : bool, optional
        Exit if ``"run"`` isn't included in ``showOpts``.

    Notes
    -----
    Supports the following options in showOpts:

    - ``config[=PAT]``. Dump all the config entries, or just the ones that match the glob pattern.
    - ``history=PAT``. Show where the config entries that match the glob pattern were set.
    - ``tasks``. Show task hierarchy.
    - ``data``. Ignored; to be processed by caller.
    - ``run``. Keep going (the default behaviour is to exit if --show is specified).

    Calls ``sys.exit(1)`` if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        # split "name=args" into the command and its (possibly empty) argument
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            # strip an optional leading "config." from the pattern
            # NOTE(review): the "." in "config." is an unescaped regex dot, so it matches
            # any character; presumably a literal period was intended -- confirm
            matConfig = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matConfig.group(1)
            if pattern:
                class FilteredStream(object):
                    """A file object that only prints lines that match the glob "pattern"

                    N.b. Newlines are silently discarded and reinserted; crude but effective.
                    """

                    def __init__(self, pattern):
                        # obey case if pattern isn't lowercase or requests NOIGNORECASE
                        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

                        if mat:
                            pattern = mat.group(1)
                            self._pattern = re.compile(fnmatch.translate(pattern))
                        else:
                            if pattern != pattern.lower():
                                print(u"Matching \"%s\" without regard to case "
                                      "(append :NOIGNORECASE to prevent this)" % (pattern,), file=sys.stdout)
                            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

                    def write(self, showStr):
                        showStr = showStr.rstrip()
                        # Strip off doc string line(s) and cut off at "=" for string matching
                        matchStr = showStr.split("\n")[-1].split("=")[0]
                        if self._pattern.search(matchStr):
                            print(u"\n" + showStr)

                fd = FilteredStream(pattern)
            else:
                fd = sys.stdout

            config.saveToStream(fd, "config")
        elif showCommand == "history":
            # same optional "config." prefix stripping as for "config"
            matHistory = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matHistory.group(1)
            if not pattern:
                print("Please provide a value with --show history (e.g. history=XXX)", file=sys.stderr)
                sys.exit(1)

            # walk the dotted path down to the field's parent config
            pattern = pattern.split(".")
            cpath, cname = pattern[:-1], pattern[-1]
            hconfig = config  # the config that we're interested in
            for i, cpt in enumerate(cpath):
                try:
                    hconfig = getattr(hconfig, cpt)
                except AttributeError:
                    print("Error: configuration %s has no subconfig %s" %
                          (".".join(["config"] + cpath[:i]), cpt), file=sys.stderr)

                    sys.exit(1)

            try:
                print(pexConfig.history.format(hconfig, cname))
            except KeyError:
                print("Error: %s has no field %s" % (".".join(["config"] + cpath), cname), file=sys.stderr)
                sys.exit(1)

        elif showCommand == "data":
            # handled by the caller, not here
            pass
        elif showCommand == "run":
            # "run" only suppresses the exit below
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            print(u"Unknown value for show: %s (choose from '%s')" %
                  (what, "', '".join("config[=XXX] data history=XXX tasks run".split())), file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
975 
976 
def showTaskHierarchy(config):
    """Print task hierarchy to stdout.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    """
    print(u"Subtasks:")
    taskDict = getTaskDict(config=config)

    # print entries sorted by field name for stable output
    for fieldName in sorted(taskDict):
        print(u"%s: %s" % (fieldName, taskDict[fieldName]))
992 
993 
class ConfigValueAction(argparse.Action):
    """argparse action callback to override config parameters using name=value pairs from the command-line.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Override one or more config name value pairs.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The ``namespace.config`` attribute is updated.
        values : `list`
            A list of ``configItemName=value`` pairs.
        option_string : `str`
            Option value specified by the user.
        """
        if namespace.config is None:
            return
        for pair in values:
            fieldName, sep, rawValue = pair.partition("=")
            if not rawValue:
                parser.error("%s value %s must be in form name=value" % (option_string, pair))

            # First try assigning the raw string; if the field rejects it, fall back to
            # evaluating it as a Python expression.
            try:
                setDottedAttr(namespace.config, fieldName, rawValue)
            except AttributeError:
                parser.error("no config field: %s" % (fieldName,))
            except Exception:
                try:
                    # evaluated with empty globals; still only safe for trusted command lines
                    parsedValue = eval(rawValue, {})
                except Exception:
                    parser.error("cannot parse %r as a value for %s" % (rawValue, fieldName))
                try:
                    setDottedAttr(namespace.config, fieldName, parsedValue)
                except Exception as e:
                    parser.error("cannot set config.%s=%r: %s" % (fieldName, parsedValue, e))
1033 
1034 
class ConfigFileAction(argparse.Action):
    """argparse action to load config overrides from one or more files.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Load one or more files of config overrides.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The following attributes are updated by this method: ``namespace.config``.
        values : `list`
            A list of data config file paths.
        option_string : `str`, optional
            Option value specified by the user.
        """
        if namespace.config is None:
            return
        # apply each override file in the order given on the command line
        for path in values:
            try:
                namespace.config.load(path)
            except Exception as e:
                parser.error("cannot load config file %r: %s" % (path, e))
1060 
1061 
class IdValueAction(argparse.Action):
    """argparse action callback to process a data ID into a dict.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Parse ``--id`` data and append results to ``namespace.<argument>.idList``.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command (an instance of argparse.Namespace). The following attributes are updated:

            - ``<idName>.idList``, where ``<idName>`` is the name of the ID argument, for instance ``"id"``
              for ID argument ``--id``.
        values : `list`
            A list of data IDs; see Notes below.
        option_string : `str`
            Option value specified by the user.

        Notes
        -----
        The data format is::

            key1=value1_1[^value1_2[^value1_3...] key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. ``value1_1``) may either be a string, or of the form ``"int..int"``
        (e.g. ``"1..3"``) which is interpreted as ``"1^2^3"`` (inclusive, unlike a python range).
        So ``"0^2..4^7..9"`` is equivalent to ``"0^2^3^4^7^8^9"``. You may also specify a stride:
        ``"1..5:2"`` is ``"1^3^5"``.

        The cross product is computed for keys with multiple values. For example::

            --id visit 1^2 ccd 1,1^2,2

        results in the following data ID dicts being appended to ``namespace.<argument>.idList``:

            {"visit":1, "ccd":"1,1"}
            {"visit":2, "ccd":"1,1"}
            {"visit":1, "ccd":"2,2"}
            {"visit":2, "ccd":"2,2"}
        """
        if namespace.config is None:
            return
        idDict = collections.OrderedDict()
        for nameValue in values:
            name, sep, valueStr = nameValue.partition("=")
            if name in idDict:
                parser.error("%s appears multiple times in one ID argument: %s" % (name, option_string))
            idDict[name] = []
            for v in valueStr.split("^"):
                # "A..B" or "A..B:S" expands to the inclusive range A..B with stride S
                mat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", v)
                if mat:
                    v1 = int(mat.group(1))
                    v2 = int(mat.group(2))
                    v3 = mat.group(3)
                    v3 = int(v3) if v3 else 1
                    # use a distinct loop variable; previously this shadowed ``v``
                    for num in range(v1, v2 + 1, v3):
                        idDict[name].append(str(num))
                else:
                    idDict[name].append(v)

        # cross product over all keys, preserving key order in each resulting dict
        iterList = [idDict[key] for key in idDict.keys()]
        idDictList = [collections.OrderedDict(zip(idDict.keys(), valList))
                      for valList in itertools.product(*iterList)]

        # e.g. "--id" updates namespace.id.idList
        argName = option_string.lstrip("-")
        ident = getattr(namespace, argName)
        ident.idList += idDictList
1132 
1133 
class LogLevelAction(argparse.Action):
    """argparse action to set log level.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Set trace level.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. This argument is not used.
        values : `list`
            List of trace levels; each item must be of the form ``component_name=level`` or ``level``,
            where ``level`` is a keyword (not case sensitive) or an integer.
        option_string : `str`
            Option value specified by the user.
        """
        validLevels = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        validLevelSet = set(validLevels)
        for item in values:
            component, _, levelName = item.partition("=")
            if not levelName:
                # a bare level (no "=") applies to the root log
                levelName, component = component, None
            upperName = levelName.upper()
            if upperName in validLevelSet:
                level = getattr(lsstLog.Log, upperName)
            else:
                parser.error("loglevel=%r not one of %s" % (levelName, validLevels))
            if component is None:
                namespace.log.setLevel(level)
            else:
                lsstLog.Log.getLogger(component).setLevel(level)
1168 
1169 
class ReuseAction(argparse.Action):
    """argparse action associated with ArgumentParser.addReuseOption."""

    def __call__(self, parser, namespace, value, option_string):
        # "all" means every real subtask; choices always ends with the sentinel "all",
        # so the last real choice is choices[-2]
        chosen = self.choices[-2] if value == "all" else value
        # reuse the chosen step and every step before it
        namespace.reuse = self.choices[:self.choices.index(chosen) + 1]
1178 
1179 
def setDottedAttr(item, name, value):
    """Set an instance attribute (like `setattr` but accepting hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be set.
    name : `str`
        Name of attribute to set.
    value : obj
        New value for the attribute.

    Notes
    -----
    For example if name is ``foo.bar.baz`` then ``item.foo.bar.baz`` is set to the specified value.
    """
    parts = name.split(".")
    # walk down to the object that owns the final attribute
    target = item
    for part in parts[:-1]:
        target = getattr(target, part)
    setattr(target, parts[-1], value)
1201 
1202 
def getDottedAttr(item, name):
    """Get an attribute (like `getattr` but accepts hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be returned.
    name : `str`
        Name of the attribute to get.

    Returns
    -------
    itemAttr : obj
        If name is ``foo.bar.baz`` then the return value is ``item.foo.bar.baz``.
    """
    # follow each dotted component in turn
    result = item
    for part in name.split("."):
        result = getattr(result, part)
    return result
def getTaskDict(config, taskDict=None, baseName="")
def __init__(self, name=None, help="dataset type to process from input data repository", default=None)
def setDottedAttr(item, name, value)
def __call__(self, parser, namespace, values, option_string=None)
def obeyShowArgument(showOpts, config=None, exit=False)
std::string getPackageDir(std::string const &packageName)
def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer)
def __call__(self, parser, namespace, value, option_string)
def __init__(self, name, usage="%(prog)s input [options]", kwargs)
def __call__(self, parser, namespace, values, option_string)
def __call__(self, parser, namespace, values, option_string)
def __call__(self, parser, namespace, values, option_string)
def parse_args(self, config, args=None, log=None, override=None)
def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True, ContainerClass=DataIdContainer)