lsst.pipe.base  16.0-5-gf14cb0b+1
argumentParser.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2015 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division, print_function
23 import abc
24 import argparse
25 import collections
26 import fnmatch
27 import itertools
28 import os
29 import re
30 import shlex
31 import sys
32 import shutil
33 import textwrap
34 
35 from builtins import zip
36 from builtins import str
37 from builtins import range
38 from builtins import object
39 
40 import lsst.utils
41 import lsst.pex.config as pexConfig
43 import lsst.log as lsstLog
44 import lsst.daf.persistence as dafPersist
45 from future.utils import with_metaclass
46 
47 __all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
48  "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]
49 
# Environment variables that supply the default root path for the input,
# calib and output repositories; see _fixPath for how they are applied.
DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
53 
54 
55 def _fixPath(defName, path):
56  """Apply environment variable as default root, if present, and abspath.
57 
58  Parameters
59  ----------
60  defName : `str`
61  Name of environment variable containing default root path; if the environment variable does not exist
62  then the path is relative to the current working directory
63  path : `str`
64  Path relative to default root path.
65 
66  Returns
67  -------
68  abspath : `str`
69  Path that has been expanded, or `None` if the environment variable does not exist and path is `None`.
70  """
71  defRoot = os.environ.get(defName)
72  if defRoot is None:
73  if path is None:
74  return None
75  return os.path.abspath(path)
76  return os.path.abspath(os.path.join(defRoot, path or ""))
77 
78 
class DataIdContainer(object):
    """Container for data IDs and associated data references.

    Parameters
    ----------
    level
        Level of the data IDs; forwarded to `~lsst.daf.persistence.Butler`
        key lookups and data-reference searches.

    Notes
    -----
    Override for data IDs that require special handling to be converted to
    ``data references``, and specify the override class as ``ContainerClass``
    for ``add_id_argument``. (If you don't want the argument parser
    to compute data references, you may use this class and specify
    ``doMakeDataRefList=False`` in ``add_id_argument``.)
    """

    def __init__(self, level=None):
        self.datasetType = None  # the actual dataset type, as specified on the command line (if dynamic)
        self.level = level
        self.idList = []   # data ID dicts accumulated from the command line
        self.refList = []  # butler data references computed from idList

    def setDatasetType(self, datasetType):
        """Set actual dataset type, once it is known.

        Parameters
        ----------
        datasetType : `str`
            Dataset type.
        """
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """Validate data IDs and cast them to the correct type (modify idList in place).

        Parameters
        ----------
        butler : `lsst.daf.persistence.Butler`
            Data butler; used to look up the expected key types for
            ``self.datasetType`` at ``self.level``.

        Raises
        ------
        RuntimeError
            If `setDatasetType` has not been called yet.
        KeyError
            If the butler cannot provide keys for the dataset type/level.
        TypeError
            If a data ID value cannot be cast to its expected type.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            idKeyTypeDict = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError as e:
            msg = "Cannot get keys for datasetType %s at level %s" % (self.datasetType, self.level)
            raise KeyError(msg) from e

        for dataDict in self.idList:
            for key, strVal in dataDict.items():
                try:
                    keyType = idKeyTypeDict[key]
                except KeyError:
                    # OK, assume that it's a valid key and guess that it's a string;
                    # warn, and remember the guess so we only warn once per key.
                    keyType = str

                    log = lsstLog.Log.getDefaultLogger()
                    log.warn("Unexpected ID %s; guessing type is \"%s\"" %
                             (key, 'str' if keyType == str else keyType))
                    idKeyTypeDict[key] = keyType

                if keyType != str:
                    try:
                        castVal = keyType(strVal)
                    except Exception:
                        raise TypeError("Cannot cast value %r to %s for ID key %r" % (strVal, keyType, key,))
                    dataDict[key] = castVal

    def makeDataRefList(self, namespace):
        """Compute refList based on idList.

        Parameters
        ----------
        namespace
            Results of parsing command-line (with ``butler`` and ``log`` elements).

        Notes
        -----
        Not called if ``add_id_argument`` called with ``doMakeDataRefList=False``.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            refList = dafPersist.searchDataRefs(butler, datasetType=self.datasetType,
                                                level=self.level, dataId=dataId)
            if not refList:
                # An empty search is a warning, not an error: keep going.
                namespace.log.warn("No data found for dataId=%s", dataId)
                continue
            self.refList += refList
169 
170 
class DataIdArgument(object):
    """Data ID argument, used by `ArgumentParser.add_id_argument`.

    Parameters
    ----------
    name : `str`
        Name of identifier (argument name without dashes).
    datasetType : `str`
        Type of dataset; specify a string for a fixed dataset type or a
        `DatasetArgument` for a dynamic dataset type (e.g. one specified by
        a command-line argument).
    level
        Level of dataset, for `~lsst.daf.persistence.Butler`.
    doMakeDataRefList : `bool`, optional
        If `True` (default), construct data references.
    ContainerClass : class, optional
        Class to contain data IDs and data references; the default class will
        work for many kinds of data, but you may have to override to compute
        some kinds of data references. Default is `DataIdContainer`.
    """

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        if name.startswith("-"):
            raise RuntimeError("Name %s must not start with -" % (name,))
        self.name = name
        self.datasetType = datasetType
        self.level = level
        self.doMakeDataRefList = bool(doMakeDataRefList)
        self.ContainerClass = ContainerClass
        self.argName = name.lstrip("-")

    @property
    def isDynamicDatasetType(self):
        """`True` if the dataset type is dynamic (that is, specified on the command line)."""
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """Get the dataset type as a string.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        if self.isDynamicDatasetType:
            return self.datasetType.getDatasetType(namespace)
        else:
            return self.datasetType
222 
223 
class DynamicDatasetType(with_metaclass(abc.ABCMeta, object)):
    """Abstract base class for a dataset type determined from parsed command-line arguments.
    """

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify dataset type name, if wanted.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser to add the argument to.
        idName : `str`
            Name of data ID argument, without the leading ``"--"``, e.g. ``"id"``.

        Notes
        -----
        The default implementation is a no-op; subclasses needing an extra
        command-line argument override this.
        """
        pass

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Get the dataset type as a string, based on parsed command-line arguments.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        raise NotImplementedError("Subclasses must override")
254 
255 
257  """Dataset type specified by a command-line argument.
258 
259  Parameters
260  ----------
261  name : `str`, optional
262  Name of command-line argument (including leading "--", if appropriate) whose value is the dataset
263  type. If `None`, uses ``--idName_dstype`` where idName is the name of the data ID argument (e.g.
264  "id").
265  help : `str`, optional
266  Help string for the command-line argument.
267  default : obj, optional
268  Default value. If `None`, then the command-line option is required. This argument isignored if the
269  command-line argument is positional (name does not start with "-") because positional arguments do
270  not support default values.
271  """
272 
273  def __init__(self,
274  name=None,
275  help="dataset type to process from input data repository",
276  default=None,
277  ):
278  DynamicDatasetType.__init__(self)
279  self.name = name
280  self.help = help
281  self.default = default
282 
283  def getDatasetType(self, namespace):
284  """Get the dataset type as a string, from the appropriate command-line argument.
285 
286  Parameters
287  ----------
288  namespace
289  Parsed command.
290 
291  Returns
292  -------
293  datasetType : `str`
294  Dataset type.
295  """
296  argName = self.name.lstrip("-")
297  return getattr(namespace, argName)
298 
299  def addArgument(self, parser, idName):
300  """Add a command-line argument to specify the dataset type name.
301 
302  Parameters
303  ----------
304  parser : `ArgumentParser`
305  Argument parser.
306  idName : `str`
307  Data ID.
308 
309  Notes
310  -----
311  Also sets the `name` attribute if it is currently `None`.
312  """
313  help = self.help if self.help else "dataset type for %s" % (idName,)
314  if self.name is None:
315  self.name = "--%s_dstype" % (idName,)
316  requiredDict = dict()
317  if self.name.startswith("-"):
318  requiredDict = dict(required=self.default is None)
319  parser.add_argument(
320  self.name,
321  default=self.default,
322  help=help,
323  **requiredDict) # cannot specify required=None for positional arguments
324 
325 
class ConfigDatasetType(DynamicDatasetType):
    """Dataset type specified by a config parameter.

    Parameters
    ----------
    name : `str`
        Name of config option whose value is the dataset type; may be a
        dotted path such as ``"foo.bar.baz"``.
    """

    def __init__(self, name):
        DynamicDatasetType.__init__(self)
        self.name = name

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate config field.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command; ``namespace.config`` is read.

        Returns
        -------
        datasetType : `str`
            Value of the config field named by ``self.name``.
        """
        # getattr does not work reliably if the config field name is dotted,
        # so step through one level at a time
        keyList = self.name.split(".")
        value = namespace.config
        for key in keyList:
            try:
                value = getattr(value, key)
            except KeyError:
                # NOTE(review): getattr normally raises AttributeError, not
                # KeyError, for a missing attribute — confirm pex_config's
                # Config raises KeyError here, else this handler never fires.
                raise RuntimeError("Cannot find config parameter %r" % (self.name,))
        return value
357 
358 
class ArgumentParser(argparse.ArgumentParser):
    """Argument parser for command-line tasks that is based on `argparse.ArgumentParser`.

    Parameters
    ----------
    name : `str`
        Name of top-level task; used to identify camera-specific override files.
    usage : `str`, optional
        Command-line usage signature.
    **kwargs
        Additional keyword arguments for `argparse.ArgumentParser`.

    Notes
    -----
    Users may wish to add additional arguments before calling `parse_args`.
    """
    # I would prefer to check data ID keys and values as they are parsed,
    # but the required information comes from the butler, so I have to construct a butler
    # before I do this checking. Constructing a butler is slow, so I only want to do it once,
    # after parsing the command line, so as to catch syntax errors quickly.

    requireOutput = True
    """Require an output directory to be specified (`bool`)."""

    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        argparse.ArgumentParser.__init__(self,
                                         usage=usage,
                                         fromfile_prefix_chars='@',
                                         epilog=textwrap.dedent("""Notes:
            * --config, --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --configfile foo bar
                * wrong: --configfile=foo bar
            """),
                                         formatter_class=argparse.RawDescriptionHelpFormatter,
                                         **kwargs)
        self.add_argument(metavar='input', dest="rawInput",
                          help="path to input data repository, relative to $%s" % (DEFAULT_INPUT_NAME,))
        self.add_argument("--calib", dest="rawCalib",
                          help="path to input calibration repository, relative to $%s" %
                          (DEFAULT_CALIB_NAME,))
        self.add_argument("--output", dest="rawOutput",
                          help="path to output data repository (need not exist), relative to $%s" %
                          (DEFAULT_OUTPUT_NAME,))
        self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
                          help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
                               "optionally sets ROOT to ROOT/rerun/INPUT")
        self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
                          help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
        self.add_argument("-C", "--configfile", dest="configfile", nargs="*", action=ConfigFileAction,
                          help="config override file(s)")
        self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
                          help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
                          metavar="LEVEL|COMPONENT=LEVEL")
        self.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument("--doraise", action="store_true",
                          help="raise an exception on error (else log a message and continue)?")
        self.add_argument("--noExit", action="store_true",
                          help="Do not exit even upon failure (i.e. return a struct to the calling script)")
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument("--show", nargs="+", default=(),
                          help="display the specified information to stdout and quit "
                               "(unless run is specified).")
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument("-t", "--timeout", type=float,
                          help="Timeout for multiprocessing; maximum wall time (sec)")
        self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
                          help=("remove and re-create the output directory if it already exists "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
                          help=("backup and then overwrite existing config files instead of checking them "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
                          help="Don't copy config to file~N backup.")
        self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
                          help=("backup and then overwrite existing package versions instead of checking"
                                "them (safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
                          help="don't check package versions; useful for development")
        # Default console logging layout; may be replaced by --longlog in parse_args.
        lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
""")

    def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
                        ContainerClass=DataIdContainer):
        """Add a data ID argument.

        Parameters
        ----------
        name : `str`
            Data ID argument (including leading dashes, if wanted).
        datasetType : `str` or `DynamicDatasetType`-type
            Type of dataset. Supply a string for a fixed dataset type. For a
            dynamically determined dataset type, supply a `DynamicDatasetType`,
            such as a `DatasetArgument`.
        help : `str`
            Help string for the argument.
        level : object, optional
            Level of dataset, for the `~lsst.daf.persistence.Butler`.
        doMakeDataRefList : bool, optional
            If `True` (default), construct data references.
        ContainerClass : class, optional
            Data ID container class to use to contain results; override the
            default if you need a special means of computing data references
            from data IDs.

        Notes
        -----
        If ``datasetType`` is an instance of `DatasetArgument`, then add a second argument to specify the
        dataset type.

        The associated data is put into ``namespace.<dataIdArgument.name>`` as an instance of ContainerClass;
        the container includes fields:

        - ``idList``: a list of data ID dicts.
        - ``refList``: a list of `~lsst.daf.persistence.Butler` data references (empty if
          ``doMakeDataRefList`` is `False`).
        """
        argName = name.lstrip("-")

        if argName in self._dataIdArgDict:
            raise RuntimeError("Data ID argument %s already exists" % (name,))
        if argName in set(("camera", "config", "butler", "log", "obsPkg")):
            raise RuntimeError("Data ID argument %s is a reserved name" % (name,))

        self.add_argument(name, nargs="*", action=IdValueAction, help=help,
                          metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")

        dataIdArgument = DataIdArgument(
            name=argName,
            datasetType=datasetType,
            level=level,
            doMakeDataRefList=doMakeDataRefList,
            ContainerClass=ContainerClass,
        )

        if dataIdArgument.isDynamicDatasetType:
            # Dynamic dataset types get their own companion argument.
            datasetType.addArgument(parser=self, idName=argName)

        self._dataIdArgDict[argName] = dataIdArgument

    def parse_args(self, config, args=None, log=None, override=None):
        """Parse arguments for a command-line task.

        Parameters
        ----------
        config : `lsst.pex.config.Config`
            Config for the task being run.
        args : `list`, optional
            Argument list; if `None` then ``sys.argv[1:]`` is used.
        log : `lsst.log.Log`, optional
            `~lsst.log.Log` instance; if `None` use the default log.
        override : callable, optional
            A config override function. It must take the root config object as its only argument and must
            modify the config in place. This function is called after camera-specific overrides files are
            applied, and before command-line config overrides are applied (thus allowing the user the final
            word).

        Returns
        -------
        namespace : `argparse.Namespace`
            A `~argparse.Namespace` instance containing fields:

            - ``camera``: camera name.
            - ``config``: the supplied config with all overrides applied, validated and frozen.
            - ``butler``: a `lsst.daf.persistence.Butler` for the data.
            - An entry for each of the data ID arguments registered by `add_id_argument`, the value of
              which is a data ID container with public elements ``idList`` and ``refList``.
            - ``log``: a `lsst.log` Log.
            - An entry for each command-line argument, with the following exceptions:
              - config is the supplied config, suitably updated.
              - configfile, id and loglevel are all missing.
            - ``obsPkg``: name of the ``obs_`` package for this camera.
        """
        if args is None:
            args = sys.argv[1:]

        if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                self.exit("%s: error: Must specify input as first argument" % self.prog)

        # Note that --rerun may change namespace.input, but if it does we verify that the
        # new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error("Error: input=%r not found" % (namespace.input,))

        namespace.config = config
        namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        self.handleCamera(namespace)

        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)

        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        butlerArgs = {}  # common arguments for butler elements
        if namespace.calib:
            butlerArgs = {'mapperArgs': {'calibRoot': namespace.calib}}
        if namespace.output:
            outputs = {'root': namespace.output, 'mode': 'rw'}
            inputs = {'root': namespace.input}
            inputs.update(butlerArgs)
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            outputs = {'root': namespace.input, 'mode': 'rw'}
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(outputs=outputs)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler, hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict.keys():
                for dataRef in getattr(namespace, dataIdName).refList:
                    print("%s dataRef.dataId = %s" % (dataIdName, dataRef.dataId))

        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                sys.stderr.write("Warning: no 'debug' module found\n")
                namespace.debug = False

        del namespace.loglevel

        if namespace.longlog:
            lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddThh:mm:ss.sss} %c (%X{LABEL})(%F:%L)- %m%n
""")
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace

    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories.

        This allows for hacking the directories, e.g., to include a "rerun".
        Modifications are made to the 'namespace' object in-place.
        """
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument, if rerun is specified as a colon separated
        # value, it will be parsed as an input and output. The input value will be overridden if
        # previously specified (but a check is made to make sure both inputs use the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                namespace.output = rerunDir[0]
                if os.path.exists(os.path.join(namespace.output, "_parent")):
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error("Error: invalid argument for --rerun: %s" % namespace.rerun)
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        # The raw* attributes have been fully consumed; drop them.
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun

    def _processDataIds(self, namespace):
        """Process the parsed data for each data ID argument in a `~argparse.Namespace`.

        Processing includes:

        - Validate data ID keys.
        - Cast the data ID values to the correct type.
        - Compute data references from data IDs.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``butler``
            - ``log``
            - ``config``, if any dynamic dataset types are set by a config parameter.
            - Dataset type arguments (e.g. ``id_dstype``), if any dynamic dataset types are specified
              by such an argument.

            These attributes are modified:

            - ``<name>`` for each data ID argument registered using `add_id_argument`.
        """
        for dataIdArgument in self._dataIdArgDict.values():
            dataIdContainer = getattr(namespace, dataIdArgument.name)
            dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
            if dataIdArgument.doMakeDataRefList:
                try:
                    dataIdContainer.castDataIds(butler=namespace.butler)
                except (KeyError, TypeError) as e:
                    # failure of castDataIds indicates invalid command args
                    self.error(e)

                # failure of makeDataRefList indicates a bug that wants a traceback
                dataIdContainer.makeDataRefList(namespace)

    def _applyInitialOverrides(self, namespace):
        """Apply obs-package-specific and camera-specific config override files, if found.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``obsPkg``

        Look in the package namespace.obsPkg for files:

        - ``config/<task_name>.py``
        - ``config/<camera_name>/<task_name>.py`` and load if found.
        """
        obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
        fileName = self._name + ".py"
        for filePath in (
            os.path.join(obsPkgDir, "config", fileName),
            os.path.join(obsPkgDir, "config", namespace.camera, fileName),
        ):
            if os.path.exists(filePath):
                namespace.log.info("Loading config override file %r", filePath)
                namespace.config.load(filePath)
            else:
                namespace.log.debug("Config override file does not exist: %r", filePath)

    def handleCamera(self, namespace):
        """Perform camera-specific operations before parsing the command-line.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace with the following fields:

            - ``camera``: the camera name.
            - ``config``: the config passed to parse_args, with no overrides applied.
            - ``obsPkg``: the ``obs_`` package for this camera.
            - ``log``: a `lsst.log` Log.

        Notes
        -----
        The default implementation does nothing.
        """
        pass

    def convert_arg_line_to_args(self, arg_line):
        """Allow files of arguments referenced by ``@<path>`` to contain multiple values on each line.

        Parameters
        ----------
        arg_line : `str`
            Line of text read from an argument file.
        """
        arg_line = arg_line.strip()
        # Blank lines and comment lines contribute no arguments.
        if not arg_line or arg_line.startswith("#"):
            return
        for arg in shlex.split(arg_line, comments=True, posix=True):
            if not arg.strip():
                continue
            yield arg

    def addReuseOption(self, choices):
        """Add a "--reuse-outputs-from SUBTASK" option to the argument parser.

        CmdLineTasks that can be restarted at an intermediate step using outputs
        from earlier (but still internal) steps should use this method to allow
        the user to control whether that happens when outputs from earlier steps
        are present.

        Parameters
        ----------
        choices : sequence
            A sequence of string names (by convention, top-level subtasks) that
            identify the steps that could be skipped when their outputs are
            already present. The list is ordered, so when the user specifies
            one step on the command line, all previous steps may be skipped as
            well. In addition to the choices provided, users may pass "all"
            to indicate that all steps may be thus skipped.

        When this method is called, the ``namespace`` object returned by
        ``parse_args`` will contain a ``reuse`` attribute containing a list of
        all steps that should be skipped if their outputs are already present.
        If no steps should be skipped, the ``reuse`` will be an empty list.
        """
        choices = list(choices)
        choices.append("all")
        self.add_argument("--reuse-outputs-from", dest="reuse", choices=choices,
                          default=[], action=ReuseAction,
                          help=("Skip the given subtask and its predecessors and reuse their outputs "
                                "if those outputs already exist. Use 'all' to specify all subtasks."))
823 
824 
class InputOnlyArgumentParser(ArgumentParser):
    """`ArgumentParser` for command-line tasks that don't write any output.
    """

    requireOutput = False  # We're not going to write anything
830 
831 
def getTaskDict(config, taskDict=None, baseName=""):
    """Get a dictionary of task info for all subtasks in a config.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    taskDict : `dict`, optional
        Users should not specify this argument. Supports recursion; if provided, taskDict is updated in
        place, else a new `dict` is started.
    baseName : `str`, optional
        Users should not specify this argument. It is only used for recursion: if a non-empty string then a
        period is appended and the result is used as a prefix for additional entries in taskDict; otherwise
        no prefix is used.

    Returns
    -------
    taskDict : `dict`
        Keys are config field names, values are task names.

    Notes
    -----
    This function is designed to be called recursively. The user should call with only a config
    (leaving taskDict and baseName at their default values).
    """
    if taskDict is None:
        taskDict = dict()
    for fieldName, field in config.items():
        # Only fields that carry both a value and a target describe subtasks.
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue
        subBaseName = "%s.%s" % (baseName, fieldName) if baseName else fieldName
        try:
            taskName = "%s.%s" % (field.target.__module__, field.target.__name__)
        except Exception:
            # Fall back to repr for targets without the usual attributes.
            taskName = repr(field.target)
        taskDict[subBaseName] = taskName
        getTaskDict(config=subConfig, taskDict=taskDict, baseName=subBaseName)
    return taskDict
871 
872 
def obeyShowArgument(showOpts, config=None, exit=False):
    """Process arguments specified with ``--show`` (but ignores ``"data"``).

    Parameters
    ----------
    showOpts : `list` of `str`
        List of options passed to ``--show``.
    config : optional
        The provided config.
    exit : bool, optional
        Exit if ``"run"`` isn't included in ``showOpts``.

    Notes
    -----
    Supports the following options in showOpts:

    - ``config[=PAT]``. Dump all the config entries, or just the ones that match the glob pattern.
    - ``history=PAT``. Show where the config entries that match the glob pattern were set.
    - ``tasks``. Show task hierarchy.
    - ``data``. Ignored; to be processed by caller.
    - ``run``. Keep going (the default behaviour is to exit if --show is specified).

    Calls ``sys.exit(1)`` if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        # Split "option=args" into the keyword and its (possibly empty) argument
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            # Strip an optional leading "config." from the pattern.
            # NOTE(review): the "." in "config." is an unescaped regex dot, so it
            # matches any character (e.g. "configX") — confirm this is intended.
            matConfig = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matConfig.group(1)
            if pattern:
                class FilteredStream(object):
                    """A file object that only prints lines that match the glob "pattern"

                    N.b. Newlines are silently discarded and reinserted; crude but effective.
                    """

                    def __init__(self, pattern):
                        # obey case if pattern isn't lowercase or requests NOIGNORECASE
                        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

                        if mat:
                            pattern = mat.group(1)
                            self._pattern = re.compile(fnmatch.translate(pattern))
                        else:
                            if pattern != pattern.lower():
                                print(u"Matching \"%s\" without regard to case "
                                      "(append :NOIGNORECASE to prevent this)" % (pattern,), file=sys.stdout)
                            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

                    def write(self, showStr):
                        showStr = showStr.rstrip()
                        # Strip off doc string line(s) and cut off at "=" for string matching
                        matchStr = showStr.split("\n")[-1].split("=")[0]
                        if self._pattern.search(matchStr):
                            print(u"\n" + showStr)

                fd = FilteredStream(pattern)
            else:
                # no pattern: dump the entire config
                fd = sys.stdout

            config.saveToStream(fd, "config")
        elif showCommand == "history":
            # Strip an optional leading "config." from the pattern (see NOTE above on the dot)
            matHistory = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matHistory.group(1)
            if not pattern:
                print("Please provide a value with --show history (e.g. history=XXX)", file=sys.stderr)
                sys.exit(1)

            # Walk the dotted path down to the sub-config that owns the field
            pattern = pattern.split(".")
            cpath, cname = pattern[:-1], pattern[-1]
            hconfig = config  # the config that we're interested in
            for i, cpt in enumerate(cpath):
                try:
                    hconfig = getattr(hconfig, cpt)
                except AttributeError:
                    print("Error: configuration %s has no subconfig %s" %
                          (".".join(["config"] + cpath[:i]), cpt), file=sys.stderr)

                    sys.exit(1)

            try:
                print(pexConfig.history.format(hconfig, cname))
            except KeyError:
                print("Error: %s has no field %s" % (".".join(["config"] + cpath), cname), file=sys.stderr)
                sys.exit(1)

        elif showCommand == "data":
            # handled by the caller, not here
            pass
        elif showCommand == "run":
            # only affects the final exit decision below
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            print(u"Unknown value for show: %s (choose from '%s')" %
                  (what, "', '".join("config[=XXX] data history=XXX tasks run".split())), file=sys.stderr)
            sys.exit(1)

    # default behaviour is to stop after showing, unless "run" was requested
    if exit and "run" not in showOpts:
        sys.exit(0)
976 
977 
def showTaskHierarchy(config):
    """Print task hierarchy to stdout.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    """
    print(u"Subtasks:")
    taskDict = getTaskDict(config=config)

    # one "fieldName: taskName" line per subtask, sorted by field name
    for fieldName in sorted(taskDict.keys()):
        print(u"%s: %s" % (fieldName, taskDict[fieldName]))
993 
994 
class ConfigValueAction(argparse.Action):
    """argparse action callback to override config parameters using name=value pairs from the command-line.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Override one or more config name value pairs.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The ``namespace.config`` attribute is updated.
        values : `list`
            A list of ``configItemName=value`` pairs.
        option_string : `str`
            Option value specified by the user.
        """
        # nothing to override if no config is attached yet
        if namespace.config is None:
            return
        for pair in values:
            fieldName, _, rawValue = pair.partition("=")
            if not rawValue:
                parser.error("%s value %s must be in form name=value" % (option_string, pair))

            # Try assigning the raw string first; only if the field rejects
            # the string do we fall back to eval-ing it as a Python literal.
            try:
                setDottedAttr(namespace.config, fieldName, rawValue)
            except AttributeError:
                parser.error("no config field: %s" % (fieldName,))
            except Exception:
                try:
                    # empty globals: evaluated with no names in scope
                    parsedValue = eval(rawValue, {})
                except Exception:
                    parser.error("cannot parse %r as a value for %s" % (rawValue, fieldName))
                try:
                    setDottedAttr(namespace.config, fieldName, parsedValue)
                except Exception as e:
                    parser.error("cannot set config.%s=%r: %s" % (fieldName, parsedValue, e))
1034 
1035 
class ConfigFileAction(argparse.Action):
    """argparse action to load config overrides from one or more files.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Load one or more files of config overrides.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The following attributes are updated by this method: ``namespace.config``.
        values : `list`
            A list of data config file paths.
        option_string : `str`, optional
            Option value specified by the user.
        """
        # nothing to load into if no config is attached yet
        if namespace.config is None:
            return
        for path in values:
            try:
                namespace.config.load(path)
            except Exception as e:
                parser.error("cannot load config file %r: %s" % (path, e))
1061 
1062 
class IdValueAction(argparse.Action):
    """argparse action callback to process a data ID into a dict.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Parse ``--id`` data and append results to ``namespace.<argument>.idList``.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command (an instance of argparse.Namespace). The following attributes are updated:

            - ``<idName>.idList``, where ``<idName>`` is the name of the ID argument, for instance ``"id"``
              for ID argument ``--id``.
        values : `list`
            A list of data IDs; see Notes below.
        option_string : `str`
            Option value specified by the user.

        Notes
        -----
        The data format is::

            key1=value1_1[^value1_2[^value1_3...] key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. ``value1_1``) may either be a string, or of the form ``"int..int"``
        (e.g. ``"1..3"``) which is interpreted as ``"1^2^3"`` (inclusive, unlike a python range).
        So ``"0^2..4^7..9"`` is equivalent to ``"0^2^3^4^7^8^9"``. You may also specify a stride:
        ``"1..5:2"`` is ``"1^3^5"``.

        The cross product is computed for keys with multiple values. For example::

            --id visit 1^2 ccd 1,1^2,2

        results in the following data ID dicts being appended to ``namespace.<argument>.idList``:

            {"visit":1, "ccd":"1,1"}
            {"visit":2, "ccd":"1,1"}
            {"visit":1, "ccd":"2,2"}
            {"visit":2, "ccd":"2,2"}
        """
        # ignore ID arguments until a config is attached
        if namespace.config is None:
            return
        keyValues = collections.OrderedDict()
        for pair in values:
            key, _, valueStr = pair.partition("=")
            if key in keyValues:
                parser.error("%s appears multiple times in one ID argument: %s" % (key, option_string))
            # Expand each "^"-separated token; "A..B[:S]" becomes the inclusive
            # range A, A+S, ..., B (as strings); anything else is kept verbatim.
            expanded = []
            for token in valueStr.split("^"):
                mat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", token)
                if mat:
                    start = int(mat.group(1))
                    stop = int(mat.group(2))
                    strideStr = mat.group(3)
                    stride = int(strideStr) if strideStr else 1
                    expanded.extend(str(i) for i in range(start, stop + 1, stride))
                else:
                    expanded.append(token)
            keyValues[key] = expanded

        # cross product over all keys, preserving key order in each data ID
        keys = list(keyValues.keys())
        idDictList = [collections.OrderedDict(zip(keys, combo))
                      for combo in itertools.product(*keyValues.values())]

        # "--id" -> attribute "id" on the namespace
        ident = getattr(namespace, option_string.lstrip("-"))
        ident.idList += idDictList
1133 
1134 
class LogLevelAction(argparse.Action):
    """argparse action to set log level.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Set trace level.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. This argument is not used.
        values : `list`
            List of trace levels; each item must be of the form ``component_name=level`` or ``level``,
            where ``level`` is a keyword (not case sensitive) or an integer.
        option_string : `str`
            Option value specified by the user.
        """
        validLevels = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        validLevelSet = set(validLevels)
        for spec in values:
            component, _, levelName = spec.partition("=")
            if not levelName:
                # bare "LEVEL" with no component: applies to the task's own logger
                levelName, component = component, None
            upperLevel = levelName.upper()
            if upperLevel in validLevelSet:
                logLevel = getattr(lsstLog.Log, upperLevel)
            else:
                parser.error("loglevel=%r not one of %s" % (levelName, validLevels))
            if component is None:
                namespace.log.setLevel(logLevel)
            else:
                lsstLog.Log.getLogger(component).setLevel(logLevel)
1169 
1170 
class ReuseAction(argparse.Action):
    """argparse action associated with ``ArgumentParser.addReuseOption``."""

    def __call__(self, parser, namespace, value, option_string):
        # "all" is always appended as the final choice; map it onto the
        # last real subtask so the slice below covers every step.
        if value == "all":
            value = self.choices[-2]
        # Reuse the named subtask plus every subtask that precedes it.
        lastIndex = self.choices.index(value)
        namespace.reuse = self.choices[:lastIndex + 1]
1179 
1180 
def setDottedAttr(item, name, value):
    """Set an instance attribute (like `setattr` but accepting hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be set.
    name : `str`
        Name of attribute to set.
    value : obj
        New value for the attribute.

    Notes
    -----
    For example if name is ``foo.bar.baz`` then ``item.foo.bar.baz`` is set to the specified value.
    """
    nameParts = name.split(".")
    # walk down to the object that directly owns the final attribute
    target = item
    for part in nameParts[:-1]:
        target = getattr(target, part)
    setattr(target, nameParts[-1], value)
1202 
1203 
def getDottedAttr(item, name):
    """Get an attribute (like `getattr` but accepts hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be returned.
    name : `str`
        Name of the attribute to get.

    Returns
    -------
    itemAttr : obj
        If name is ``foo.bar.baz`` then the return value is ``item.foo.bar.baz``.
    """
    # follow each dotted component in turn
    current = item
    for part in name.split("."):
        current = getattr(current, part)
    return current
def getTaskDict(config, taskDict=None, baseName="")
def __init__(self, name=None, help="dataset type to process from input data repository", default=None)
def setDottedAttr(item, name, value)
def __call__(self, parser, namespace, values, option_string=None)
def obeyShowArgument(showOpts, config=None, exit=False)
std::string getPackageDir(std::string const &packageName)
def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer)
def __call__(self, parser, namespace, value, option_string)
def __init__(self, name, usage="%(prog)s input [options]", kwargs)
def __call__(self, parser, namespace, values, option_string)
def __call__(self, parser, namespace, values, option_string)
def __call__(self, parser, namespace, values, option_string)
def parse_args(self, config, args=None, log=None, override=None)
def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True, ContainerClass=DataIdContainer)