lsst.pipe.base  14.0-6-ge2c9487+54
argumentParser.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2015 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division, print_function
23 import abc
24 import argparse
25 import collections
26 import fnmatch
27 import itertools
28 import os
29 import re
30 import shlex
31 import sys
32 import shutil
33 import textwrap
34 
35 from builtins import zip
36 from builtins import str
37 from builtins import range
38 from builtins import object
39 
40 import lsst.utils
41 import lsst.pex.config as pexConfig
43 import lsst.log as lsstLog
44 import lsst.daf.persistence as dafPersist
45 from future.utils import with_metaclass
46 
47 __all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
48  "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]
49 
50 DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
51 DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
52 DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
53 
54 
55 def _fixPath(defName, path):
56  """Apply environment variable as default root, if present, and abspath.
57 
58  Parameters
59  ----------
60  defName : `str`
61  Name of environment variable containing default root path; if the environment variable does not exist
62  then the path is relative to the current working directory
63  path : `str`
64  Path relative to default root path.
65 
66  Returns
67  -------
68  abspath : `str`
69  Path that has been expanded, or `None` if the environment variable does not exist and path is `None`.
70  """
71  defRoot = os.environ.get(defName)
72  if defRoot is None:
73  if path is None:
74  return None
75  return os.path.abspath(path)
76  return os.path.abspath(os.path.join(defRoot, path or ""))
77 
78 
class DataIdContainer(object):
    """Container for data IDs and associated data references.

    Parameters
    ----------
    level
        Level of the dataset; forwarded unchanged to
        `lsst.daf.persistence.Butler` calls (``getKeys``, ``subset``).
        `None` means the butler's default level.

    Notes
    -----
    Override for data IDs that require special handling to be converted to
    ``data references``, and specify the override class as ``ContainerClass``
    for ``add_id_argument``. (If you don't want the argument parser to compute
    data references, you may use this class and specify
    ``doMakeDataRefList=False`` in ``add_id_argument``.)
    """

    def __init__(self, level=None):
        self.datasetType = None  # the actual dataset type, as specified on the command line (if dynamic)
        self.level = level  # dataset level passed to butler.getKeys and butler.subset
        self.idList = []  # data ID dicts, filled in when the command line is parsed
        self.refList = []  # butler data references, computed from idList by makeDataRefList

    def setDatasetType(self, datasetType):
        """Set actual dataset type, once it is known.

        Parameters
        ----------
        datasetType : `str`
            Dataset type.
        """
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """Validate data IDs and cast them to the correct type (modify idList in place).

        Parameters
        ----------
        butler : `lsst.daf.persistence.Butler`
            Data butler.

        Raises
        ------
        RuntimeError
            If `setDatasetType` has not been called yet.
        KeyError
            If the butler cannot supply keys for this dataset type and level.
        TypeError
            If a data ID value cannot be cast to the type the butler expects.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            idKeyTypeDict = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError:
            raise KeyError("Cannot get keys for datasetType %s at level %s" % (self.datasetType, self.level))

        for dataDict in self.idList:
            for key, strVal in dataDict.items():
                try:
                    keyType = idKeyTypeDict[key]
                except KeyError:
                    # OK, assume that it's a valid key and guess that it's a string;
                    # warn once and remember the guess so later IDs don't re-warn
                    keyType = str

                    log = lsstLog.Log.getDefaultLogger()
                    log.warn("Unexpected ID %s; guessing type is \"%s\"" %
                             (key, 'str' if keyType == str else keyType))
                    idKeyTypeDict[key] = keyType

                if keyType != str:
                    # command-line values arrive as strings; cast to the butler's type
                    try:
                        castVal = keyType(strVal)
                    except Exception:
                        raise TypeError("Cannot cast value %r to %s for ID key %r" % (strVal, keyType, key,))
                    dataDict[key] = castVal

    def makeDataRefList(self, namespace):
        """Compute refList based on idList.

        Parameters
        ----------
        namespace
            Results of parsing command-line (with ``butler`` and ``log`` elements).

        Notes
        -----
        Not called if ``add_id_argument`` called with ``doMakeDataRefList=False``.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            refList = list(butler.subset(datasetType=self.datasetType, level=self.level, dataId=dataId))
            # exclude nonexistent data
            # this is a recursive test, e.g. for the sake of "raw" data
            refList = [dr for dr in refList if dataExists(butler=butler, datasetType=self.datasetType,
                                                          dataRef=dr)]
            if not refList:
                namespace.log.warn("No data found for dataId=%s", dataId)
                continue
            self.refList += refList
171 
172 
class DataIdArgument(object):
    """Data ID argument, used by `ArgumentParser.add_id_argument`.

    Parameters
    ----------
    name : `str`
        Name of identifier (argument name without dashes).
    datasetType : `str`
        Type of dataset; specify a string for a fixed dataset type or a `DatasetArgument`
        for a dynamic dataset type (e.g. one specified by a command-line argument).
    level
        Level of dataset, for `~lsst.daf.persistence.Butler`.
    doMakeDataRefList : `bool`, optional
        If `True` (default), construct data references.
    ContainerClass : class, optional
        Class to contain data IDs and data references; the default class will work for many
        kinds of data, but you may have to override to compute some kinds of data references.
        Default is `DataIdContainer`.

    Raises
    ------
    RuntimeError
        If ``name`` starts with ``"-"``.
    """

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        if name.startswith("-"):
            raise RuntimeError("Name %s must not start with -" % (name,))
        self.name = name
        self.datasetType = datasetType
        self.level = level
        self.doMakeDataRefList = bool(doMakeDataRefList)
        self.ContainerClass = ContainerClass
        self.argName = name.lstrip("-")

    @property
    def isDynamicDatasetType(self):
        # NOTE(review): this `def` line was missing from the extracted source
        # (the decorator was immediately followed by the docstring); restored here.
        # The property is used by `ArgumentParser.add_id_argument` and `getDatasetType`.
        """`True` if the dataset type is dynamic (that is, specified on the command line)."""
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """Get the dataset type as a string.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        if self.isDynamicDatasetType:
            return self.datasetType.getDatasetType(namespace)
        else:
            return self.datasetType
224 
225 
class DynamicDatasetType(with_metaclass(abc.ABCMeta, object)):
    """Abstract base class for a dataset type determined from parsed command-line arguments.

    Subclasses must implement `getDatasetType` and may override `addArgument`
    to register an extra command-line argument that supplies the type.
    """

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify dataset type name, if wanted.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser to add the argument to.
        idName : `str`
            Name of data ID argument, without the leading ``"--"``, e.g. ``"id"``.

        Notes
        -----
        The default implementation does nothing.
        """
        pass

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Get the dataset type as a string, based on parsed command-line arguments.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        raise NotImplementedError("Subclasses must override")
256 
257 
class DatasetArgument(DynamicDatasetType):
    # NOTE(review): this `class` declaration line was missing from the extracted
    # source (the body began directly with the docstring); restored here. The name
    # is exported in ``__all__`` and documented as a `DynamicDatasetType` subclass.
    """Dataset type specified by a command-line argument.

    Parameters
    ----------
    name : `str`, optional
        Name of command-line argument (including leading "--", if appropriate) whose value
        is the dataset type. If `None`, uses ``--idName_dstype`` where idName is the name of
        the data ID argument (e.g. "id").
    help : `str`, optional
        Help string for the command-line argument.
    default : obj, optional
        Default value. If `None`, then the command-line option is required. This argument is
        ignored if the command-line argument is positional (name does not start with "-")
        because positional arguments do not support default values.
    """

    def __init__(self,
                 name=None,
                 help="dataset type to process from input data repository",
                 default=None,
                 ):
        DynamicDatasetType.__init__(self)
        self.name = name
        self.help = help
        self.default = default

    def getDatasetType(self, namespace):
        """Get the dataset type as a string, from the appropriate command-line argument.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        argName = self.name.lstrip("-")
        return getattr(namespace, argName)

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify the dataset type name.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        idName : `str`
            Data ID.

        Notes
        -----
        Also sets the `name` attribute if it is currently `None`.
        """
        help = self.help if self.help else "dataset type for %s" % (idName,)
        if self.name is None:
            self.name = "--%s_dstype" % (idName,)
        requiredDict = dict()
        if self.name.startswith("-"):
            requiredDict = dict(required=self.default is None)
        parser.add_argument(
            self.name,
            default=self.default,
            help=help,
            **requiredDict)  # cannot specify required=None for positional arguments
327 
class ConfigDatasetType(DynamicDatasetType):
    # NOTE(review): this `class` declaration line was missing from the extracted
    # source; restored here. The name is exported in ``__all__``.
    """Dataset type specified by a config parameter.

    Parameters
    ----------
    name : `str`
        Name of config option whose value is the dataset type.
    """

    def __init__(self, name):
        DynamicDatasetType.__init__(self)
        self.name = name

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate config field.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.

        Raises
        ------
        RuntimeError
            If the config parameter named by ``self.name`` cannot be found.
        """
        # getattr does not work reliably if the config field name is dotted,
        # so step through one level at a time
        keyList = self.name.split(".")
        value = namespace.config
        for key in keyList:
            try:
                value = getattr(value, key)
            except (AttributeError, KeyError):
                # Plain attribute access raises AttributeError for a missing
                # field; some config containers raise KeyError. Catch both so a
                # bad parameter name always produces the clearer RuntimeError.
                raise RuntimeError("Cannot find config parameter %r" % (self.name,))
        return value
359 
360 
class ArgumentParser(argparse.ArgumentParser):
    """Argument parser for command-line tasks that is based on `argparse.ArgumentParser`.

    Parameters
    ----------
    name : `str`
        Name of top-level task; used to identify camera-specific override files.
    usage : `str`, optional
        Command-line usage signature.
    **kwargs
        Additional keyword arguments for `argparse.ArgumentParser`.

    Notes
    -----
    Users may wish to add additional arguments before calling `parse_args`.
    """
    # I would prefer to check data ID keys and values as they are parsed,
    # but the required information comes from the butler, so I have to construct a butler
    # before I do this checking. Constructing a butler is slow, so I only want do it once,
    # after parsing the command line, so as to catch syntax errors quickly.

    requireOutput = True
    """Require an output directory to be specified (`bool`)."""

    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        argparse.ArgumentParser.__init__(self,
                                         usage=usage,
                                         fromfile_prefix_chars='@',
                                         epilog=textwrap.dedent("""Notes:
            * --config, --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --configfile foo bar
                * wrong: --configfile=foo bar
            """),
                                         formatter_class=argparse.RawDescriptionHelpFormatter,
                                         **kwargs)
        self.add_argument(metavar='input', dest="rawInput",
                          help="path to input data repository, relative to $%s" % (DEFAULT_INPUT_NAME,))
        self.add_argument("--calib", dest="rawCalib",
                          help="path to input calibration repository, relative to $%s" %
                          (DEFAULT_CALIB_NAME,))
        self.add_argument("--output", dest="rawOutput",
                          help="path to output data repository (need not exist), relative to $%s" %
                          (DEFAULT_OUTPUT_NAME,))
        self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
                          help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
                               "optionally sets ROOT to ROOT/rerun/INPUT")
        self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
                          help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
        self.add_argument("-C", "--configfile", dest="configfile", nargs="*", action=ConfigFileAction,
                          help="config override file(s)")
        self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
                          help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
                          metavar="LEVEL|COMPONENT=LEVEL")
        self.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument("--doraise", action="store_true",
                          help="raise an exception on error (else log a message and continue)?")
        self.add_argument("--noExit", action="store_true",
                          help="Do not exit even upon failure (i.e. return a struct to the calling script)")
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument("--show", nargs="+", default=(),
                          help="display the specified information to stdout and quit "
                               "(unless run is specified).")
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument("-t", "--timeout", type=float,
                          help="Timeout for multiprocessing; maximum wall time (sec)")
        self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
                          help=("remove and re-create the output directory if it already exists "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
                          help=("backup and then overwrite existing config files instead of checking them "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
                          help="Don't copy config to file~N backup.")
        self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
                          help=("backup and then overwrite existing package versions instead of checking"
                                "them (safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
                          help="don't check package versions; useful for development")
        lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
""")

    def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
                        ContainerClass=DataIdContainer):
        """Add a data ID argument.

        Parameters
        ----------
        name : `str`
            Data ID argument (including leading dashes, if wanted).
        datasetType : `str` or `DynamicDatasetType`-type
            Type of dataset. Supply a string for a fixed dataset type. For a dynamically
            determined dataset type, supply a `DynamicDatasetType`, such a `DatasetArgument`.
        help : `str`
            Help string for the argument.
        level : object, optional
            Level of dataset, for the `~lsst.daf.persistence.Butler`.
        doMakeDataRefList : bool, optional
            If `True` (default), construct data references.
        ContainerClass : class, optional
            Data ID container class to use to contain results; override the default if you
            need a special means of computing data references from data IDs.

        Raises
        ------
        RuntimeError
            If the argument name is already registered or is a reserved name.

        Notes
        -----
        If ``datasetType`` is an instance of `DatasetArgument`, then add a second argument
        to specify the dataset type.

        The associated data is put into ``namespace.<dataIdArgument.name>`` as an instance
        of ContainerClass; the container includes fields:

        - ``idList``: a list of data ID dicts.
        - ``refList``: a list of `~lsst.daf.persistence.Butler` data references (empty if
          ``doMakeDataRefList`` is `False`).
        """
        argName = name.lstrip("-")

        if argName in self._dataIdArgDict:
            raise RuntimeError("Data ID argument %s already exists" % (name,))
        if argName in set(("camera", "config", "butler", "log", "obsPkg")):
            raise RuntimeError("Data ID argument %s is a reserved name" % (name,))

        self.add_argument(name, nargs="*", action=IdValueAction, help=help,
                          metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")

        dataIdArgument = DataIdArgument(
            name=argName,
            datasetType=datasetType,
            level=level,
            doMakeDataRefList=doMakeDataRefList,
            ContainerClass=ContainerClass,
        )

        if dataIdArgument.isDynamicDatasetType:
            datasetType.addArgument(parser=self, idName=argName)

        self._dataIdArgDict[argName] = dataIdArgument

    def parse_args(self, config, args=None, log=None, override=None):
        """Parse arguments for a command-line task.

        Parameters
        ----------
        config : `lsst.pex.config.Config`
            Config for the task being run.
        args : `list`, optional
            Argument list; if `None` then ``sys.argv[1:]`` is used.
        log : `lsst.log.Log`, optional
            `~lsst.log.Log` instance; if `None` use the default log.
        override : callable, optional
            A config override function. It must take the root config object as its only
            argument and must modify the config in place. This function is called after
            camera-specific overrides files are applied, and before command-line config
            overrides are applied (thus allowing the user the final word).

        Returns
        -------
        namespace : `argparse.Namespace`
            A `~argparse.Namespace` instance containing fields:

            - ``camera``: camera name.
            - ``config``: the supplied config with all overrides applied, validated and frozen.
            - ``butler``: a `lsst.daf.persistence.Butler` for the data.
            - An entry for each of the data ID arguments registered by `add_id_argument`,
              the value of which is a `~lsst.pipe.base.DataIdArgument` that includes public
              elements ``idList`` and ``refList``.
            - ``log``: a `lsst.log` Log.
            - An entry for each command-line argument, with the following exceptions:

              - config is the supplied config, suitably updated.
              - configfile, id and loglevel are all missing.
            - ``obsPkg``: name of the ``obs_`` package for this camera.
        """
        if args is None:
            args = sys.argv[1:]

        # The first positional argument must be the input repository; anything
        # starting with "-" or "@" means the user omitted it, so show help.
        if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                self.exit("%s: error: Must specify input as first argument" % self.prog)

        # Note that --rerun may change namespace.input, but if it does we verify that the
        # new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error("Error: input=%r not found" % (namespace.input,))

        namespace.config = config
        namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        self.handleCamera(namespace)

        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)

        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        butlerArgs = {}  # common arguments for butler elements
        if namespace.calib:
            butlerArgs = {'mapperArgs': {'calibRoot': namespace.calib}}
        if namespace.output:
            outputs = {'root': namespace.output, 'mode': 'rw'}
            inputs = {'root': namespace.input}
            inputs.update(butlerArgs)
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            # No separate output repo: open the input repo read-write
            outputs = {'root': namespace.input, 'mode': 'rw'}
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(outputs=outputs)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler, hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict.keys():
                for dataRef in getattr(namespace, dataIdName).refList:
                    print("%s dataRef.dataId = %s" % (dataIdName, dataRef.dataId))

        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                sys.stderr.write("Warning: no 'debug' module found\n")
                namespace.debug = False

        del namespace.loglevel

        if namespace.longlog:
            lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddThh:mm:ss.sss} %c (%X{LABEL})(%F:%L)- %m%n
""")
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace

    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories.

        This allows for hacking the directories, e.g., to include a "rerun".
        Modifications are made to the 'namespace' object in-place.
        """
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument, if rerun is specified as a colon separated
        # value, it will be parsed as an input and output. The input value will be overridden if
        # previously specified (but a check is made to make sure both inputs use the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                namespace.output = rerunDir[0]
                if os.path.exists(os.path.join(namespace.output, "_parent")):
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error("Error: invalid argument for --rerun: %s" % namespace.rerun)
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun

    def _processDataIds(self, namespace):
        """Process the parsed data for each data ID argument in a `~argparse.Namespace`.

        Processing includes:

        - Validate data ID keys.
        - Cast the data ID values to the correct type.
        - Compute data references from data IDs.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``butler``
            - ``log``
            - ``config``, if any dynamic dataset types are set by a config parameter.
            - Dataset type arguments (e.g. ``id_dstype``), if any dynamic dataset types are
              specified by such an argument.

            These attributes are modified:

            - ``<name>`` for each data ID argument registered using `add_id_argument`.
        """
        for dataIdArgument in self._dataIdArgDict.values():
            dataIdContainer = getattr(namespace, dataIdArgument.name)
            dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
            if dataIdArgument.doMakeDataRefList:
                try:
                    dataIdContainer.castDataIds(butler=namespace.butler)
                except (KeyError, TypeError) as e:
                    # failure of castDataIds indicates invalid command args
                    self.error(e)

                # failure of makeDataRefList indicates a bug that wants a traceback
                dataIdContainer.makeDataRefList(namespace)

    def _applyInitialOverrides(self, namespace):
        """Apply obs-package-specific and camera-specific config override files, if found.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``obsPkg``

        Notes
        -----
        Look in the package namespace.obsPkg for files:

        - ``config/<task_name>.py``
        - ``config/<camera_name>/<task_name>.py`` and load if found.
        """
        obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
        fileName = self._name + ".py"
        for filePath in (
            os.path.join(obsPkgDir, "config", fileName),
            os.path.join(obsPkgDir, "config", namespace.camera, fileName),
        ):
            if os.path.exists(filePath):
                # fixed log-message typo: was "overrride"
                namespace.log.info("Loading config override file %r", filePath)
                namespace.config.load(filePath)
            else:
                namespace.log.debug("Config override file does not exist: %r", filePath)

    def handleCamera(self, namespace):
        """Perform camera-specific operations before parsing the command-line.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace with the following fields:

            - ``camera``: the camera name.
            - ``config``: the config passed to parse_args, with no overrides applied.
            - ``obsPkg``: the ``obs_`` package for this camera.
            - ``log``: a `lsst.log` Log.

        Notes
        -----
        The default implementation does nothing.
        """
        pass

    def convert_arg_line_to_args(self, arg_line):
        """Allow files of arguments referenced by ``@<path>`` to contain multiple values on each line.

        Parameters
        ----------
        arg_line : `str`
            Line of text read from an argument file.
        """
        arg_line = arg_line.strip()
        # skip blank lines and comment lines
        if not arg_line or arg_line.startswith("#"):
            return
        for arg in shlex.split(arg_line, comments=True, posix=True):
            if not arg.strip():
                continue
            yield arg

    def addReuseOption(self, choices):
        """Add a "--reuse-outputs-from SUBTASK" option to the argument parser.

        CmdLineTasks that can be restarted at an intermediate step using outputs
        from earlier (but still internal) steps should use this method to allow
        the user to control whether that happens when outputs from earlier steps
        are present.

        Parameters
        ----------
        choices : sequence
            A sequence of string names (by convention, top-level subtasks) that
            identify the steps that could be skipped when their outputs are
            already present. The list is ordered, so when the user specifies
            one step on the command line, all previous steps may be skipped as
            well. In addition to the choices provided, users may pass "all"
            to indicate that all steps may be thus skipped.

        Notes
        -----
        When this method is called, the ``namespace`` object returned by
        ``parse_args`` will contain a ``reuse`` attribute containing a list of
        all steps that should be skipped if their outputs are already present.
        If no steps should be skipped, the ``reuse`` will be an empty list.
        """
        choices = list(choices)
        choices.append("all")
        self.add_argument("--reuse-outputs-from", dest="reuse", choices=choices,
                          default=[], action=ReuseAction,
                          help=("Skip the given subtask and its predecessors and reuse their outputs "
                                "if those outputs already exist. Use 'all' to specify all subtasks."))
825 
826 
class InputOnlyArgumentParser(ArgumentParser):
    # NOTE(review): this `class` declaration line was missing from the extracted
    # source; restored here. The name is exported in ``__all__`` and overrides
    # the `requireOutput` class attribute declared on `ArgumentParser`.
    """`ArgumentParser` for command-line tasks that don't write any output.
    """

    requireOutput = False  # We're not going to write anything
832 
833 
def getTaskDict(config, taskDict=None, baseName=""):
    """Get a dictionary of task info for all subtasks in a config.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    taskDict : `dict`, optional
        Users should not specify this argument. Supports recursion; if provided,
        taskDict is updated in place, else a new `dict` is started).
    baseName : `str`, optional
        Users should not specify this argument. It is only used for recursion:
        if a non-empty string then a period is appended and the result is used
        as a prefix for additional entries in taskDict; otherwise no prefix is used.

    Returns
    -------
    taskDict : `dict`
        Keys are config field names, values are task names.

    Notes
    -----
    This function is designed to be called recursively. The user should call
    with only a config (leaving taskDict and baseName at their default values).
    """
    if taskDict is None:
        taskDict = dict()
    for fieldName, field in config.items():
        # Only fields carrying both a value and a target describe subtasks;
        # skip everything else.
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue
        subBaseName = "%s.%s" % (baseName, fieldName) if baseName else fieldName
        try:
            taskName = "%s.%s" % (field.target.__module__, field.target.__name__)
        except Exception:
            # targets without __module__/__name__ fall back to their repr
            taskName = repr(field.target)
        taskDict[subBaseName] = taskName
        getTaskDict(config=subConfig, taskDict=taskDict, baseName=subBaseName)
    return taskDict
873 
874 
def obeyShowArgument(showOpts, config=None, exit=False):
    """Process arguments specified with ``--show`` (but ignores ``"data"``).

    Parameters
    ----------
    showOpts : `list` of `str`
        List of options passed to ``--show``.
    config : optional
        The provided config.
    exit : bool, optional
        Exit if ``"run"`` isn't included in ``showOpts``.

    Notes
    -----
    Supports the following options in showOpts:

    - ``config[=PAT]``. Dump all the config entries, or just the ones that match the glob pattern.
    - ``history=PAT``. Show where the config entries that match the glob pattern were set.
    - ``tasks``. Show task hierarchy.
    - ``data``. Ignored; to be processed by caller.
    - ``run``. Keep going (the default behaviour is to exit if --show is specified).

    Calls ``sys.exit(1)`` if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        # split "option=arg" into its two halves; bare options get empty args
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            # an optional leading "config." prefix is accepted and stripped
            matConfig = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matConfig.group(1)
            if pattern:
                class FilteredStream(object):
                    """A file object that only prints lines that match the glob "pattern"

                    N.b. Newlines are silently discarded and reinserted; crude but effective.
                    """

                    def __init__(self, pattern):
                        # obey case if pattern isn't lowercase or requests NOIGNORECASE
                        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

                        if mat:
                            pattern = mat.group(1)
                            self._pattern = re.compile(fnmatch.translate(pattern))
                        else:
                            if pattern != pattern.lower():
                                print(u"Matching \"%s\" without regard to case "
                                      "(append :NOIGNORECASE to prevent this)" % (pattern,), file=sys.stdout)
                            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

                    def write(self, showStr):
                        showStr = showStr.rstrip()
                        # Strip off doc string line(s) and cut off at "=" for string matching
                        matchStr = showStr.split("\n")[-1].split("=")[0]
                        if self._pattern.search(matchStr):
                            print(u"\n" + showStr)

                fd = FilteredStream(pattern)
            else:
                fd = sys.stdout

            config.saveToStream(fd, "config")
        elif showCommand == "history":
            matHistory = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matHistory.group(1)
            if not pattern:
                print("Please provide a value with --show history (e.g. history=XXX)", file=sys.stderr)
                sys.exit(1)

            # split a dotted name into the sub-config path and the final field name
            pattern = pattern.split(".")
            cpath, cname = pattern[:-1], pattern[-1]
            hconfig = config  # the config that we're interested in
            for i, cpt in enumerate(cpath):
                try:
                    hconfig = getattr(hconfig, cpt)
                except AttributeError:
                    print("Error: configuration %s has no subconfig %s" %
                          (".".join(["config"] + cpath[:i]), cpt), file=sys.stderr)

                    sys.exit(1)

            try:
                print(pexConfig.history.format(hconfig, cname))
            except KeyError:
                print("Error: %s has no field %s" % (".".join(["config"] + cpath), cname), file=sys.stderr)
                sys.exit(1)

        elif showCommand == "data":
            # handled by the caller (requires a butler, which we don't have here)
            pass
        elif showCommand == "run":
            # only meaningful for the "exit" check below
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            print(u"Unknown value for show: %s (choose from '%s')" %
                  (what, "', '".join("config[=XXX] data history=XXX tasks run".split())), file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
978 
979 
def showTaskHierarchy(config):
    """Print the hierarchy of subtasks found in ``config`` to stdout.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    """
    print(u"Subtasks:")
    taskDict = getTaskDict(config=config)

    # one "fieldName: taskName" line per subtask, in sorted field-name order
    for fieldName in sorted(taskDict):
        print(u"%s: %s" % (fieldName, taskDict[fieldName]))
995 
996 
class ConfigValueAction(argparse.Action):
    """argparse action callback to override config parameters using name=value pairs from the command-line.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Override one or more config name value pairs.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The ``namespace.config`` attribute is updated.
        values : `list`
            A list of ``configItemName=value`` pairs.
        option_string : `str`
            Option value specified by the user.
        """
        if namespace.config is None:
            # no config to modify (e.g. parser invoked just for --help)
            return
        for nameValue in values:
            name, sep, valueStr = nameValue.partition("=")
            if not valueStr:
                # parser.error() raises SystemExit, so nothing below runs
                parser.error("%s value %s must be in form name=value" % (option_string, nameValue))

            # see if setting the string value works; if not, try eval
            try:
                setDottedAttr(namespace.config, name, valueStr)
            except AttributeError:
                parser.error("no config field: %s" % (name,))
            except Exception:
                # NOTE(review): eval of operator-supplied config overrides is
                # deliberate here, but it executes arbitrary Python expressions;
                # keep that in mind if these values could ever come from an
                # untrusted source.
                try:
                    value = eval(valueStr, {})
                except Exception:
                    parser.error("cannot parse %r as a value for %s" % (valueStr, name))
                try:
                    setDottedAttr(namespace.config, name, value)
                except Exception as e:
                    parser.error("cannot set config.%s=%r: %s" % (name, value, e))
1036 
1037 
class ConfigFileAction(argparse.Action):
    """argparse action to load config overrides from one or more files.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Load one or more files of config overrides.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The following attributes are updated by this method: ``namespace.config``.
        values : `list`
            A list of data config file paths.
        option_string : `str`, optional
            Option value specified by the user.
        """
        # nothing to load into when the parser has no config (e.g. --help runs)
        if namespace.config is None:
            return
        for path in values:
            try:
                namespace.config.load(path)
            except Exception as exc:
                # parser.error() raises SystemExit with a usage message
                parser.error("cannot load config file %r: %s" % (path, exc))
1063 
1064 
class IdValueAction(argparse.Action):
    """argparse action callback to process a data ID into a dict.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Parse ``--id`` data and append results to ``namespace.<argument>.idList``.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command (an instance of argparse.Namespace). The following attributes are updated:

            - ``<idName>.idList``, where ``<idName>`` is the name of the ID argument, for instance ``"id"``
              for ID argument ``--id``.
        values : `list`
            A list of data IDs; see Notes below.
        option_string : `str`
            Option value specified by the user.

        Notes
        -----
        The data format is::

            key1=value1_1[^value1_2[^value1_3...] key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. ``value1_1``) may either be a string, or of the form ``"int..int"``
        (e.g. ``"1..3"``) which is interpreted as ``"1^2^3"`` (inclusive, unlike a python range).
        So ``"0^2..4^7..9"`` is equivalent to ``"0^2^3^4^7^8^9"``. You may also specify a stride:
        ``"1..5:2"`` is ``"1^3^5"``. All values are stored as strings.

        The cross product is computed for keys with multiple values. For example::

            --id visit 1^2 ccd 1,1^2,2

        results in the following data ID dicts being appended to ``namespace.<argument>.idList``
        (in `itertools.product` order)::

            {"visit":"1", "ccd":"1,1"}
            {"visit":"1", "ccd":"2,2"}
            {"visit":"2", "ccd":"1,1"}
            {"visit":"2", "ccd":"2,2"}
        """
        if namespace.config is None:
            # no config means the parser is being used without a task (e.g. --help)
            return
        idDict = collections.OrderedDict()
        for nameValue in values:
            name, sep, valueStr = nameValue.partition("=")
            if name in idDict:
                parser.error("%s appears multiple times in one ID argument: %s" % (name, option_string))
            idDict[name] = []
            for v in valueStr.split("^"):
                mat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", v)
                if mat:
                    # range syntax start..stop[:stride]; stop is inclusive,
                    # stride defaults to 1
                    v1 = int(mat.group(1))
                    v2 = int(mat.group(2))
                    v3 = mat.group(3)
                    v3 = int(v3) if v3 else 1
                    # use a distinct loop variable (the original shadowed ``v``)
                    idDict[name] += [str(num) for num in range(v1, v2 + 1, v3)]
                else:
                    idDict[name].append(v)

        # cross product of all values, preserving key insertion order
        iterList = [idDict[key] for key in idDict.keys()]
        idDictList = [collections.OrderedDict(zip(idDict.keys(), valList))
                      for valList in itertools.product(*iterList)]

        # "--id" -> "id": the namespace attribute holding the DataIdContainer
        argName = option_string.lstrip("-")
        ident = getattr(namespace, argName)
        ident.idList += idDictList
1135 
1136 
class LogLevelAction(argparse.Action):
    """argparse action to set log level.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Set trace level.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. This argument is not used.
        values : `list`
            List of trace levels; each item must be of the form ``component_name=level`` or ``level``,
            where ``level`` is one of the permitted level names (not case sensitive).
        option_string : `str`
            Option value specified by the user.
        """
        permittedLevelList = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        permittedLevelSet = set(permittedLevelList)
        for spec in values:
            component, sep, levelStr = spec.partition("=")
            if not levelStr:
                # a bare level (no "=") applies to the root logger
                levelStr, component = component, None
            levelUpper = levelStr.upper()
            if levelUpper in permittedLevelSet:
                logLevel = getattr(lsstLog.Log, levelUpper)
            else:
                parser.error("loglevel=%r not one of %s" % (levelStr, permittedLevelList))
            if component is None:
                namespace.log.setLevel(logLevel)
            else:
                lsstLog.Log.getLogger(component).setLevel(logLevel)
1171 
1172 
class ReuseAction(argparse.Action):
    """argparse action associated with ArgumentParser.addReuseOption."""

    def __call__(self, parser, namespace, value, option_string):
        # "all" is always the last choice appended by addReuseOption, so the
        # last real subtask is the entry just before it
        if value == "all":
            value = self.choices[-2]
        # reuse the chosen step and everything before it
        cut = self.choices.index(value) + 1
        namespace.reuse = self.choices[:cut]
1181 
1182 
def setDottedAttr(item, name, value):
    """Set an instance attribute (like `setattr` but accepting hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be set.
    name : `str`
        Name of attribute to set.
    value : obj
        New value for the attribute.

    Notes
    -----
    For example if name is ``foo.bar.baz`` then ``item.foo.bar.baz`` is set to the specified value.
    """
    nameParts = name.split(".")
    # walk down to the object that owns the final attribute, then set it
    target = item
    for part in nameParts[:-1]:
        target = getattr(target, part)
    setattr(target, nameParts[-1], value)
1204 
1205 
def getDottedAttr(item, name):
    """Get an attribute (like `getattr` but accepts hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be returned.
    name : `str`
        Name of the attribute to get.

    Returns
    -------
    itemAttr : obj
        If name is ``foo.bar.baz`` then the return value is ``item.foo.bar.baz``.
    """
    # descend one attribute per dotted component
    attr = item
    for part in name.split("."):
        attr = getattr(attr, part)
    return attr
1225 
1226 
def dataExists(butler, datasetType, dataRef):
    """Determine if data exists at the current level or any data exists at a deeper level.

    Parameters
    ----------
    butler : `lsst.daf.persistence.Butler`
        The Butler.
    datasetType : `str`
        Dataset type.
    dataRef : `lsst.daf.persistence.ButlerDataRef`
        Butler data reference.

    Returns
    -------
    exists : `bool`
        Return value is `True` if data exists, `False` otherwise.
    """
    children = dataRef.subItems()
    if not children:
        # leaf reference: ask the butler directly
        return butler.datasetExists(datasetType=datasetType, dataId=dataRef.dataId)
    # non-leaf: data exists if any descendant has it (recurse depth-first)
    return any(dataExists(butler, datasetType, child) for child in children)
def getTaskDict(config, taskDict=None, baseName="")
def __init__(self, name=None, help="dataset type to process from input data repository", default=None)
def setDottedAttr(item, name, value)
def __call__(self, parser, namespace, values, option_string=None)
def obeyShowArgument(showOpts, config=None, exit=False)
std::string getPackageDir(std::string const &packageName)
def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer)
def __call__(self, parser, namespace, value, option_string)
def __init__(self, name, usage="%(prog)s input [options]", kwargs)
def dataExists(butler, datasetType, dataRef)
def __call__(self, parser, namespace, values, option_string)
def __call__(self, parser, namespace, values, option_string)
def __call__(self, parser, namespace, values, option_string)
def parse_args(self, config, args=None, log=None, override=None)
def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True, ContainerClass=DataIdContainer)