lsst.pipe.base  14.0-4-gd190390+2
argumentParser.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2015 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division, print_function
23 import abc
24 import argparse
25 import collections
26 import fnmatch
27 import itertools
28 import os
29 import re
30 import shlex
31 import sys
32 import shutil
33 import textwrap
34 
35 from builtins import zip
36 from builtins import str
37 from builtins import range
38 from builtins import object
39 
40 import lsst.utils
41 import lsst.pex.config as pexConfig
43 import lsst.log as lsstLog
44 import lsst.daf.persistence as dafPersist
45 from future.utils import with_metaclass
46 
47 __all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
48  "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]
49 
50 DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
51 DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
52 DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
53 
54 
55 def _fixPath(defName, path):
56  """Apply environment variable as default root, if present, and abspath.
57 
58  Parameters
59  ----------
60  defName : `str`
61  Name of environment variable containing default root path; if the environment variable does not exist
62  then the path is relative to the current working directory
63  path : `str`
64  Path relative to default root path.
65 
66  Returns
67  -------
68  abspath : `str`
69  Path that has been expanded, or `None` if the environment variable does not exist and path is `None`.
70  """
71  defRoot = os.environ.get(defName)
72  if defRoot is None:
73  if path is None:
74  return None
75  return os.path.abspath(path)
76  return os.path.abspath(os.path.join(defRoot, path or ""))
77 
78 
class DataIdContainer(object):
    """Container for data IDs and associated data references.

    Parameters
    ----------
    level
        Unknown.

    Notes
    -----
    Override for data IDs that require special handling to be converted to ``data references``, and specify
    the override class as ``ContainerClass`` for ``add_id_argument``. (If you don't want the argument parser
    to compute data references, you may use this class and specify ``doMakeDataRefList=False`` in
    ``add_id_argument``.)
    """

    def __init__(self, level=None):
        self.datasetType = None  # the actual dataset type, as specified on the command line (if dynamic)
        self.level = level
        self.idList = []   # list of data ID dicts parsed from the command line
        self.refList = []  # list of butler data references, filled by makeDataRefList

    def setDatasetType(self, datasetType):
        """Set actual dataset type, once it is known.

        Parameters
        ----------
        datasetType : `str`
            Dataset type.
        """
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """Validate data IDs and cast them to the correct type (modify idList in place).

        Parameters
        ----------
        butler : `lsst.daf.persistence.Butler`
            Data butler.

        Raises
        ------
        RuntimeError
            If `setDatasetType` has not been called first.
        KeyError
            If the butler cannot provide keys for the dataset type at this level.
        TypeError
            If a data ID value cannot be cast to its expected type.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            idKeyTypeDict = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError:
            raise KeyError("Cannot get keys for datasetType %s at level %s" % (self.datasetType, self.level))

        for dataDict in self.idList:
            for key, strVal in dataDict.items():
                try:
                    keyType = idKeyTypeDict[key]
                except KeyError:
                    # OK, assume that it's a valid key and guess that it's a string
                    keyType = str

                    log = lsstLog.Log.getDefaultLogger()
                    log.warn("Unexpected ID %s; guessing type is \"%s\"" %
                             (key, 'str' if keyType == str else keyType))
                    # cache the guess so the warning is emitted only once per key
                    idKeyTypeDict[key] = keyType

                if keyType != str:
                    try:
                        castVal = keyType(strVal)
                    except Exception:
                        raise TypeError("Cannot cast value %r to %s for ID key %r" % (strVal, keyType, key,))
                    dataDict[key] = castVal

    def makeDataRefList(self, namespace):
        """Compute refList based on idList.

        Parameters
        ----------
        namespace
            Results of parsing command-line (with ``butler`` and ``log`` elements).

        Notes
        -----
        Not called if ``add_id_argument`` called with ``doMakeDataRefList=False``.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            refList = list(butler.subset(datasetType=self.datasetType, level=self.level, dataId=dataId))
            # exclude nonexistent data
            # this is a recursive test, e.g. for the sake of "raw" data
            refList = [dr for dr in refList if dataExists(butler=butler, datasetType=self.datasetType,
                                                          dataRef=dr)]
            if not refList:
                namespace.log.warn("No data found for dataId=%s", dataId)
                continue
            self.refList += refList
171 
172 
class DataIdArgument(object):
    """data ID argument, used by `ArgumentParser.add_id_argument`.

    Parameters
    ----------
    name : `str`
        Name of identifier (argument name without dashes).
    datasetType : `str`
        Type of dataset; specify a string for a fixed dataset type or a `DatasetArgument` for a dynamic
        dataset type (e.g. one specified by a command-line argument).
    level
        Level of dataset, for `~lsst.daf.persistence.Butler`.
    doMakeDataRefList : `bool`, optional
        If `True` (default), construct data references.
    ContainerClass : class, optional
        Class to contain data IDs and data references; the default class will work for many kinds of data,
        but you may have to override to compute some kinds of data references. Default is `DataIdContainer`.
    """

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        if name.startswith("-"):
            raise RuntimeError("Name %s must not start with -" % (name,))
        self.name = name
        self.datasetType = datasetType
        self.level = level
        self.doMakeDataRefList = bool(doMakeDataRefList)
        self.ContainerClass = ContainerClass
        # argument name with any leading dashes stripped (e.g. "--id" -> "id")
        self.argName = name.lstrip("-")

    @property
    def isDynamicDatasetType(self):
        """`True` if the dataset type is dynamic (that is, specified on the command line)."""
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """Get the dataset type as a string.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        if self.isDynamicDatasetType:
            return self.datasetType.getDatasetType(namespace)
        else:
            return self.datasetType
224 
225 
class DynamicDatasetType(with_metaclass(abc.ABCMeta, object)):
    """Abstract base class for a dataset type determined from parsed command-line arguments.
    """

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify dataset type name, if wanted.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser to add the argument to.
        idName : `str`
            Name of data ID argument, without the leading ``"--"``, e.g. ``"id"``.

        Notes
        -----
        The default implementation does nothing
        """
        pass

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Get the dataset type as a string, based on parsed command-line arguments.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        raise NotImplementedError("Subclasses must override")
256 
class DatasetArgument(DynamicDatasetType):
    """Dataset type specified by a command-line argument.

    Parameters
    ----------
    name : `str`, optional
        Name of command-line argument (including leading "--", if appropriate) whose value is the dataset
        type. If `None`, uses ``--idName_dstype`` where idName is the name of the data ID argument (e.g.
        "id").
    help : `str`, optional
        Help string for the command-line argument.
    default : obj, optional
        Default value. If `None`, then the command-line option is required. This argument is ignored if the
        command-line argument is positional (name does not start with "-") because positional arguments do
        not support default values.
    """

    def __init__(self,
                 name=None,
                 help="dataset type to process from input data repository",
                 default=None,
                 ):
        DynamicDatasetType.__init__(self)
        self.name = name
        self.help = help
        self.default = default

    def getDatasetType(self, namespace):
        """Get the dataset type as a string, from the appropriate command-line argument.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        # self.name is set by addArgument if it was initially None;
        # presumably addArgument is always called before this — TODO confirm
        argName = self.name.lstrip("-")
        return getattr(namespace, argName)

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify the dataset type name.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        idName : `str`
            Data ID.

        Notes
        -----
        Also sets the `name` attribute if it is currently `None`.
        """
        help = self.help if self.help else "dataset type for %s" % (idName,)
        if self.name is None:
            self.name = "--%s_dstype" % (idName,)
        requiredDict = dict()
        if self.name.startswith("-"):
            # optional argument: required only when no default was supplied
            requiredDict = dict(required=self.default is None)
        parser.add_argument(
            self.name,
            default=self.default,
            help=help,
            **requiredDict)  # cannot specify required=None for positional arguments
327 
class ConfigDatasetType(DynamicDatasetType):
    """Dataset type specified by a config parameter.

    Parameters
    ----------
    name : `str`
        Name of config option whose value is the dataset type.
    """

    def __init__(self, name):
        DynamicDatasetType.__init__(self)
        self.name = name

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate config field

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type read from ``namespace.config``.
        """
        # getattr does not work reliably if the config field name is dotted,
        # so step through one level at a time
        keyList = self.name.split(".")
        value = namespace.config
        for key in keyList:
            try:
                # NOTE(review): only KeyError is caught here; confirm pex_config
                # raises KeyError (not AttributeError) for unknown field names
                value = getattr(value, key)
            except KeyError:
                raise RuntimeError("Cannot find config parameter %r" % (self.name,))
        return value
359 
360 
class ArgumentParser(argparse.ArgumentParser):
    """Argument parser for command-line tasks that is based on `argparse.ArgumentParser`.

    Parameters
    ----------
    name : `str`
        Name of top-level task; used to identify camera-specific override files.
    usage : `str`, optional
        Command-line usage signature.
    **kwargs
        Additional keyword arguments for `argparse.ArgumentParser`.

    Notes
    -----
    Users may wish to add additional arguments before calling `parse_args`.
    """
    # I would prefer to check data ID keys and values as they are parsed,
    # but the required information comes from the butler, so I have to construct a butler
    # before I do this checking. Constructing a butler is slow, so I only want do it once,
    # after parsing the command line, so as to catch syntax errors quickly.

    requireOutput = True
    """Require an output directory to be specified (`bool`)."""

    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        argparse.ArgumentParser.__init__(self,
                                         usage=usage,
                                         fromfile_prefix_chars='@',
                                         epilog=textwrap.dedent("""Notes:
            * --config, --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --configfile foo bar
                * wrong: --configfile=foo bar
            """),
                                         formatter_class=argparse.RawDescriptionHelpFormatter,
                                         **kwargs)
        # positional "input" argument; resolved against $PIPE_INPUT_ROOT later
        self.add_argument(metavar='input', dest="rawInput",
                          help="path to input data repository, relative to $%s" % (DEFAULT_INPUT_NAME,))
        self.add_argument("--calib", dest="rawCalib",
                          help="path to input calibration repository, relative to $%s" %
                          (DEFAULT_CALIB_NAME,))
        self.add_argument("--output", dest="rawOutput",
                          help="path to output data repository (need not exist), relative to $%s" %
                          (DEFAULT_OUTPUT_NAME,))
        self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
                          help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
                               "optionally sets ROOT to ROOT/rerun/INPUT")
        self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
                          help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
        self.add_argument("-C", "--configfile", dest="configfile", nargs="*", action=ConfigFileAction,
                          help="config override file(s)")
        self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
                          help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
                          metavar="LEVEL|COMPONENT=LEVEL")
        self.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument("--doraise", action="store_true",
                          help="raise an exception on error (else log a message and continue)?")
        self.add_argument("--noExit", action="store_true",
                          help="Do not exit even upon failure (i.e. return a struct to the calling script)")
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument("--show", nargs="+", default=(),
                          help="display the specified information to stdout and quit "
                               "(unless run is specified).")
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument("-t", "--timeout", type=float,
                          help="Timeout for multiprocessing; maximum wall time (sec)")
        self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
                          help=("remove and re-create the output directory if it already exists "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
                          help=("backup and then overwrite existing config files instead of checking them "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
                          help="Don't copy config to file~N backup.")
        self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
                          help=("backup and then overwrite existing package versions instead of checking"
                                "them (safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
                          help="don't check package versions; useful for development")
        # configure a default console logger; replaced by a more verbose layout
        # in parse_args if --longlog is given
        lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
""")

    def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
                        ContainerClass=DataIdContainer):
        """Add a data ID argument.


        Parameters
        ----------
        name : `str`
            Data ID argument (including leading dashes, if wanted).
        datasetType : `str` or `DynamicDatasetType`-type
            Type of dataset. Supply a string for a fixed dataset type. For a dynamically determined dataset
            type, supply a `DynamicDatasetType`, such a `DatasetArgument`.
        help : `str`
            Help string for the argument.
        level : object, optional
            Level of dataset, for the `~lsst.daf.persistence.Butler`.
        doMakeDataRefList : bool, optional
            If `True` (default), construct data references.
        ContainerClass : class, optional
            Data ID container class to use to contain results; override the default if you need a special
            means of computing data references from data IDs

        Notes
        -----
        If ``datasetType`` is an instance of `DatasetArgument`, then add a second argument to specify the
        dataset type.

        The associated data is put into ``namespace.<dataIdArgument.name>`` as an instance of ContainerClass;
        the container includes fields:

        - ``idList``: a list of data ID dicts.
        - ``refList``: a list of `~lsst.daf.persistence.Butler` data references (empty if
          ``doMakeDataRefList`` is `False`).
        """
        argName = name.lstrip("-")

        if argName in self._dataIdArgDict:
            raise RuntimeError("Data ID argument %s already exists" % (name,))
        if argName in set(("camera", "config", "butler", "log", "obsPkg")):
            raise RuntimeError("Data ID argument %s is a reserved name" % (name,))

        self.add_argument(name, nargs="*", action=IdValueAction, help=help,
                          metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")

        dataIdArgument = DataIdArgument(
            name=argName,
            datasetType=datasetType,
            level=level,
            doMakeDataRefList=doMakeDataRefList,
            ContainerClass=ContainerClass,
        )

        if dataIdArgument.isDynamicDatasetType:
            # e.g. add --id_dstype so the user can name the dataset type on the command line
            datasetType.addArgument(parser=self, idName=argName)

        self._dataIdArgDict[argName] = dataIdArgument

    def parse_args(self, config, args=None, log=None, override=None):
        """Parse arguments for a command-line task.

        Parameters
        ----------
        config : `lsst.pex.config.Config`
            Config for the task being run.
        args : `list`, optional
            Argument list; if `None` then ``sys.argv[1:]`` is used.
        log : `lsst.log.Log`, optional
            `~lsst.log.Log` instance; if `None` use the default log.
        override : callable, optional
            A config override function. It must take the root config object as its only argument and must
            modify the config in place. This function is called after camera-specific overrides files are
            applied, and before command-line config overrides are applied (thus allowing the user the final
            word).

        Returns
        -------
        namespace : `argparse.Namespace`
            A `~argparse.Namespace` instance containing fields:

            - ``camera``: camera name.
            - ``config``: the supplied config with all overrides applied, validated and frozen.
            - ``butler``: a `lsst.daf.persistence.Butler` for the data.
            - An entry for each of the data ID arguments registered by `add_id_argument`,
              the value of which is a `~lsst.pipe.base.DataIdArgument` that includes public elements
              ``idList`` and ``refList``.
            - ``log``: a `lsst.log` Log.
            - An entry for each command-line argument, with the following exceptions:
              - config is the supplied config, suitably updated.
              - configfile, id and loglevel are all missing.
            - ``obsPkg``: name of the ``obs_`` package for this camera.
        """
        if args is None:
            args = sys.argv[1:]

        # the first argument must be the positional input repository;
        # show help if it is missing or looks like an option
        if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                self.exit("%s: error: Must specify input as first argument" % self.prog)

        # Note that --rerun may change namespace.input, but if it does we verify that the
        # new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error("Error: input=%r not found" % (namespace.input,))

        namespace.config = config
        namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        self.handleCamera(namespace)

        # obs-package and camera override files first, then the caller's override
        # function, then (during argparse parsing below) command-line overrides
        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)

        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        butlerArgs = {}  # common arguments for butler elements
        if namespace.calib:
            butlerArgs = {'mapperArgs': {'calibRoot': namespace.calib}}
        if namespace.output:
            outputs = {'root': namespace.output, 'mode': 'rw'}
            inputs = {'root': namespace.input}
            inputs.update(butlerArgs)
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            # no separate output repo: open the input repo read-write
            outputs = {'root': namespace.input, 'mode': 'rw'}
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(outputs=outputs)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler, hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict.keys():
                for dataRef in getattr(namespace, dataIdName).refList:
                    print("%s dataRef.dataId = %s" % (dataIdName, dataRef.dataId))

        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                sys.stderr.write("Warning: no 'debug' module found\n")
                namespace.debug = False

        del namespace.loglevel

        if namespace.longlog:
            lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddThh:mm:ss.sss} %c (%X{LABEL})(%F:%L)- %m%n
""")
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace

    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories

        This allows for hacking the directories, e.g., to include a "rerun".
        Modifications are made to the 'namespace' object in-place.
        """
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument, if rerun is specified as a colon separated
        # value, it will be parsed as an input and output. The input value will be overridden if
        # previously specified (but a check is made to make sure both inputs use the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                namespace.output = rerunDir[0]
                # a rerun output with a _parent link chains back to its input repo
                if os.path.exists(os.path.join(namespace.output, "_parent")):
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error("Error: invalid argument for --rerun: %s" % namespace.rerun)
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        # the raw* attributes have served their purpose; drop them
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun

    def _processDataIds(self, namespace):
        """Process the parsed data for each data ID argument in a `~argparse.Namespace`.

        Processing includes:

        - Validate data ID keys.
        - Cast the data ID values to the correct type.
        - Compute data references from data IDs.

        Parameters
        ----------
        namespace : parsed namespace (an argparse.Namespace);
            Parsed namespace. These attributes are read:

            - ``butler``
            - ``log``
            - ``config``, if any dynamic dataset types are set by a config parameter.
            - Dataset type arguments (e.g. ``id_dstype``), if any dynamic dataset types are specified by such
            and modifies these attributes:
            - ``<name>`` for each data ID argument registered using `add_id_argument`.
        """
        for dataIdArgument in self._dataIdArgDict.values():
            dataIdContainer = getattr(namespace, dataIdArgument.name)
            dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
            if dataIdArgument.doMakeDataRefList:
                try:
                    dataIdContainer.castDataIds(butler=namespace.butler)
                except (KeyError, TypeError) as e:
                    # failure of castDataIds indicates invalid command args
                    self.error(e)

                # failure of makeDataRefList indicates a bug that wants a traceback
                dataIdContainer.makeDataRefList(namespace)

    def _applyInitialOverrides(self, namespace):
        """Apply obs-package-specific and camera-specific config override files, if found

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``obsPkg``

        Look in the package namespace.obsPkg for files:

        - ``config/<task_name>.py``
        - ``config/<camera_name>/<task_name>.py`` and load if found.
        """
        obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
        fileName = self._name + ".py"
        for filePath in (
            os.path.join(obsPkgDir, "config", fileName),
            os.path.join(obsPkgDir, "config", namespace.camera, fileName),
        ):
            if os.path.exists(filePath):
                namespace.log.info("Loading config overrride file %r", filePath)
                namespace.config.load(filePath)
            else:
                namespace.log.debug("Config override file does not exist: %r", filePath)

    def handleCamera(self, namespace):
        """Perform camera-specific operations before parsing the command-line.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace with the following fields:

            - ``camera``: the camera name.
            - ``config``: the config passed to parse_args, with no overrides applied.
            - ``obsPkg``: the ``obs_`` package for this camera.
            - ``log``: a `lsst.log` Log.

        Notes
        -----
        The default implementation does nothing.
        """
        pass

    def convert_arg_line_to_args(self, arg_line):
        """Allow files of arguments referenced by ``@<path>`` to contain multiple values on each line.

        Parameters
        ----------
        arg_line : `str`
            Line of text read from an argument file.
        """
        arg_line = arg_line.strip()
        # blank lines and full-line comments yield nothing
        if not arg_line or arg_line.startswith("#"):
            return
        for arg in shlex.split(arg_line, comments=True, posix=True):
            if not arg.strip():
                continue
            yield arg
795 
796 
class InputOnlyArgumentParser(ArgumentParser):
    """`ArgumentParser` for command-line tasks that don't write any output.
    """

    requireOutput = False  # We're not going to write anything
803 
def getTaskDict(config, taskDict=None, baseName=""):
    """Get a dictionary of task info for all subtasks in a config

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    taskDict : `dict`, optional
        Users should not specify this argument. Supports recursion; if provided, taskDict is updated in
        place, else a new `dict` is started).
    baseName : `str`, optional
        Users should not specify this argument. It is only used for recursion: if a non-empty string then a
        period is appended and the result is used as a prefix for additional entries in taskDict; otherwise
        no prefix is used.

    Returns
    -------
    taskDict : `dict`
        Keys are config field names, values are task names.

    Notes
    -----
    This function is designed to be called recursively. The user should call with only a config
    (leaving taskDict and baseName at their default values).
    """
    if taskDict is None:
        taskDict = dict()
    for fieldName, field in config.items():
        # only ConfigurableField-like entries expose both "value" and "target"
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue
        subBaseName = fieldName if not baseName else "%s.%s" % (baseName, fieldName)
        try:
            taskName = "%s.%s" % (field.target.__module__, field.target.__name__)
        except Exception:
            # fall back to repr for targets without the usual module/name attributes
            taskName = repr(field.target)
        taskDict[subBaseName] = taskName
        getTaskDict(config=subConfig, taskDict=taskDict, baseName=subBaseName)
    return taskDict
844 
def obeyShowArgument(showOpts, config=None, exit=False):
    """Process arguments specified with ``--show`` (but ignores ``"data"``).

    Parameters
    ----------
    showOpts : `list` of `str`
        List of options passed to ``--show``.
    config : optional
        The provided config.
    exit : bool, optional
        Exit if ``"run"`` isn't included in ``showOpts``.

    Notes
    -----
    Supports the following options in showOpts:

    - ``config[=PAT]``. Dump all the config entries, or just the ones that match the glob pattern.
    - ``history=PAT``. Show where the config entries that match the glob pattern were set.
    - ``tasks``. Show task hierarchy.
    - ``data``. Ignored; to be processed by caller.
    - ``run``. Keep going (the default behaviour is to exit if --show is specified).

    Calls ``sys.exit(1)`` if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        # split "option=arg" into its two halves; options without "=" get an empty arg
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            # strip an optional leading "config." from the pattern
            # (NB: the "." in the regex is unescaped, so it matches any character)
            matConfig = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matConfig.group(1)
            if pattern:
                class FilteredStream(object):
                    """A file object that only prints lines that match the glob "pattern"

                    N.b. Newlines are silently discarded and reinserted; crude but effective.
                    """

                    def __init__(self, pattern):
                        # obey case if pattern isn't lowercase or requests NOIGNORECASE
                        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

                        if mat:
                            pattern = mat.group(1)
                            self._pattern = re.compile(fnmatch.translate(pattern))
                        else:
                            if pattern != pattern.lower():
                                print(u"Matching \"%s\" without regard to case "
                                      "(append :NOIGNORECASE to prevent this)" % (pattern,), file=sys.stdout)
                            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

                    def write(self, showStr):
                        showStr = showStr.rstrip()
                        # Strip off doc string line(s) and cut off at "=" for string matching
                        matchStr = showStr.split("\n")[-1].split("=")[0]
                        if self._pattern.search(matchStr):
                            print(u"\n" + showStr)

                fd = FilteredStream(pattern)
            else:
                fd = sys.stdout

            config.saveToStream(fd, "config")
        elif showCommand == "history":
            # same optional "config." prefix stripping as for "config" above
            matHistory = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matHistory.group(1)
            if not pattern:
                print("Please provide a value with --show history (e.g. history=XXX)", file=sys.stderr)
                sys.exit(1)

            # walk down the dotted path to the sub-config that owns the field
            pattern = pattern.split(".")
            cpath, cname = pattern[:-1], pattern[-1]
            hconfig = config  # the config that we're interested in
            for i, cpt in enumerate(cpath):
                try:
                    hconfig = getattr(hconfig, cpt)
                except AttributeError:
                    print("Error: configuration %s has no subconfig %s" %
                          (".".join(["config"] + cpath[:i]), cpt), file=sys.stderr)

                    sys.exit(1)

            try:
                print(pexConfig.history.format(hconfig, cname))
            except KeyError:
                print("Error: %s has no field %s" % (".".join(["config"] + cpath), cname), file=sys.stderr)
                sys.exit(1)

        elif showCommand == "data":
            # ignored here; the caller is responsible for showing data IDs
            pass
        elif showCommand == "run":
            # handled below: suppresses the exit at the end of this function
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            print(u"Unknown value for show: %s (choose from '%s')" %
                  (what, "', '".join("config[=XXX] data history=XXX tasks run".split())), file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
948 
949 
def showTaskHierarchy(config):
    """Print task hierarchy to stdout.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    """
    print(u"Subtasks:")
    taskDict = getTaskDict(config=config)
    # print "fieldName: taskName" lines in sorted field-name order
    for fieldName in sorted(taskDict):
        print(u"%s: %s" % (fieldName, taskDict[fieldName]))
965 
966 
class ConfigValueAction(argparse.Action):
    """argparse action callback to override config parameters using name=value pairs from the command-line.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Override one or more config name value pairs.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The ``namespace.config`` attribute is updated.
        values : `list`
            A list of ``configItemName=value`` pairs.
        option_string : `str`
            Option value specified by the user.
        """
        # nothing to do when no config is attached (e.g. during a bare parse)
        if namespace.config is None:
            return
        for nameValue in values:
            name, sep, valueStr = nameValue.partition("=")
            if not valueStr:
                # parser.error does not return: it prints and calls sys.exit
                parser.error("%s value %s must be in form name=value" % (option_string, nameValue))

            # see if setting the string value works; if not, try eval
            try:
                setDottedAttr(namespace.config, name, valueStr)
            except AttributeError:
                # the config has no field of this (possibly dotted) name
                parser.error("no config field: %s" % (name,))
            except Exception:
                # the field exists but rejected the raw string (e.g. an int field);
                # eval of a command-line-supplied string — acceptable only because the
                # user already controls this process; do not reuse for untrusted input
                try:
                    value = eval(valueStr, {})
                except Exception:
                    parser.error("cannot parse %r as a value for %s" % (valueStr, name))
                try:
                    setDottedAttr(namespace.config, name, value)
                except Exception as e:
                    parser.error("cannot set config.%s=%r: %s" % (name, value, e))
1006 
1007 
class ConfigFileAction(argparse.Action):
    """argparse action to load config overrides from one or more files.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Load one or more files of config overrides.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The following attributes are updated by this method: ``namespace.config``.
        values : `list`
            A list of data config file paths.
        option_string : `str`, optional
            Option value specified by the user.
        """
        # nothing to load when no config is attached
        if namespace.config is None:
            return
        for path in values:
            try:
                namespace.config.load(path)
            except Exception as e:
                parser.error("cannot load config file %r: %s" % (path, e))
1033 
1034 
class IdValueAction(argparse.Action):
    """argparse action callback to process a data ID into a dict.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Parse ``--id`` data and append results to ``namespace.<argument>.idList``.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command (an instance of argparse.Namespace). The following attributes are updated:

            - ``<idName>.idList``, where ``<idName>`` is the name of the ID argument, for instance ``"id"``
              for ID argument ``--id``.
        values : `list`
            A list of data IDs; see Notes below.
        option_string : `str`
            Option value specified by the user.

        Notes
        -----
        The data format is::

            key1=value1_1[^value1_2[^value1_3...] key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. ``value1_1``) may either be a string, or of the form ``"int..int"``
        (e.g. ``"1..3"``) which is interpreted as ``"1^2^3"`` (inclusive, unlike a python range).
        So ``"0^2..4^7..9"`` is equivalent to ``"0^2^3^4^7^8^9"``. You may also specify a stride:
        ``"1..5:2"`` is ``"1^3^5"``.

        The cross product is computed for keys with multiple values. For example::

            --id visit 1^2 ccd 1,1^2,2

        appends the following data ID dicts (in this order, last key varying fastest)
        to ``namespace.<argument>.idList``:

            {"visit":"1", "ccd":"1,1"}
            {"visit":"1", "ccd":"2,2"}
            {"visit":"2", "ccd":"1,1"}
            {"visit":"2", "ccd":"2,2"}
        """
        if namespace.config is None:
            return
        # ordered so the cross product below varies the last key fastest
        keyToValues = collections.OrderedDict()
        for pair in values:
            key, sep, valueStr = pair.partition("=")
            if key in keyToValues:
                parser.error("%s appears multiple times in one ID argument: %s" % (key, option_string))
            valueList = []
            for item in valueStr.split("^"):
                rangeMat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", item)
                if rangeMat is None:
                    # a plain string value
                    valueList.append(item)
                else:
                    # "start..stop[:stride]" expands to every value, inclusive
                    start = int(rangeMat.group(1))
                    stop = int(rangeMat.group(2))
                    strideStr = rangeMat.group(3)
                    stride = int(strideStr) if strideStr else 1
                    valueList.extend(str(num) for num in range(start, stop + 1, stride))
            keyToValues[key] = valueList

        # cross product over all keys; each combination becomes one data ID dict
        keys = list(keyToValues.keys())
        idDictList = [collections.OrderedDict(zip(keys, combo))
                      for combo in itertools.product(*keyToValues.values())]

        # "--id" -> attribute "id", etc.
        argName = option_string.lstrip("-")
        getattr(namespace, argName).idList += idDictList
1105 
1106 
class LogLevelAction(argparse.Action):
    """argparse action to set log level.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Set trace level.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. This argument is not used.
        values : `list`
            List of trace levels; each item must be of the form ``component_name=level`` or ``level``,
            where ``level`` is a keyword (not case sensitive) or an integer.
        option_string : `str`
            Option value specified by the user.
        """
        permittedLevelList = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        permittedLevelSet = set(permittedLevelList)
        for componentLevel in values:
            component, sep, levelStr = componentLevel.partition("=")
            if not levelStr:
                # a bare "LEVEL" (no "=") applies to the root logger
                levelStr, component = component, None
            levelName = levelStr.upper()
            if levelName not in permittedLevelSet:
                # parser.error prints a message and exits
                parser.error("loglevel=%r not one of %s" % (levelStr, permittedLevelList))
            logLevel = getattr(lsstLog.Log, levelName)
            if component is None:
                namespace.log.setLevel(logLevel)
            else:
                lsstLog.Log.getLogger(component).setLevel(logLevel)
1141 
1142 
def setDottedAttr(item, name, value):
    """Set an instance attribute (like `setattr` but accepting hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be set.
    name : `str`
        Name of attribute to set.
    value : obj
        New value for the attribute.

    Notes
    -----
    For example if name is ``foo.bar.baz`` then ``item.foo.bar.baz`` is set to the specified value.
    """
    nameParts = name.split(".")
    # descend to the object that owns the final attribute
    target = item
    for part in nameParts[:-1]:
        target = getattr(target, part)
    setattr(target, nameParts[-1], value)
1164 
1165 
def getDottedAttr(item, name):
    """Get an attribute (like `getattr` but accepts hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be returned.
    name : `str`
        Name of the attribute to get.

    Returns
    -------
    itemAttr : obj
        If name is ``foo.bar.baz`` then the return value is ``item.foo.bar.baz``.
    """
    # descend one attribute at a time
    result = item
    for attrName in name.split("."):
        result = getattr(result, attrName)
    return result
1185 
1186 
def dataExists(butler, datasetType, dataRef):
    """Determine if data exists at the current level or any data exists at a deeper level.

    Parameters
    ----------
    butler : `lsst.daf.persistence.Butler`
        The Butler.
    datasetType : `str`
        Dataset type.
    dataRef : `lsst.daf.persistence.ButlerDataRef`
        Butler data reference.

    Returns
    -------
    exists : `bool`
        Return value is `True` if data exists, `False` otherwise.
    """
    childRefs = dataRef.subItems()
    if not childRefs:
        # leaf reference: ask the butler directly
        return butler.datasetExists(datasetType=datasetType, dataId=dataRef.dataId)
    # otherwise recurse; any() short-circuits on the first match, like the
    # original early return
    return any(dataExists(butler, datasetType, childRef) for childRef in childRefs)
def getTaskDict(config, taskDict=None, baseName="")
def __init__(self, name=None, help="dataset type to process from input data repository", default=None)
def setDottedAttr(item, name, value)
def __call__(self, parser, namespace, values, option_string=None)
def obeyShowArgument(showOpts, config=None, exit=False)
std::string getPackageDir(std::string const &packageName)
def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer)
def __init__(self, name, usage="%(prog)s input [options]", kwargs)
def dataExists(butler, datasetType, dataRef)
def __call__(self, parser, namespace, values, option_string)
def __call__(self, parser, namespace, values, option_string)
def __call__(self, parser, namespace, values, option_string)
def parse_args(self, config, args=None, log=None, override=None)
def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True, ContainerClass=DataIdContainer)