lsst.pipe.base  16.0-11-g9fe0e56+13
argumentParser.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2015 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 __all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
23  "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]
24 
25 import abc
26 import argparse
27 import collections
28 import fnmatch
29 import itertools
30 import os
31 import re
32 import shlex
33 import sys
34 import shutil
35 import textwrap
36 
37 import lsst.utils
38 import lsst.pex.config as pexConfig
40 import lsst.log as lsstLog
41 import lsst.daf.persistence as dafPersist
42 
43 DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
44 DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
45 DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
46 
47 
def _fixPath(defName, path):
    """Resolve a path against an optional default root from the environment.

    Parameters
    ----------
    defName : `str`
        Name of the environment variable holding the default root
        directory. If the variable is not set, ``path`` is resolved
        relative to the current working directory instead.
    path : `str` or `None`
        Path relative to the default root.

    Returns
    -------
    abspath : `str` or `None`
        Absolute path; `None` only when the environment variable is not
        set and ``path`` is `None`.
    """
    root = os.environ.get(defName)
    if root is not None:
        # A missing (or empty) path resolves to the default root itself.
        return os.path.abspath(os.path.join(root, path or ""))
    return None if path is None else os.path.abspath(path)
72 
73 
75  """Container for data IDs and associated data references.
76 
77  Parameters
78  ----------
79  level : `str`
80  The lowest hierarchy level to descend to for this dataset type,
81  for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
82  Use `""` to use the mapper's default for the dataset type.
83  This class does not support `None`, but if it did, `None`
84  would mean the level should not be restricted.
85 
86  Notes
87  -----
88  Override this class for data IDs that require special handling to be
89  converted to ``data references``, and specify the override class
90  as ``ContainerClass`` for ``add_id_argument``.
91 
92  If you don't want the argument parser to compute data references,
93  specify ``doMakeDataRefList=False`` in ``add_id_argument``.
94  """
95 
def __init__(self, level=None):
    """Create an empty container for the given hierarchy level.

    Parameters
    ----------
    level : `str`, optional
        Lowest hierarchy level to descend to; stored as ``self.level``.
    """
    #: Dataset type of the data references (`str`); `None` until
    #: ``setDatasetType`` is called.
    self.datasetType = None
    #: See parameter ``level`` (`str`).
    self.level = level
    #: Data IDs specified on the command line for the appropriate
    #: data ID argument (`list` of `dict`).
    self.idList = []
    #: Data references for the data IDs in ``idList``
    #: (`list` of `lsst.daf.persistence.ButlerDataRef`).
    #: Elements are omitted if the corresponding data is not found.
    #: The list is empty when returned by ``parse_args`` if
    #: ``doMakeDataRefList=False`` was specified in ``add_id_argument``.
    self.refList = []
114 
def setDatasetType(self, datasetType):
    """Record the dataset type once it has been determined.

    Parameters
    ----------
    datasetType : `str`
        Dataset type.

    Notes
    -----
    ``datasetType`` is deliberately not a constructor argument: some
    subclasses cannot know it until the command line has been parsed,
    so it is always assigned afterwards to keep a single code path.
    """
    self.datasetType = datasetType
131 
def castDataIds(self, butler):
    """Validate data IDs and cast their values to the types the butler
    expects (modifies ``idList`` in place).

    Data IDs are read from the command line as `str`, but the butler
    requires some values to be other types; for example "visit" values
    should be `int`.

    Parameters
    ----------
    butler : `lsst.daf.persistence.Butler`
        Data butler; its ``getKeys`` method supplies the key -> type map.

    Raises
    ------
    RuntimeError
        Raised if ``setDatasetType`` has not been called.
    KeyError
        Raised if the butler cannot provide keys for the dataset type
        at the requested level.
    TypeError
        Raised if a value cannot be cast to the required type; the
        original conversion error is attached as ``__cause__``.
    """
    if self.datasetType is None:
        raise RuntimeError("Must call setDatasetType first")
    try:
        idKeyTypeDict = butler.getKeys(datasetType=self.datasetType, level=self.level)
    except KeyError as e:
        msg = "Cannot get keys for datasetType %s at level %s" % (self.datasetType, self.level)
        raise KeyError(msg) from e

    for dataDict in self.idList:
        for key, strVal in dataDict.items():
            try:
                keyType = idKeyTypeDict[key]
            except KeyError:
                # OK, assume that it's a valid key and guess that it's a string
                keyType = str

                log = lsstLog.Log.getDefaultLogger()
                log.warn("Unexpected ID %s; guessing type is \"%s\"" % (key, "str"))
                # Remember the guess so the warning is issued once per key.
                idKeyTypeDict[key] = keyType

            if keyType != str:
                try:
                    castVal = keyType(strVal)
                except Exception as e:
                    raise TypeError("Cannot cast value %r to %s for ID key %r" %
                                    (strVal, keyType, key,)) from e
                # Replacing the value of an existing key is safe while
                # iterating over ``items()``.
                dataDict[key] = castVal
173 
def makeDataRefList(self, namespace):
    """Fill ``refList`` with the data references matching ``idList``.

    Parameters
    ----------
    namespace : `argparse.Namespace`
        Results of parsing the command line. The ``butler`` and ``log``
        elements must be set.

    Raises
    ------
    RuntimeError
        Raised if ``setDatasetType`` has not been called.

    Notes
    -----
    Not called if ``add_id_argument`` was called with
    ``doMakeDataRefList=False``. Data IDs that match nothing are
    reported with a warning and contribute no references.
    """
    if self.datasetType is None:
        raise RuntimeError("Must call setDatasetType first")
    butler = namespace.butler
    for dataId in self.idList:
        found = dafPersist.searchDataRefs(butler, datasetType=self.datasetType,
                                          level=self.level, dataId=dataId)
        if found:
            self.refList += found
        else:
            namespace.log.warn("No data found for dataId=%s", dataId)
198 
199 
201  """data ID argument, used by `ArgumentParser.add_id_argument`.
202 
203  Parameters
204  ----------
205  name : `str`
206  Name of identifier (argument name without dashes).
207  datasetType : `str`
208  Type of dataset; specify a string for a fixed dataset type
209  or a `DatasetArgument` for a dynamic dataset type (e.g.
210  one specified by a command-line argument).
211  level : `str`
212  The lowest hierarchy level to descend to for this dataset type,
213  for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
214  Use `""` to use the mapper's default for the dataset type.
215  Some container classes may also support `None`, which means
216  the level should not be restricted; however the default class,
217  `DataIdContainer`, does not support `None`.
218  doMakeDataRefList : `bool`, optional
219  If `True` (default), construct data references.
220  ContainerClass : `class`, optional
221  Class to contain data IDs and data references; the default class
222  `DataIdContainer` will work for many, but not all, cases.
223  For example if the dataset type is specified on the command line
224  then use `DynamicDatasetType`.
225  """
226 
def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
    """Store the specification of one data ID argument.

    Raises
    ------
    RuntimeError
        Raised if ``name`` starts with a dash; the name must be given
        without leading dashes.
    """
    if name.startswith("-"):
        raise RuntimeError("Name %s must not start with -" % (name,))
    self.name = name
    # Leading dashes were rejected above, so this equals ``name``.
    self.argName = name.lstrip("-")
    self.datasetType = datasetType
    self.level = level
    self.ContainerClass = ContainerClass
    self.doMakeDataRefList = bool(doMakeDataRefList)
236 
237  @property
239  """`True` if the dataset type is dynamic (that is, specified
240  on the command line).
241  """
242  return isinstance(self.datasetType, DynamicDatasetType)
243 
def getDatasetType(self, namespace):
    """Return the dataset type as a string.

    Parameters
    ----------
    namespace
        Parsed command; only consulted when the dataset type is dynamic.

    Returns
    -------
    datasetType : `str`
        Dataset type.
    """
    if not self.isDynamicDatasetType:
        # Fixed dataset type: it was supplied directly.
        return self.datasetType
    return self.datasetType.getDatasetType(namespace)
261 
262 
class DynamicDatasetType(metaclass=abc.ABCMeta):
    """Abstract base class for a dataset type that is only known once the
    command-line arguments have been parsed.
    """

    def addArgument(self, parser, idName):
        """Hook to register a command-line argument that specifies the
        dataset type name, if one is wanted.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser to add the argument to.
        idName : `str`
            Name of the data ID argument, without the leading ``"--"``,
            e.g. ``"id"``.

        Notes
        -----
        The default implementation does nothing.
        """
        return None

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Return the dataset type as a string, based on the parsed
        command-line arguments.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        raise NotImplementedError("Subclasses must override")
297 
298 
300  """Dataset type specified by a command-line argument.
301 
302  Parameters
303  ----------
304  name : `str`, optional
305  Name of command-line argument (including leading "--",
306  if appropriate) whose value is the dataset type.
307  If `None`, uses ``--idName_dstype`` where idName
308  is the name of the data ID argument (e.g. "id").
309  help : `str`, optional
310  Help string for the command-line argument.
311  default : `object`, optional
312  Default value. If `None`, then the command-line option is required.
313  This argument is ignored if the command-line argument is positional
314  (name does not start with "-") because positional arguments do
315  not support default values.
316  """
317 
def __init__(self,
             name=None,
             help="dataset type to process from input data repository",
             default=None,
             ):
    """Store the argument name, help text and default value.

    ``name`` may be `None`; in that case ``addArgument`` derives it
    from the data ID argument name.
    """
    DynamicDatasetType.__init__(self)
    self.default = default
    self.help = help
    self.name = name
327 
def getDatasetType(self, namespace):
    """Return the dataset type as a string, read from the matching
    command-line argument.

    Parameters
    ----------
    namespace :
        Parsed command.

    Returns
    -------
    datasetType : `str`
        Dataset type.
    """
    # The namespace attribute is the argument name without leading dashes.
    return getattr(namespace, self.name.lstrip("-"))
344 
def addArgument(self, parser, idName):
    """Register the command-line argument that names the dataset type.

    Parameters
    ----------
    parser : `ArgumentParser`
        Argument parser.
    idName : `str`
        Data ID.

    Notes
    -----
    Also sets the `name` attribute to ``--<idName>_dstype`` if it is
    currently `None`.
    """
    helpText = self.help or "dataset type for %s" % (idName,)
    if self.name is None:
        self.name = "--%s_dstype" % (idName,)
    argKwargs = {"default": self.default, "help": helpText}
    if self.name.startswith("-"):
        # ``required`` is only passed for optional (dashed) arguments;
        # such an argument is mandatory exactly when no default is given.
        argKwargs["required"] = self.default is None
    parser.add_argument(self.name, **argKwargs)
370 
371 
373  """Dataset type specified by a config parameter.
374 
375  Parameters
376  ----------
377  name : `str`
378  Name of config option whose value is the dataset type.
379  """
380 
def __init__(self, name):
    """Remember the (possibly dotted) name of the config field that
    holds the dataset type.
    """
    DynamicDatasetType.__init__(self)
    self.name = name
384 
def getDatasetType(self, namespace):
    """Return the dataset type as a string, read from the appropriate
    config field.

    Parameters
    ----------
    namespace : `argparse.Namespace`
        Parsed command; its ``config`` attribute is searched.

    Returns
    -------
    datasetType : `str`
        Value of the config field named by ``self.name``.

    Raises
    ------
    RuntimeError
        Raised if any component of the dotted field name cannot be
        found on the config.
    """
    # getattr does not work reliably if the config field name is
    # dotted, so step through one level at a time
    value = namespace.config
    for key in self.name.split("."):
        try:
            value = getattr(value, key)
        except (AttributeError, KeyError) as e:
            # A missing attribute raises AttributeError; KeyError is still
            # caught for objects that signal a missing field that way.
            raise RuntimeError("Cannot find config parameter %r" % (self.name,)) from e
    return value
404 
405 
406 class ArgumentParser(argparse.ArgumentParser):
407  """Argument parser for command-line tasks that is based on
408  `argparse.ArgumentParser`.
409 
410  Parameters
411  ----------
412  name : `str`
413  Name of top-level task; used to identify camera-specific override
414  files.
415  usage : `str`, optional
416  Command-line usage signature.
417  **kwargs
418  Additional keyword arguments for `argparse.ArgumentParser`.
419 
420  Notes
421  -----
422  Users may wish to add additional arguments before calling `parse_args`.
423  """
424  # I would prefer to check data ID keys and values as they are parsed,
425  # but the required information comes from the butler, so I have to
426  # construct a butler before I do this checking. Constructing a butler
427  # is slow, so I only want do it once, after parsing the command line,
428  # so as to catch syntax errors quickly.
429 
430  requireOutput = True
431  """Require an output directory to be specified (`bool`)."""
432 
433  def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
  # Build the parser: record the task name, start an empty registry of
  # data ID arguments, initialise argparse (with '@' as the prefix for
  # argument files), register the options shared by command-line tasks,
  # and configure lsst.log with a console appender at INFO level.
434  self._name = name
435  self._dataIdArgDict = {} # Dict of data identifier specifications, by argument name
436  argparse.ArgumentParser.__init__(self,
437  usage=usage,
438  fromfile_prefix_chars='@',
439  epilog=textwrap.dedent("""Notes:
440  * --config, --configfile, --id, --loglevel and @file may appear multiple times;
441  all values are used, in order left to right
442  * @file reads command-line options from the specified file:
443  * data may be distributed among multiple lines (e.g. one option per line)
444  * data after # is treated as a comment and ignored
445  * blank lines and lines starting with # are ignored
446  * To specify multiple values for an option, do not use = after the option name:
447  * right: --configfile foo bar
448  * wrong: --configfile=foo bar
449  """),
450  formatter_class=argparse.RawDescriptionHelpFormatter,
451  **kwargs)
  # Repository locations are stored under "raw*" destinations; they are
  # turned into absolute paths (and the raw values deleted) by
  # _parseDirectories.
452  self.add_argument(metavar='input', dest="rawInput",
453  help="path to input data repository, relative to $%s" % (DEFAULT_INPUT_NAME,))
454  self.add_argument("--calib", dest="rawCalib",
455  help="path to input calibration repository, relative to $%s" %
456  (DEFAULT_CALIB_NAME,))
457  self.add_argument("--output", dest="rawOutput",
458  help="path to output data repository (need not exist), relative to $%s" %
459  (DEFAULT_OUTPUT_NAME,))
460  self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
461  help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
462  "optionally sets ROOT to ROOT/rerun/INPUT")
463  self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
464  help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
465  self.add_argument("-C", "--configfile", dest="configfile", nargs="*", action=ConfigFileAction,
466  help="config override file(s)")
467  self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
468  help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
469  metavar="LEVEL|COMPONENT=LEVEL")
470  self.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
471  self.add_argument("--debug", action="store_true", help="enable debugging output?")
472  self.add_argument("--doraise", action="store_true",
473  help="raise an exception on error (else log a message and continue)?")
474  self.add_argument("--noExit", action="store_true",
475  help="Do not exit even upon failure (i.e. return a struct to the calling script)")
476  self.add_argument("--profile", help="Dump cProfile statistics to filename")
477  self.add_argument("--show", nargs="+", default=(),
478  help="display the specified information to stdout and quit "
479  "(unless run is specified).")
480  self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
481  self.add_argument("-t", "--timeout", type=float,
482  help="Timeout for multiprocessing; maximum wall time (sec)")
483  self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
484  help=("remove and re-create the output directory if it already exists "
485  "(safe with -j, but not all other forms of parallel execution)"))
486  self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
487  help=("backup and then overwrite existing config files instead of checking them "
488  "(safe with -j, but not all other forms of parallel execution)"))
489  self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
490  help="Don't copy config to file~N backup.")
  # NOTE(review): the two adjacent literals in the help text below are
  # joined without a separating space, so the message reads
  # "...instead of checkingthem (safe ..."; the --clobber-config help
  # above ends its first literal with a space. Consider adding one.
491  self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
492  help=("backup and then overwrite existing package versions instead of checking"
493  "them (safe with -j, but not all other forms of parallel execution)"))
494  self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
495  help="don't check package versions; useful for development")
  # Default logging setup: INFO level, console appender writing to
  # System.out with a short "<logger> <level>: <message>" pattern.
496  lsstLog.configure_prop("""
497 log4j.rootLogger=INFO, A1
498 log4j.appender.A1=ConsoleAppender
499 log4j.appender.A1.Target=System.out
500 log4j.appender.A1.layout=PatternLayout
501 log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
502 """)
503 
def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
                    ContainerClass=DataIdContainer):
    """Add a data ID argument.

    Parameters
    ----------
    name : `str`
        Data ID argument (including leading dashes, if wanted).
    datasetType : `str` or `DynamicDatasetType`-type
        Type of dataset. Supply a string for a fixed dataset type.
        For a dynamically determined dataset type, supply
        a `DynamicDatasetType`, such as a `DatasetArgument`.
    help : `str`
        Help string for the argument.
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        Some container classes may also support `None`, which means
        the level should not be restricted; however the default class,
        `DataIdContainer`, does not support `None`.
    doMakeDataRefList : bool, optional
        If `True` (default), construct data references.
    ContainerClass : `class`, optional
        Class to contain data IDs and data references; the default class
        `DataIdContainer` will work for many, but not all, cases.

    Raises
    ------
    RuntimeError
        Raised if an argument with the same name (ignoring leading
        dashes) was already added, or if the name is reserved.

    Notes
    -----
    If ``datasetType`` is dynamic, it is given the chance to add
    a second argument that specifies the dataset type.

    The associated data is put into ``namespace.<dataIdArgument.name>``
    as an instance of `ContainerClass`; the container includes fields:

    - ``idList``: a list of data ID dicts.
    - ``refList``: a list of `~lsst.daf.persistence.Butler`
      data references (empty if ``doMakeDataRefList`` is `False`).
    """
    argName = name.lstrip("-")

    if argName in self._dataIdArgDict:
        raise RuntimeError("Data ID argument %s already exists" % (name,))
    # These names are used for other fields of the parsed namespace.
    reservedNames = {"camera", "config", "butler", "log", "obsPkg"}
    if argName in reservedNames:
        raise RuntimeError("Data ID argument %s is a reserved name" % (name,))

    self.add_argument(name, nargs="*", action=IdValueAction, help=help,
                      metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")

    spec = DataIdArgument(
        name=argName,
        datasetType=datasetType,
        level=level,
        doMakeDataRefList=doMakeDataRefList,
        ContainerClass=ContainerClass,
    )
    if spec.isDynamicDatasetType:
        datasetType.addArgument(parser=self, idName=argName)

    self._dataIdArgDict[argName] = spec
568 
569  def parse_args(self, config, args=None, log=None, override=None):
570  """Parse arguments for a command-line task.
571 
572  Parameters
573  ----------
574  config : `lsst.pex.config.Config`
575  Config for the task being run.
576  args : `list`, optional
577  Argument list; if `None` then ``sys.argv[1:]`` is used.
578  log : `lsst.log.Log`, optional
579  `~lsst.log.Log` instance; if `None` use the default log.
580  override : callable, optional
581  A config override function. It must take the root config object
582  as its only argument and must modify the config in place.
583  This function is called after camera-specific overrides files
584  are applied, and before command-line config overrides
585  are applied (thus allowing the user the final word).
586 
587  Returns
588  -------
589  namespace : `argparse.Namespace`
590  A `~argparse.Namespace` instance containing fields:
591 
592  - ``camera``: camera name.
593  - ``config``: the supplied config with all overrides applied,
594  validated and frozen.
595  - ``butler``: a `lsst.daf.persistence.Butler` for the data.
596  - An entry for each of the data ID arguments registered by
597  `add_id_argument`, the value of which is an
598  `~lsst.pipe.base.DataIdArgument` that includes public
599  elements.
600  ``idList`` and ``refList``.
601  - ``log``: a `lsst.log` Log.
602  - An entry for each command-line argument,
603  with the following exceptions:
604 
605  - config is the supplied config, suitably updated.
606  - configfile, id and loglevel are all missing.
607  - ``obsPkg``: name of the ``obs_`` package for this camera.
608  """
609  if args is None:
610  args = sys.argv[1:]
611 
  # The input repository must be the first argument because the camera
  # (and hence the default config overrides) is derived from it before
  # the rest of the command line is parsed.
612  if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
613  self.print_help()
614  if len(args) == 1 and args[0] in ("-h", "--help"):
615  self.exit()
616  else:
  # NOTE(review): the message is passed as the first positional
  # argument of exit(); argparse documents that parameter as
  # ``status`` (with ``message`` second) - confirm this is intended.
617  self.exit("%s: error: Must specify input as first argument" % self.prog)
618 
619  # Note that --rerun may change namespace.input, but if it does
620  # we verify that the new input has the same mapper class.
621  namespace = argparse.Namespace()
622  namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
623  if not os.path.isdir(namespace.input):
624  self.error("Error: input=%r not found" % (namespace.input,))
625 
626  namespace.config = config
627  namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
628  mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
629  namespace.camera = mapperClass.getCameraName()
630  namespace.obsPkg = mapperClass.getPackageName()
631 
632  self.handleCamera(namespace)
633 
  # Override order: obs-package/camera files, then the ``override``
  # callable, then the command line (applied while parsing below).
634  self._applyInitialOverrides(namespace)
635  if override is not None:
636  override(namespace.config)
637 
638  # Add data ID containers to namespace
639  for dataIdArgument in self._dataIdArgDict.values():
640  setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))
641 
642  namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
643  del namespace.configfile
644 
645  self._parseDirectories(namespace)
646 
647  if namespace.clobberOutput:
648  if namespace.output is None:
649  self.error("--clobber-output is only valid with --output or --rerun")
650  elif namespace.output == namespace.input:
651  self.error("--clobber-output is not valid when the output and input repos are the same")
652  if os.path.exists(namespace.output):
653  namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
654  shutil.rmtree(namespace.output)
655 
656  namespace.log.debug("input=%s", namespace.input)
657  namespace.log.debug("calib=%s", namespace.calib)
658  namespace.log.debug("output=%s", namespace.output)
659 
  # exit=False: the decision to quit for --show is taken further down,
  # after the data IDs have been processed.
660  obeyShowArgument(namespace.show, namespace.config, exit=False)
661 
662  # No environment variable or --output or --rerun specified.
663  if self.requireOutput and namespace.output is None and namespace.rerun is None:
664  self.error("no output directory specified.\n"
665  "An output directory must be specified with the --output or --rerun\n"
666  "command-line arguments.\n")
667 
668  butlerArgs = {} # common arguments for butler elements
669  if namespace.calib:
670  butlerArgs = {'mapperArgs': {'calibRoot': namespace.calib}}
671  if namespace.output:
672  outputs = {'root': namespace.output, 'mode': 'rw'}
673  inputs = {'root': namespace.input}
674  inputs.update(butlerArgs)
675  outputs.update(butlerArgs)
676  namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
677  else:
  # No separate output repository: the input repository is passed
  # as the only (read-write) output.
678  outputs = {'root': namespace.input, 'mode': 'rw'}
679  outputs.update(butlerArgs)
680  namespace.butler = dafPersist.Butler(outputs=outputs)
681 
682  # convert data in each of the identifier lists to proper types
683  # this is done after constructing the butler,
684  # hence after parsing the command line,
685  # because it takes a long time to construct a butler
686  self._processDataIds(namespace)
687  if "data" in namespace.show:
688  for dataIdName in self._dataIdArgDict.keys():
689  for dataRef in getattr(namespace, dataIdName).refList:
690  print("%s dataRef.dataId = %s" % (dataIdName, dataRef.dataId))
691 
  # Any --show request ends the program here unless "run" was also given.
692  if namespace.show and "run" not in namespace.show:
693  sys.exit(0)
694 
695  if namespace.debug:
696  try:
697  import debug
698  assert debug # silence pyflakes
699  except ImportError:
700  sys.stderr.write("Warning: no 'debug' module found\n")
701  namespace.debug = False
702 
703  del namespace.loglevel
704 
705  if namespace.longlog:
706  lsstLog.configure_prop("""
707 log4j.rootLogger=INFO, A1
708 log4j.appender.A1=ConsoleAppender
709 log4j.appender.A1.Target=System.out
710 log4j.appender.A1.layout=PatternLayout
711 log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddThh:mm:ss.sss} %c (%X{LABEL})(%F:%L)- %m%n
712 """)
713  del namespace.longlog
714 
715  namespace.config.validate()
716  namespace.config.freeze()
717 
718  return namespace
719 
def _parseDirectories(self, namespace):
    """Resolve the input, output and calib directories.

    This allows for hacking the directories, e.g., to include a "rerun".
    ``namespace`` is modified in place: ``calib``, ``output`` and
    ``rerun`` are set, ``input`` may be replaced when ``--rerun`` is
    used, and the ``rawInput``, ``rawCalib``, ``rawOutput`` and
    ``rawRerun`` attributes are removed.
    """
    inputMapper = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
    namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

    # An explicitly specified output directory is expanded; otherwise
    # there is no output (unless --rerun supplies one below).
    namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput) if namespace.rawOutput else None

    if not namespace.rawRerun:
        namespace.rerun = None
    else:
        # --rerun is either OUTPUT or INPUT:OUTPUT. In the second form the
        # input is replaced, and the replacement must use the same mapper
        # as the original input.
        if namespace.output:
            self.error("Error: cannot specify both --output and --rerun")
        namespace.rerun = namespace.rawRerun.split(":")
        rerunRoot = os.path.join(namespace.input, "rerun")
        rerunDirs = [os.path.join(rerunRoot, part) for part in namespace.rerun]
        inputChanged = False
        nParts = len(rerunDirs)
        if nParts == 1:
            namespace.output = rerunDirs[0]
            # An existing rerun records its parent in "_parent"; use it
            # as the input when present.
            parentLink = os.path.join(namespace.output, "_parent")
            if os.path.exists(parentLink):
                namespace.input = os.path.realpath(parentLink)
                inputChanged = True
        elif nParts == 2:
            namespace.input, namespace.output = rerunDirs
            inputChanged = True
        else:
            self.error("Error: invalid argument for --rerun: %s" % namespace.rerun)
        if inputChanged and dafPersist.Butler.getMapperClass(namespace.input) != inputMapper:
            self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")

    for rawName in ("rawInput", "rawCalib", "rawOutput", "rawRerun"):
        delattr(namespace, rawName)
766 
def _processDataIds(self, namespace):
    """Process the parsed data for each data ID argument in an
    `~argparse.Namespace`.

    For every registered data ID argument the dataset type is set on
    its container; then, unless ``doMakeDataRefList`` is false for that
    argument, the data ID values are cast to the correct types and the
    data references are computed.

    Parameters
    ----------
    namespace : `argparse.Namespace`
        Parsed namespace. These attributes are read:

        - ``butler``
        - ``log``
        - ``config``, if any dynamic dataset types are set by
          a config parameter.
        - Dataset type arguments (e.g. ``id_dstype``), if any dynamic
          dataset types are specified by such

        These attributes are modified:

        - ``<name>`` for each data ID argument registered using
          `add_id_argument` with name ``<name>``.
    """
    for dataIdArgument in self._dataIdArgDict.values():
        container = getattr(namespace, dataIdArgument.name)
        container.setDatasetType(dataIdArgument.getDatasetType(namespace))
        if not dataIdArgument.doMakeDataRefList:
            continue

        try:
            container.castDataIds(butler=namespace.butler)
        except (KeyError, TypeError) as e:
            # failure of castDataIds indicates invalid command args
            self.error(e)

        # failure of makeDataRefList indicates a bug
        # that wants a traceback
        container.makeDataRefList(namespace)
807 
def _applyInitialOverrides(self, namespace):
    """Apply obs-package-specific and camera-specific config
    override files, if found.

    Parameters
    ----------
    namespace : `argparse.Namespace`
        Parsed namespace. These attributes are read: ``obsPkg``,
        ``camera``, ``log`` and ``config``.

    Notes
    -----
    The following files in the package ``namespace.obsPkg`` are
    loaded into ``namespace.config``, in this order, if they exist:

    - ``config/<task_name>.py``
    - ``config/<camera_name>/<task_name>.py``
    """
    configDir = os.path.join(lsst.utils.getPackageDir(namespace.obsPkg), "config")
    fileName = self._name + ".py"
    candidates = [
        os.path.join(configDir, fileName),
        os.path.join(configDir, namespace.camera, fileName),
    ]
    for filePath in candidates:
        if not os.path.exists(filePath):
            namespace.log.debug("Config override file does not exist: %r", filePath)
            continue
        namespace.log.info("Loading config overrride file %r", filePath)
        namespace.config.load(filePath)
835 
def handleCamera(self, namespace):
    """Hook for camera-specific operations, run before the command line
    is parsed.

    Parameters
    ----------
    namespace : `argparse.Namespace`
        Namespace with the following fields:

        - ``camera``: the camera name.
        - ``config``: the config passed to parse_args, with no overrides applied.
        - ``obsPkg``: the ``obs_`` package for this camera.
        - ``log``: a `lsst.log` Log.

    Notes
    -----
    The default implementation does nothing.
    """
    return None
854 
def convert_arg_line_to_args(self, arg_line):
    """Split one line of an ``@<path>`` argument file into arguments,
    so that a line may hold several values.

    Blank lines and lines starting with ``#`` produce no arguments;
    text after ``#`` is treated as a comment, and shell-style quoting
    is honoured.

    Parameters
    ----------
    arg_line : `str`
        Line of text read from an argument file.

    Yields
    ------
    arg : `str`
        Each non-blank argument found on the line.
    """
    stripped = arg_line.strip()
    if stripped and not stripped.startswith("#"):
        tokens = shlex.split(stripped, comments=True, posix=True)
        # Tokens that are only whitespace (e.g. a quoted blank) are dropped.
        yield from (token for token in tokens if token.strip())
871 
def addReuseOption(self, choices):
    """Add a "--reuse-outputs-from SUBTASK" option to the argument
    parser.

    CmdLineTasks that can be restarted at an intermediate step using
    outputs from earlier (but still internal) steps should use this
    method to allow the user to control whether that happens when
    outputs from earlier steps are present.

    Parameters
    ----------
    choices : sequence
        A sequence of string names (by convention, top-level subtasks)
        that identify the steps that could be skipped when their
        outputs are already present. The list is ordered, so when the
        user specifies one step on the command line, all previous steps
        may be skipped as well. In addition to the choices provided,
        users may pass "all" to indicate that all steps may be thus
        skipped.

    Notes
    -----
    When this method is called, the ``namespace`` object returned by
    ``parse_args`` will contain a ``reuse`` attribute containing
    a list of all steps that should be skipped if their outputs
    are already present.
    If no steps should be skipped, ``reuse`` will be an empty list.
    """
    # Copy so the caller's sequence is untouched, then allow "all".
    allowed = [*choices, "all"]
    self.add_argument("--reuse-outputs-from", dest="reuse", choices=allowed,
                      default=[], action=ReuseAction,
                      help=("Skip the given subtask and its predecessors and reuse their outputs "
                            "if those outputs already exist. Use 'all' to specify all subtasks."))
904 
905 
907  """`ArgumentParser` for command-line tasks that don't write any output.
908  """
909 
910  requireOutput = False # We're not going to write anything
911 
912 
def getTaskDict(config, taskDict=None, baseName=""):
    """Collect task information for every subtask found in a config.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    taskDict : `dict`, optional
        Users should not specify this argument. Supports recursion.
        If provided, it is updated in place; otherwise a new `dict`
        is started.
    baseName : `str`, optional
        Users should not specify this argument. It is only used for
        recursion: when non-empty it is joined with a period to each
        field name to form the key; otherwise the bare field name is
        the key.

    Returns
    -------
    taskDict : `dict`
        Keys are (dotted) config field names, values are task names.

    Notes
    -----
    This function calls itself for each nested config. The user should
    call it with only a config, leaving ``taskDict`` and ``baseName``
    at their default values.
    """
    result = dict() if taskDict is None else taskDict
    for fieldName, field in config.items():
        # Only fields exposing both ``value`` and ``target`` can be subtasks.
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue
        qualifiedName = "%s.%s" % (baseName, fieldName) if baseName else fieldName
        try:
            taskName = "%s.%s" % (field.target.__module__, field.target.__name__)
        except Exception:
            # Target lacks module/name attributes; fall back to its repr.
            taskName = repr(field.target)
        result[qualifiedName] = taskName
        getTaskDict(config=subConfig, taskDict=result, baseName=qualifiedName)
    return result
955 
956 
class _FilteredStream:
    """Minimal file-like object that only prints the entries whose name
    matches a glob pattern.

    Parameters
    ----------
    pattern : `str`
        Glob pattern. Matching ignores case unless the pattern ends in
        ``:NOIGNORECASE``, in which case that suffix is removed and the
        match is case sensitive.

    Notes
    -----
    Newlines are silently discarded and reinserted; crude but effective.
    """

    def __init__(self, pattern):
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)
        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                # Warn that the mixed-case pattern is matched loosely.
                print("Matching \"%s\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)" % (pattern,), file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        """Print ``showStr`` if the name on its last line matches."""
        showStr = showStr.rstrip()
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            print("\n" + showStr)


def obeyShowArgument(showOpts, config=None, exit=False):
    """Process arguments specified with ``--show`` (but ignores
    ``"data"``).

    Parameters
    ----------
    showOpts : `list` of `str`
        List of options passed to ``--show``.
    config : optional
        The provided config. It is used by the ``config``, ``history``
        and ``tasks`` options.
    exit : bool, optional
        Exit if ``"run"`` isn't included in ``showOpts``.

    Raises
    ------
    SystemExit
        With status 1 for an unknown option, a ``history`` option without
        a pattern, or a failed history lookup; with status 0 when ``exit``
        is true and ``"run"`` is not in ``showOpts``.

    Notes
    -----
    Supports the following options in showOpts:

    - ``config[=PAT]``. Dump all the config entries, or just the ones that
      match the glob pattern.
    - ``history=PAT``. Show where the config entries that match the glob
      pattern were set.
    - ``tasks``. Show task hierarchy.
    - ``data``. Ignored; to be processed by caller.
    - ``run``. Keep going (the default behaviour is to exit if
      ``--show`` is specified).

    A leading literal ``config.`` in ``PAT`` is ignored.
    """
    if not showOpts:
        return

    for what in showOpts:
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            # The dot is escaped so that only a literal "config." prefix is
            # dropped; an unescaped dot would also eat e.g. "configs".
            pattern = re.search(r"^(?:config\.)?(.+)?", showArgs).group(1)
            fd = _FilteredStream(pattern) if pattern else sys.stdout
            config.saveToStream(fd, "config")
        elif showCommand == "history":
            globPattern = re.search(r"^(?:config\.)?(.+)?", showArgs).group(1)
            if not globPattern:
                print("Please provide a value with --show history (e.g. history=*.doXXX)", file=sys.stderr)
                sys.exit(1)

            error = False
            for i, fullName in enumerate(fnmatch.filter(config.names(), globPattern)):
                if i > 0:
                    print("")

                *cpath, cname = fullName.split(".")
                hconfig = config  # the config that we're interested in
                for depth, cpt in enumerate(cpath):
                    try:
                        hconfig = getattr(hconfig, cpt)
                    except AttributeError:
                        print("Error: configuration %s has no subconfig %s" %
                              (".".join(["config"] + cpath[:depth]), cpt), file=sys.stderr)
                        error = True
                        # Descending further from the wrong config is meaningless.
                        break
                else:
                    # Only query history once the full path was resolved.
                    try:
                        print(pexConfig.history.format(hconfig, cname))
                    except KeyError:
                        print("Error: %s has no field %s" % (".".join(["config"] + cpath), cname),
                              file=sys.stderr)
                        error = True

            if error:
                sys.exit(1)

        elif showCommand == "data":
            pass
        elif showCommand == "run":
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            print("Unknown value for show: %s (choose from '%s')" %
                  (what, "', '".join("config[=XXX] data history=XXX tasks run".split())), file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
1075 
1076 
def showTaskHierarchy(config):
    """Write the subtask hierarchy of a config to stdout.

    A ``Subtasks:`` header is printed first, followed by one
    ``<field name>: <task name>`` line per entry returned by
    `getTaskDict`, ordered by field name.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    """
    print(u"Subtasks:")
    taskDict = getTaskDict(config=config)
    for fieldName in sorted(taskDict):
        print(u"%s: %s" % (fieldName, taskDict[fieldName]))
1092 
1093 
class ConfigValueAction(argparse.Action):
    """argparse action that applies ``name=value`` config overrides
    given on the command line.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Apply one or more ``name=value`` config overrides.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser; its ``error`` method reports bad input.
        namespace : `argparse.Namespace`
            Parsed command. The ``namespace.config`` attribute is updated;
            nothing is done if it is `None`.
        values : `list`
            A list of ``configItemName=value`` pairs.
        option_string : `str`
            Option value specified by the user.
        """
        if namespace.config is None:
            return
        for assignment in values:
            fieldName, _, rawValue = assignment.partition("=")
            if not rawValue:
                parser.error("%s value %s must be in form name=value" % (option_string, assignment))

            # First try to assign the raw string as-is.
            try:
                setDottedAttr(namespace.config, fieldName, rawValue)
                continue
            except AttributeError:
                parser.error("no config field: %s" % (fieldName,))
                continue
            except Exception:
                # The field rejected a plain string; retry below with the
                # text evaluated as a Python expression.
                pass

            # NOTE(review): eval runs arbitrary expressions taken from the
            # command line; acceptable only because the caller is the user.
            try:
                evaluated = eval(rawValue, {})
            except Exception:
                parser.error("cannot parse %r as a value for %s" % (rawValue, fieldName))
            try:
                setDottedAttr(namespace.config, fieldName, evaluated)
            except Exception as e:
                parser.error("cannot set config.%s=%r: %s" % (fieldName, evaluated, e))
1134 
1135 
class ConfigFileAction(argparse.Action):
    """argparse action that loads config overrides from one or more
    files.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Load each listed config override file, in order.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser; its ``error`` method reports a failed load.
        namespace : `argparse.Namespace`
            Parsed command. ``namespace.config`` is updated by this
            method; nothing is done if it is `None`.
        values : `list`
            A list of data config file paths.
        option_string : `str`, optional
            Option value specified by the user.
        """
        if namespace.config is None:
            return
        for overridePath in values:
            self._loadOne(parser, namespace, overridePath)

    @staticmethod
    def _loadOne(parser, namespace, overridePath):
        """Load a single override file, reporting any failure through
        ``parser.error``.
        """
        try:
            namespace.config.load(overridePath)
        except Exception as err:
            parser.error("cannot load config file %r: %s" % (overridePath, err))
1162 
1163 
class IdValueAction(argparse.Action):
    """argparse action callback to process a data ID into a dict.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Parse ``--id`` data and append results to
        ``namespace.<argument>.idList``.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser; its ``error`` method reports bad input.
        namespace : `argparse.Namespace`
            Parsed command (an instance of argparse.Namespace).
            Nothing is done if ``namespace.config`` is `None`.
            The following attributes are updated:

            - ``<idName>.idList``, where ``<idName>`` is the name of the
              ID argument, for instance ``"id"`` for ID argument ``--id``.
        values : `list`
            A list of data IDs; see Notes below.
        option_string : `str`
            Option value specified by the user. With leading dashes
            removed it names the namespace attribute that is updated.

        Notes
        -----
        The data format is::

            key1=value1_1[^value1_2[^value1_3...]
            key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. ``value1_1``) may either be a string,
        or of the form ``"int..int"`` (e.g. ``"1..3"``) which is
        interpreted as ``"1^2^3"`` (inclusive, unlike a python range).
        So ``"0^2..4^7..9"`` is equivalent to ``"0^2^3^4^7^8^9"``.
        You may also specify a stride: ``"1..5:2"`` is ``"1^3^5"``.
        A stride of zero is rejected with a parser error.

        The cross product is computed for keys with multiple values,
        with the last key varying fastest. For example::

            --id visit=1^2 ccd=1,1^2,2

        results in the following data ID dicts being appended to
        ``namespace.<argument>.idList`` (all values are kept as strings)::

            {"visit": "1", "ccd": "1,1"}
            {"visit": "1", "ccd": "2,2"}
            {"visit": "2", "ccd": "1,1"}
            {"visit": "2", "ccd": "2,2"}
        """
        if namespace.config is None:
            return
        idDict = collections.OrderedDict()
        for nameValue in values:
            name, sep, valueStr = nameValue.partition("=")
            if name in idDict:
                parser.error("%s appears multiple times in one ID argument: %s" % (name, option_string))
            idDict[name] = []
            for item in valueStr.split("^"):
                mat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", item)
                if mat is None:
                    # Not a numeric range: keep the text verbatim.
                    idDict[name].append(item)
                    continue
                first = int(mat.group(1))
                last = int(mat.group(2))
                stride = int(mat.group(3)) if mat.group(3) else 1
                if stride == 0:
                    # range() would raise ValueError; report it as a usage error.
                    parser.error("stride must be greater than zero in %s value %r" % (name, item))
                # The upper bound is inclusive, unlike a python range.
                idDict[name].extend(str(number) for number in range(first, last + 1, stride))

        idDictList = [collections.OrderedDict(zip(idDict.keys(), valList))
                      for valList in itertools.product(*idDict.values())]

        argName = option_string.lstrip("-")
        ident = getattr(namespace, argName)
        ident.idList += idDictList
1240 
1241 
class LogLevelAction(argparse.Action):
    """argparse action that sets logging levels.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Set the level of the default log and/or of named loggers.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser; its ``error`` method reports an unknown level.
        namespace : `argparse.Namespace`
            Parsed command. ``namespace.log`` has its level set when an
            item names no component.
        values : `list`
            List of levels; each item must be of the form
            ``component_name=level`` or ``level``, where ``level`` is one
            of TRACE, DEBUG, INFO, WARN, ERROR or FATAL (not case
            sensitive).
        option_string : `str`
            Option value specified by the user.
        """
        permittedLevelList = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        for spec in values:
            head, _, tail = spec.partition("=")
            if tail:
                component, levelStr = head, tail
            else:
                # No "=level" part: the whole item is the level.
                component, levelStr = None, head
            levelName = levelStr.upper()
            if levelName in permittedLevelList:
                logLevel = getattr(lsstLog.Log, levelName)
            else:
                parser.error("loglevel=%r not one of %s" % (levelStr, permittedLevelList))
            logger = namespace.log if component is None else lsstLog.Log.getLogger(component)
            logger.setLevel(logLevel)
1277 
1278 
class ReuseAction(argparse.Action):
    """argparse action associated with ArgumentParser.addReuseOption."""

    def __call__(self, parser, namespace, value, option_string=None):
        """Store in ``namespace.reuse`` the steps whose outputs may be reused.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser. This argument is not used.
        namespace : `argparse.Namespace`
            Parsed command. The ``reuse`` attribute is set to the chosen
            step together with every step that precedes it in
            ``self.choices``.
        value : `str`
            One of ``self.choices``. ``"all"`` selects every step, i.e.
            every choice except the final ``"all"`` entry itself.
        option_string : `str`, optional
            Option value specified by the user.
        """
        if value == "all":
            # Slicing off the trailing "all" also works when no step names
            # were registered, where indexing choices[-2] would fail.
            namespace.reuse = self.choices[:-1]
        else:
            index = self.choices.index(value)
            namespace.reuse = self.choices[:index + 1]
1287 
1288 
def setDottedAttr(item, name, value):
    """Assign an attribute addressed by a dotted path (`setattr` for
    hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object at the root of the attribute path.
    name : `str`
        Dotted name of the attribute to set.
    value : obj
        New value for the attribute.

    Notes
    -----
    For example if name is ``foo.bar.baz`` then ``item.foo.bar.baz``
    is set to the specified value. Every component but the last is
    looked up with `getattr`, so a missing intermediate attribute
    raises `AttributeError`.
    """
    *parentNames, leafName = name.split(".")
    owner = item
    for parentName in parentNames:
        owner = getattr(owner, parentName)
    setattr(owner, leafName, value)
1312 
1313 
def getDottedAttr(item, name):
    """Fetch an attribute addressed by a dotted path (`getattr` for
    hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object at the root of the attribute path.
    name : `str`
        Dotted name of the attribute to get.

    Returns
    -------
    itemAttr : obj
        If name is ``foo.bar.baz`` then the return value is
        ``item.foo.bar.baz``.
    """
    node = item
    remaining = name
    while True:
        # Peel off one path component per pass; stop after the last one.
        head, sep, remaining = remaining.partition(".")
        node = getattr(node, head)
        if not sep:
            return node
def getTaskDict(config, taskDict=None, baseName="")
def __init__(self, name=None, help="dataset type to process from input data repository", default=None)
def setDottedAttr(item, name, value)
def __call__(self, parser, namespace, values, option_string=None)
def obeyShowArgument(showOpts, config=None, exit=False)
std::string getPackageDir(std::string const &packageName)
def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer)
def __call__(self, parser, namespace, value, option_string)
def __init__(self, name, usage="%(prog)s input [options]", kwargs)
def __call__(self, parser, namespace, values, option_string)
def __call__(self, parser, namespace, values, option_string)
def __call__(self, parser, namespace, values, option_string)
def parse_args(self, config, args=None, log=None, override=None)
def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True, ContainerClass=DataIdContainer)