lsst.pipe.base  16.0-25-g2c6bf4a+2
argumentParser.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2015 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 __all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
23  "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]
24 
25 import abc
26 import argparse
27 import collections
28 import fnmatch
29 import itertools
30 import os
31 import re
32 import shlex
33 import sys
34 import shutil
35 import textwrap
36 
37 import lsst.utils
38 import lsst.pex.config as pexConfig
40 import lsst.log as lsstLog
41 import lsst.daf.persistence as dafPersist
42 
43 DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
44 DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
45 DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
46 
47 
48 def _fixPath(defName, path):
49  """Apply environment variable as default root, if present, and abspath.
50 
51  Parameters
52  ----------
53  defName : `str`
54  Name of environment variable containing default root path;
55  if the environment variable does not exist
56  then the path is relative to the current working directory
57  path : `str`
58  Path relative to default root path.
59 
60  Returns
61  -------
62  abspath : `str`
63  Path that has been expanded, or `None` if the environment variable
64  does not exist and path is `None`.
65  """
66  defRoot = os.environ.get(defName)
67  if defRoot is None:
68  if path is None:
69  return None
70  return os.path.abspath(path)
71  return os.path.abspath(os.path.join(defRoot, path or ""))
72 
73 
75  """Container for data IDs and associated data references.
76 
77  Parameters
78  ----------
79  level : `str`
80  The lowest hierarchy level to descend to for this dataset type,
81  for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
82  Use `""` to use the mapper's default for the dataset type.
83  This class does not support `None`, but if it did, `None`
84  would mean the level should not be restricted.
85 
86  Notes
87  -----
88  Override this class for data IDs that require special handling to be
89  converted to ``data references``, and specify the override class
90  as ``ContainerClass`` for ``add_id_argument``.
91 
92  If you don't want the argument parser to compute data references,
93  specify ``doMakeDataRefList=False`` in ``add_id_argument``.
94  """
95 
    def __init__(self, level=None):
        """Construct an empty container for the given hierarchy level."""
        self.datasetType = None
        """Dataset type of the data references (`str`).
        """
        self.level = level
        """See parameter ``level`` (`str`).
        """
        self.idList = []
        """List of data IDs specified on the command line for the
        appropriate data ID argument (`list` of `dict`).
        """
        self.refList = []
        """List of data references for the data IDs in ``idList``
        (`list` of `lsst.daf.persistence.ButlerDataRef`).
        Elements will be omitted if the corresponding data is not found.
        The list will be empty when returned by ``parse_args`` if
        ``doMakeDataRefList=False`` was specified in ``add_id_argument``.
        """
114 
115  def setDatasetType(self, datasetType):
116  """Set actual dataset type, once it is known.
117 
118  Parameters
119  ----------
120  datasetType : `str`
121  Dataset type.
122 
123  Notes
124  -----
125  The reason ``datasetType`` is not a constructor argument is that
126  some subclasses do not know the dataset type until the command
127  is parsed. Thus, to reduce special cases in the code,
128  ``datasetType`` is always set after the command is parsed.
129  """
130  self.datasetType = datasetType
131 
    def castDataIds(self, butler):
        """Validate data IDs and cast them to the correct type
        (modify idList in place).

        This code casts the values in the data IDs dicts in `idList`
        to the type required by the butler. Data IDs are read from the
        command line as `str`, but the butler requires some values to be
        other types. For example "visit" values should be `int`.

        Parameters
        ----------
        butler : `lsst.daf.persistence.Butler`
            Data butler.

        Raises
        ------
        RuntimeError
            If `setDatasetType` has not been called yet.
        KeyError
            If the butler cannot provide keys for this dataset type/level.
        TypeError
            If a data ID value cannot be cast to the required type.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            idKeyTypeDict = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError as e:
            msg = "Cannot get keys for datasetType %s at level %s" % (self.datasetType, self.level)
            raise KeyError(msg) from e

        for dataDict in self.idList:
            for key, strVal in dataDict.items():
                try:
                    keyType = idKeyTypeDict[key]
                except KeyError:
                    # OK, assume that it's a valid key and guess that it's a string;
                    # remember the guess so we only warn once per key.
                    keyType = str

                    log = lsstLog.Log.getDefaultLogger()
                    log.warn("Unexpected ID %s; guessing type is \"%s\"" %
                             (key, 'str' if keyType == str else keyType))
                    idKeyTypeDict[key] = keyType

                if keyType != str:
                    try:
                        castVal = keyType(strVal)
                    except Exception:
                        raise TypeError("Cannot cast value %r to %s for ID key %r" % (strVal, keyType, key,))
                    dataDict[key] = castVal
173 
174  def makeDataRefList(self, namespace):
175  """Compute refList based on idList.
176 
177  Parameters
178  ----------
179  namespace : `argparse.Namespace`
180  Results of parsing command-line. The ``butler`` and ``log``
181  elements must be set.
182 
183  Notes
184  -----
185  Not called if ``add_id_argument`` was called with
186  ``doMakeDataRefList=False``.
187  """
188  if self.datasetType is None:
189  raise RuntimeError("Must call setDatasetType first")
190  butler = namespace.butler
191  for dataId in self.idList:
192  refList = dafPersist.searchDataRefs(butler, datasetType=self.datasetType,
193  level=self.level, dataId=dataId)
194  if not refList:
195  namespace.log.warn("No data found for dataId=%s", dataId)
196  continue
197  self.refList += refList
198 
199 
201  """data ID argument, used by `ArgumentParser.add_id_argument`.
202 
203  Parameters
204  ----------
205  name : `str`
206  Name of identifier (argument name without dashes).
207  datasetType : `str`
208  Type of dataset; specify a string for a fixed dataset type
209  or a `DatasetArgument` for a dynamic dataset type (e.g.
210  one specified by a command-line argument).
211  level : `str`
212  The lowest hierarchy level to descend to for this dataset type,
213  for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
214  Use `""` to use the mapper's default for the dataset type.
215  Some container classes may also support `None`, which means
216  the level should not be restricted; however the default class,
217  `DataIdContainer`, does not support `None`.
218  doMakeDataRefList : `bool`, optional
219  If `True` (default), construct data references.
220  ContainerClass : `class`, optional
221  Class to contain data IDs and data references; the default class
222  `DataIdContainer` will work for many, but not all, cases.
223  For example if the dataset type is specified on the command line
224  then use `DynamicDatasetType`.
225  """
226 
    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        # Leading dashes are rejected: ``name`` becomes the attribute name
        # under which results are stored on the parsed namespace.
        if name.startswith("-"):
            raise RuntimeError("Name %s must not start with -" % (name,))
        self.name = name
        self.datasetType = datasetType
        self.level = level
        self.doMakeDataRefList = bool(doMakeDataRefList)
        self.ContainerClass = ContainerClass
        # Argument name with any leading dashes stripped.
        self.argName = name.lstrip("-")
236 
237  @property
239  """`True` if the dataset type is dynamic (that is, specified
240  on the command line).
241  """
242  return isinstance(self.datasetType, DynamicDatasetType)
243 
244  def getDatasetType(self, namespace):
245  """Get the dataset type as a string.
246 
247  Parameters
248  ----------
249  namespace
250  Parsed command.
251 
252  Returns
253  -------
254  datasetType : `str`
255  Dataset type.
256  """
257  if self.isDynamicDatasetType:
258  return self.datasetType.getDatasetType(namespace)
259  else:
260  return self.datasetType
261 
262 
class DynamicDatasetType(metaclass=abc.ABCMeta):
    """Abstract base class for a dataset type that is determined from
    parsed command-line arguments.
    """

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify the dataset type name,
        if one is needed.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser to add the argument to.
        idName : `str`
            Name of data ID argument, without the leading ``"--"``,
            e.g. ``"id"``.

        Notes
        -----
        The default implementation is a no-op; subclasses that need an
        extra command-line argument override this method.
        """
        pass

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Get the dataset type as a string, based on parsed command-line
        arguments.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        raise NotImplementedError("Subclasses must override")
297 
298 
300  """Dataset type specified by a command-line argument.
301 
302  Parameters
303  ----------
304  name : `str`, optional
305  Name of command-line argument (including leading "--",
306  if appropriate) whose value is the dataset type.
307  If `None`, uses ``--idName_dstype`` where idName
308  is the name of the data ID argument (e.g. "id").
309  help : `str`, optional
310  Help string for the command-line argument.
311  default : `object`, optional
312  Default value. If `None`, then the command-line option is required.
 313  This argument is ignored if the command-line argument is positional
314  (name does not start with "-") because positional arguments do
315  not support default values.
316  """
317 
    def __init__(self,
                 name=None,
                 help="dataset type to process from input data repository",
                 default=None,
                 ):
        DynamicDatasetType.__init__(self)
        # Name of the command-line argument; if `None` it is filled in
        # later by `addArgument` as "--<idName>_dstype".
        self.name = name
        self.help = help
        self.default = default
327 
328  def getDatasetType(self, namespace):
329  """Get the dataset type as a string, from the appropriate
330  command-line argument.
331 
332  Parameters
333  ----------
334  namespace :
335  Parsed command.
336 
337  Returns
338  -------
339  datasetType : `str`
340  Dataset type.
341  """
342  argName = self.name.lstrip("-")
343  return getattr(namespace, argName)
344 
345  def addArgument(self, parser, idName):
346  """Add a command-line argument to specify the dataset type name.
347 
348  Parameters
349  ----------
350  parser : `ArgumentParser`
351  Argument parser.
352  idName : `str`
353  Data ID.
354 
355  Notes
356  -----
357  Also sets the `name` attribute if it is currently `None`.
358  """
359  help = self.help if self.help else "dataset type for %s" % (idName,)
360  if self.name is None:
361  self.name = "--%s_dstype" % (idName,)
362  requiredDict = dict()
363  if self.name.startswith("-"):
364  requiredDict = dict(required=self.default is None)
365  parser.add_argument(
366  self.name,
367  default=self.default,
368  help=help,
369  **requiredDict)
370 
371 
373  """Dataset type specified by a config parameter.
374 
375  Parameters
376  ----------
377  name : `str`
378  Name of config option whose value is the dataset type.
379  """
380 
    def __init__(self, name):
        DynamicDatasetType.__init__(self)
        # Dotted name of the config field holding the dataset type.
        self.name = name
384 
385  def getDatasetType(self, namespace):
386  """Return the dataset type as a string, from the appropriate
387  config field.
388 
389  Parameters
390  ----------
391  namespace : `argparse.Namespace`
392  Parsed command.
393  """
394  # getattr does not work reliably if the config field name is
395  # dotted, so step through one level at a time
396  keyList = self.name.split(".")
397  value = namespace.config
398  for key in keyList:
399  try:
400  value = getattr(value, key)
401  except KeyError:
402  raise RuntimeError("Cannot find config parameter %r" % (self.name,))
403  return value
404 
405 
406 class ArgumentParser(argparse.ArgumentParser):
407  """Argument parser for command-line tasks that is based on
408  `argparse.ArgumentParser`.
409 
410  Parameters
411  ----------
412  name : `str`
413  Name of top-level task; used to identify camera-specific override
414  files.
415  usage : `str`, optional
416  Command-line usage signature.
417  **kwargs
418  Additional keyword arguments for `argparse.ArgumentParser`.
419 
420  Notes
421  -----
422  Users may wish to add additional arguments before calling `parse_args`.
423  """
424  # I would prefer to check data ID keys and values as they are parsed,
425  # but the required information comes from the butler, so I have to
426  # construct a butler before I do this checking. Constructing a butler
427  # is slow, so I only want do it once, after parsing the command line,
428  # so as to catch syntax errors quickly.
429 
430  requireOutput = True
431  """Require an output directory to be specified (`bool`)."""
432 
    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        # Top-level task name; used to locate camera-specific config
        # override files in `_applyInitialOverrides`.
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        argparse.ArgumentParser.__init__(self,
                                         usage=usage,
                                         fromfile_prefix_chars='@',
                                         epilog=textwrap.dedent("""Notes:
            * --config, --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --configfile foo bar
                * wrong: --configfile=foo bar
            """),
                                         formatter_class=argparse.RawDescriptionHelpFormatter,
                                         **kwargs)
        # Positional input repository path; resolved against $PIPE_INPUT_ROOT.
        self.add_argument(metavar='input', dest="rawInput",
                          help="path to input data repository, relative to $%s" % (DEFAULT_INPUT_NAME,))
        self.add_argument("--calib", dest="rawCalib",
                          help="path to input calibration repository, relative to $%s" %
                          (DEFAULT_CALIB_NAME,))
        self.add_argument("--output", dest="rawOutput",
                          help="path to output data repository (need not exist), relative to $%s" %
                          (DEFAULT_OUTPUT_NAME,))
        self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
                          help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
                          "optionally sets ROOT to ROOT/rerun/INPUT")
        self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
                          help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
        self.add_argument("-C", "--configfile", dest="configfile", nargs="*", action=ConfigFileAction,
                          help="config override file(s)")
        self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
                          help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
                          metavar="LEVEL|COMPONENT=LEVEL")
        self.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument("--doraise", action="store_true",
                          help="raise an exception on error (else log a message and continue)?")
        self.add_argument("--noExit", action="store_true",
                          help="Do not exit even upon failure (i.e. return a struct to the calling script)")
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument("--show", nargs="+", default=(),
                          help="display the specified information to stdout and quit "
                          "(unless run is specified).")
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument("-t", "--timeout", type=float,
                          help="Timeout for multiprocessing; maximum wall time (sec)")
        self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
                          help=("remove and re-create the output directory if it already exists "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
                          help=("backup and then overwrite existing config files instead of checking them "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
                          help="Don't copy config to file~N backup.")
        self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
                          help=("backup and then overwrite existing package versions instead of checking"
                                "them (safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
                          help="don't check package versions; useful for development")
        # Default console log layout; replaced by a more verbose layout in
        # `parse_args` when --longlog is given.
        lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
""")
503 
    def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
                        ContainerClass=DataIdContainer):
        """Add a data ID argument.

        Parameters
        ----------
        name : `str`
            Data ID argument (including leading dashes, if wanted).
        datasetType : `str` or `DynamicDatasetType`-type
            Type of dataset. Supply a string for a fixed dataset type.
            For a dynamically determined dataset type, supply
            a `DynamicDatasetType`, such a `DatasetArgument`.
        help : `str`
            Help string for the argument.
        level : `str`
            The lowest hierarchy level to descend to for this dataset type,
            for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
            Use `""` to use the mapper's default for the dataset type.
            Some container classes may also support `None`, which means
            the level should not be restricted; however the default class,
            `DataIdContainer`, does not support `None`.
        doMakeDataRefList : `bool`, optional
            If `True` (default), construct data references.
        ContainerClass : `class`, optional
            Class to contain data IDs and data references; the default class
            `DataIdContainer` will work for many, but not all, cases.
            For example if the dataset type is specified on the command line
            then use `DynamicDatasetType`.

        Notes
        -----
        If ``datasetType`` is an instance of `DatasetArgument`,
        then add a second argument to specify the dataset type.

        The associated data is put into ``namespace.<dataIdArgument.name>``
        as an instance of `ContainerClass`; the container includes fields:

        - ``idList``: a list of data ID dicts.
        - ``refList``: a list of `~lsst.daf.persistence.Butler`
            data references (empty if ``doMakeDataRefList`` is `False`).
        """
        argName = name.lstrip("-")

        if argName in self._dataIdArgDict:
            raise RuntimeError("Data ID argument %s already exists" % (name,))
        # These names are attributes that parse_args sets on the namespace.
        if argName in set(("camera", "config", "butler", "log", "obsPkg")):
            raise RuntimeError("Data ID argument %s is a reserved name" % (name,))

        self.add_argument(name, nargs="*", action=IdValueAction, help=help,
                          metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")

        dataIdArgument = DataIdArgument(
            name=argName,
            datasetType=datasetType,
            level=level,
            doMakeDataRefList=doMakeDataRefList,
            ContainerClass=ContainerClass,
        )

        # A dynamic dataset type may need its own command-line argument
        # (e.g. --id_dstype) through which the type is specified.
        if dataIdArgument.isDynamicDatasetType:
            datasetType.addArgument(parser=self, idName=argName)

        self._dataIdArgDict[argName] = dataIdArgument
568 
    def parse_args(self, config, args=None, log=None, override=None):
        """Parse arguments for a command-line task.

        Parameters
        ----------
        config : `lsst.pex.config.Config`
            Config for the task being run.
        args : `list`, optional
            Argument list; if `None` then ``sys.argv[1:]`` is used.
        log : `lsst.log.Log`, optional
            `~lsst.log.Log` instance; if `None` use the default log.
        override : callable, optional
            A config override function. It must take the root config object
            as its only argument and must modify the config in place.
            This function is called after camera-specific overrides files
            are applied, and before command-line config overrides
            are applied (thus allowing the user the final word).

        Returns
        -------
        namespace : `argparse.Namespace`
            A `~argparse.Namespace` instance containing fields:

            - ``camera``: camera name.
            - ``config``: the supplied config with all overrides applied,
                validated and frozen.
            - ``butler``: a `lsst.daf.persistence.Butler` for the data.
            - An entry for each of the data ID arguments registered by
                `add_id_argument`, of the type passed to its ``ContainerClass``
                keyword (`~lsst.pipe.base.DataIdContainer` by default). It
                includes public elements ``idList`` and ``refList``.
            - ``log``: a `lsst.log` Log.
            - An entry for each command-line argument,
                with the following exceptions:

                - config is the supplied config, suitably updated.
                - configfile, id and loglevel are all missing.
            - ``obsPkg``: name of the ``obs_`` package for this camera.
        """
        if args is None:
            args = sys.argv[1:]

        # The input repository must come first; anything else means the
        # user needs help (or forgot the input).
        if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                self.exit("%s: error: Must specify input as first argument" % self.prog)

        # Note that --rerun may change namespace.input, but if it does
        # we verify that the new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error("Error: input=%r not found" % (namespace.input,))

        namespace.config = config
        namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        # Hook for camera-specific preparation before parsing.
        self.handleCamera(namespace)

        # Obs-package and camera config override files, then the caller's
        # override function; command-line -c/-C overrides come last.
        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)

        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        butlerArgs = {}  # common arguments for butler elements
        if namespace.calib:
            butlerArgs = {'mapperArgs': {'calibRoot': namespace.calib}}
        if namespace.output:
            outputs = {'root': namespace.output, 'mode': 'rw'}
            inputs = {'root': namespace.input}
            inputs.update(butlerArgs)
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            # No separate output repo: read and write the input repo.
            outputs = {'root': namespace.input, 'mode': 'rw'}
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(outputs=outputs)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler,
        # hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict.keys():
                for dataRef in getattr(namespace, dataIdName).refList:
                    print("%s dataRef.dataId = %s" % (dataIdName, dataRef.dataId))

        # "run" in --show means: show the requested info, then keep going.
        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                sys.stderr.write("Warning: no 'debug' module found\n")
                namespace.debug = False

        del namespace.loglevel

        if namespace.longlog:
            lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddThh:mm:ss.sss} %c (%X{LABEL})(%F:%L)- %m%n
""")
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace
718 
    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories.

        This allows for hacking the directories, e.g., to include a
        "rerun".
        Modifications are made to the 'namespace' object in-place.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace; reads the ``raw*`` attributes set by the
            argument definitions and replaces them with expanded
            ``input``/``calib``/``output``/``rerun`` attributes.
        """
        # Capture the input repo's mapper class before --rerun may
        # redirect namespace.input, so we can compare afterwards.
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument.
        # If rerun is specified as a colon separated value,
        # it will be parsed as an input and output.
        # The input value will be overridden if previously specified
        # (but a check is made to make sure both inputs use
        # the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                namespace.output = rerunDir[0]
                # A _parent link means the rerun chains to another repo;
                # treat that repo as the input.
                if os.path.exists(os.path.join(namespace.output, "_parent")):
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error("Error: invalid argument for --rerun: %s" % namespace.rerun)
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        # The raw (pre-expansion) values are no longer needed.
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun
765 
    def _processDataIds(self, namespace):
        """Process the parsed data for each data ID argument in an
        `~argparse.Namespace`.

        Processing includes:

        - Validate data ID keys.
        - Cast the data ID values to the correct type.
        - Compute data references from data IDs.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``butler``
            - ``log``
            - ``config``, if any dynamic dataset types are set by
                a config parameter.
            - Dataset type arguments (e.g. ``id_dstype``), if any dynamic
                dataset types are specified by such

            These attributes are modified:

            - ``<name>`` for each data ID argument registered using
                `add_id_argument` with name ``<name>``.
        """
        for dataIdArgument in self._dataIdArgDict.values():
            dataIdContainer = getattr(namespace, dataIdArgument.name)
            dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
            if dataIdArgument.doMakeDataRefList:
                try:
                    dataIdContainer.castDataIds(butler=namespace.butler)
                except (KeyError, TypeError) as e:
                    # failure of castDataIds indicates invalid command args
                    self.error(e)

                # failure of makeDataRefList indicates a bug
                # that wants a traceback
                dataIdContainer.makeDataRefList(namespace)
806 
807  def _applyInitialOverrides(self, namespace):
808  """Apply obs-package-specific and camera-specific config
809  override files, if found
810 
811  Parameters
812  ----------
813  namespace : `argparse.Namespace`
814  Parsed namespace. These attributes are read:
815 
816  - ``obsPkg``
817 
818  Look in the package namespace.obsPkg for files:
819 
820  - ``config/<task_name>.py``
821  - ``config/<camera_name>/<task_name>.py`` and load if found.
822  """
823  obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
824  fileName = self._name + ".py"
825  for filePath in (
826  os.path.join(obsPkgDir, "config", fileName),
827  os.path.join(obsPkgDir, "config", namespace.camera, fileName),
828  ):
829  if os.path.exists(filePath):
830  namespace.log.info("Loading config overrride file %r", filePath)
831  namespace.config.load(filePath)
832  else:
833  namespace.log.debug("Config override file does not exist: %r", filePath)
834 
835  def handleCamera(self, namespace):
836  """Perform camera-specific operations before parsing the command-line.
837 
838  Parameters
839  ----------
840  namespace : `argparse.Namespace`
841  Namespace (an ) with the following fields:
842 
843  - ``camera``: the camera name.
844  - ``config``: the config passed to parse_args, with no overrides applied.
845  - ``obsPkg``: the ``obs_`` package for this camera.
846  - ``log``: a `lsst.log` Log.
847 
848  Notes
849  -----
850  The default implementation does nothing.
851  """
852  pass
853 
854  def convert_arg_line_to_args(self, arg_line):
855  """Allow files of arguments referenced by ``@<path>`` to contain
856  multiple values on each line.
857 
858  Parameters
859  ----------
860  arg_line : `str`
861  Line of text read from an argument file.
862  """
863  arg_line = arg_line.strip()
864  if not arg_line or arg_line.startswith("#"):
865  return
866  for arg in shlex.split(arg_line, comments=True, posix=True):
867  if not arg.strip():
868  continue
869  yield arg
870 
871  def addReuseOption(self, choices):
872  """Add a "--reuse-outputs-from SUBTASK" option to the argument
873  parser.
874 
875  CmdLineTasks that can be restarted at an intermediate step using
876  outputs from earlier (but still internal) steps should use this
877  method to allow the user to control whether that happens when
878  outputs from earlier steps are present.
879 
880  Parameters
881  ----------
882  choices : sequence
883  A sequence of string names (by convention, top-level subtasks)
884  that identify the steps that could be skipped when their
885  outputs are already present. The list is ordered, so when the
886  user specifies one step on the command line, all previous steps
887  may be skipped as well. In addition to the choices provided,
888  users may pass "all" to indicate that all steps may be thus
889  skipped.
890 
891  When this method is called, the ``namespace`` object returned by
892  ``parse_args`` will contain a ``reuse`` attribute containing
893  a list of all steps that should be skipped if their outputs
894  are already present.
895  If no steps should be skipped, the ``reuse`` will be an empty list.
896  """
897  choices = list(choices)
898  choices.append("all")
899  self.add_argument("--reuse-outputs-from", dest="reuse", choices=choices,
900  default=[], action=ReuseAction,
901  help=("Skip the given subtask and its predecessors and reuse their outputs "
902  "if those outputs already exist. Use 'all' to specify all subtasks."))
903 
904 
906  """`ArgumentParser` for command-line tasks that don't write any output.
907  """
908 
909  requireOutput = False # We're not going to write anything
910 
911 
912 def getTaskDict(config, taskDict=None, baseName=""):
913  """Get a dictionary of task info for all subtasks in a config
914 
915  Parameters
916  ----------
917  config : `lsst.pex.config.Config`
918  Configuration to process.
919  taskDict : `dict`, optional
920  Users should not specify this argument. Supports recursion.
921  If provided, taskDict is updated in place, else a new `dict`
922  is started.
923  baseName : `str`, optional
924  Users should not specify this argument. It is only used for
925  recursion: if a non-empty string then a period is appended
926  and the result is used as a prefix for additional entries
927  in taskDict; otherwise no prefix is used.
928 
929  Returns
930  -------
931  taskDict : `dict`
932  Keys are config field names, values are task names.
933 
934  Notes
935  -----
936  This function is designed to be called recursively.
937  The user should call with only a config (leaving taskDict and baseName
938  at their default values).
939  """
940  if taskDict is None:
941  taskDict = dict()
942  for fieldName, field in config.items():
943  if hasattr(field, "value") and hasattr(field, "target"):
944  subConfig = field.value
945  if isinstance(subConfig, pexConfig.Config):
946  subBaseName = "%s.%s" % (baseName, fieldName) if baseName else fieldName
947  try:
948  taskName = "%s.%s" % (field.target.__module__, field.target.__name__)
949  except Exception:
950  taskName = repr(field.target)
951  taskDict[subBaseName] = taskName
952  getTaskDict(config=subConfig, taskDict=taskDict, baseName=subBaseName)
953  return taskDict
954 
955 
def obeyShowArgument(showOpts, config=None, exit=False):
    """Process arguments specified with ``--show`` (but ignores
    ``"data"``).

    Parameters
    ----------
    showOpts : `list` of `str`
        List of options passed to ``--show``.
    config : optional
        The provided config.
    exit : bool, optional
        Exit if ``"run"`` isn't included in ``showOpts``.

    Notes
    -----
    Supports the following options in showOpts:

    - ``config[=PAT]``. Dump all the config entries, or just the ones that
      match the glob pattern.
    - ``history=PAT``. Show where the config entries that match the glob
      pattern were set.
    - ``tasks``. Show task hierarchy.
    - ``data``. Ignored; to be processed by caller.
    - ``run``. Keep going (the default behaviour is to exit if
      ``--show`` is specified).

    Calls ``sys.exit(1)`` if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        # Split "option=args"; an option with no "=" gets empty args.
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            # Strip an optional leading "config." from the pattern.
            matConfig = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matConfig.group(1)
            if pattern:
                class FilteredStream:
                    """A file object that only prints lines
                    that match the glob "pattern".

                    N.b. Newlines are silently discarded and reinserted;
                    crude but effective.
                    """

                    def __init__(self, pattern):
                        # obey case if pattern isn't lowercase or requests NOIGNORECASE
                        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

                        if mat:
                            pattern = mat.group(1)
                            self._pattern = re.compile(fnmatch.translate(pattern))
                        else:
                            if pattern != pattern.lower():
                                print(u"Matching \"%s\" without regard to case "
                                      "(append :NOIGNORECASE to prevent this)" % (pattern,), file=sys.stdout)
                            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

                    def write(self, showStr):
                        showStr = showStr.rstrip()
                        # Strip off doc string line(s) and cut off
                        # at "=" for string matching
                        matchStr = showStr.split("\n")[-1].split("=")[0]
                        if self._pattern.search(matchStr):
                            print(u"\n" + showStr)

                fd = FilteredStream(pattern)
            else:
                fd = sys.stdout

            config.saveToStream(fd, "config")
        elif showCommand == "history":
            matHistory = re.search(r"^(?:config.)?(.+)?", showArgs)
            globPattern = matHistory.group(1)
            if not globPattern:
                print("Please provide a value with --show history (e.g. history=*.doXXX)", file=sys.stderr)
                sys.exit(1)

            error = False
            for i, pattern in enumerate(fnmatch.filter(config.names(), globPattern)):
                if i > 0:
                    print("")

                pattern = pattern.split(".")
                cpath, cname = pattern[:-1], pattern[-1]
                hconfig = config  # the config that we're interested in
                # NOTE(review): this inner loop reuses `i`, shadowing the
                # outer enumerate index; harmless here because the outer `i`
                # is reassigned each iteration, but fragile.
                for i, cpt in enumerate(cpath):
                    try:
                        hconfig = getattr(hconfig, cpt)
                    except AttributeError:
                        print("Error: configuration %s has no subconfig %s" %
                              (".".join(["config"] + cpath[:i]), cpt), file=sys.stderr)
                        error = True

                try:
                    print(pexConfig.history.format(hconfig, cname))
                except KeyError:
                    print("Error: %s has no field %s" % (".".join(["config"] + cpath), cname),
                          file=sys.stderr)
                    error = True

            if error:
                sys.exit(1)

        elif showCommand == "data":
            pass
        elif showCommand == "run":
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            print(u"Unknown value for show: %s (choose from '%s')" %
                  (what, "', '".join("config[=XXX] data history=XXX tasks run".split())), file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
1074 
1075 
def showTaskHierarchy(config):
    """Print task hierarchy to stdout.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    """
    print(u"Subtasks:")
    taskDict = getTaskDict(config=config)

    # Emit "field: task" pairs sorted by field name.
    for fieldName in sorted(taskDict):
        print(u"%s: %s" % (fieldName, taskDict[fieldName]))
1091 
1092 
class ConfigValueAction(argparse.Action):
    """argparse action callback to override config parameters using
    name=value pairs from the command-line.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Override one or more config name value pairs.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The ``namespace.config`` attribute is updated.
        values : `list`
            A list of ``configItemName=value`` pairs.
        option_string : `str`
            Option value specified by the user.
        """
        if namespace.config is None:
            return
        for pair in values:
            fieldName, _, rawValue = pair.partition("=")
            if not rawValue:
                parser.error("%s value %s must be in form name=value" % (option_string, pair))

            # First try assigning the raw string; if the field rejects it,
            # fall back to eval-ing the value (for ints, lists, etc.).
            # NOTE: eval on command-line input is a deliberate, pre-existing
            # design choice here; the empty globals dict limits exposure.
            try:
                setDottedAttr(namespace.config, fieldName, rawValue)
            except AttributeError:
                parser.error("no config field: %s" % (fieldName,))
            except Exception:
                try:
                    parsedValue = eval(rawValue, {})
                except Exception:
                    parser.error("cannot parse %r as a value for %s" % (rawValue, fieldName))
                try:
                    setDottedAttr(namespace.config, fieldName, parsedValue)
                except Exception as e:
                    parser.error("cannot set config.%s=%r: %s" % (fieldName, parsedValue, e))
1133 
1134 
class ConfigFileAction(argparse.Action):
    """argparse action to load config overrides from one or more files.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Load one or more files of config overrides.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The following attributes are updated by this
            method: ``namespace.config``.
        values : `list`
            A list of data config file paths.
        option_string : `str`, optional
            Option value specified by the user.
        """
        # Nothing to load into if no config was supplied.
        if namespace.config is None:
            return
        for path in values:
            try:
                namespace.config.load(path)
            except Exception as e:
                parser.error("cannot load config file %r: %s" % (path, e))
1161 
1162 
class IdValueAction(argparse.Action):
    """argparse action callback to process a data ID into a dict.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Parse ``--id`` data and append results to
        ``namespace.<argument>.idList``.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command (an instance of argparse.Namespace).
            The following attributes are updated:

            - ``<idName>.idList``, where ``<idName>`` is the name of the
              ID argument, for instance ``"id"`` for ID argument ``--id``.
        values : `list`
            A list of data IDs; see Notes below.
        option_string : `str`
            Option value specified by the user.

        Notes
        -----
        The data format is::

            key1=value1_1[^value1_2[^value1_3...]
            key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. ``value1_1``) may either be a string,
        or of the form ``"int..int"`` (e.g. ``"1..3"``) which is
        interpreted as ``"1^2^3"`` (inclusive, unlike a python range).
        So ``"0^2..4^7..9"`` is equivalent to ``"0^2^3^4^7^8^9"``.
        You may also specify a stride: ``"1..5:2"`` is ``"1^3^5"``.

        The cross product is computed for keys with multiple values.
        For example::

            --id visit 1^2 ccd 1,1^2,2

        results in the following data ID dicts being appended to
        ``namespace.<argument>.idList``:

            {"visit":1, "ccd":"1,1"}
            {"visit":2, "ccd":"1,1"}
            {"visit":1, "ccd":"2,2"}
            {"visit":2, "ccd":"2,2"}
        """
        if namespace.config is None:
            return
        idDict = collections.OrderedDict()
        for nameValue in values:
            keyName, _, valueStr = nameValue.partition("=")
            if keyName in idDict:
                parser.error("%s appears multiple times in one ID argument: %s" % (keyName, option_string))
            idDict[keyName] = []
            for token in valueStr.split("^"):
                # "start..stop[:stride]" expands to the inclusive range.
                mat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", token)
                if mat is None:
                    idDict[keyName].append(token)
                else:
                    start = int(mat.group(1))
                    stop = int(mat.group(2))
                    strideStr = mat.group(3)
                    stride = int(strideStr) if strideStr else 1
                    idDict[keyName].extend(str(num) for num in range(start, stop + 1, stride))

        # Cross product of all value lists, preserving key order.
        keyList = list(idDict.keys())
        idDictList = [collections.OrderedDict(zip(keyList, combo))
                      for combo in itertools.product(*idDict.values())]

        argName = option_string.lstrip("-")
        getattr(namespace, argName).idList += idDictList
1239 
1240 
class LogLevelAction(argparse.Action):
    """argparse action to set log level.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Set trace level.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. This argument is not used.
        values : `list`
            List of trace levels; each item must be of the form
            ``component_name=level`` or ``level``, where ``level``
            is a keyword (not case sensitive) or an integer.
        option_string : `str`
            Option value specified by the user.
        """
        permittedLevelList = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        permittedLevelSet = set(permittedLevelList)
        for componentLevel in values:
            component, _, levelStr = componentLevel.partition("=")
            if not levelStr:
                # Bare "LEVEL" (no "=") applies to the root logger.
                component, levelStr = None, component
            levelName = levelStr.upper()
            if levelName not in permittedLevelSet:
                # parser.error raises SystemExit, so no level is applied.
                parser.error("loglevel=%r not one of %s" % (levelStr, permittedLevelList))
            logLevel = getattr(lsstLog.Log, levelName)
            if component is None:
                namespace.log.setLevel(logLevel)
            else:
                lsstLog.Log.getLogger(component).setLevel(logLevel)
1276 
1277 
class ReuseAction(argparse.Action):
    """argparse action associated with ArgumentParser.addReuseOption."""

    def __call__(self, parser, namespace, value, option_string):
        # "all" is shorthand for the last real subtask (choices ends
        # with the sentinel "all", so the last real one is at -2).
        selected = self.choices[-2] if value == "all" else value
        # Reuse the named step and every step before it.
        namespace.reuse = self.choices[:self.choices.index(selected) + 1]
1286 
1287 
def setDottedAttr(item, name, value):
    """Set an instance attribute (like `setattr` but accepting
    hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be set.
    name : `str`
        Name of attribute to set.
    value : obj
        New value for the attribute.

    Notes
    -----
    For example if name is ``foo.bar.baz`` then ``item.foo.bar.baz``
    is set to the specified value.
    """
    # Walk down to the parent of the leaf attribute, then assign.
    *parents, leafName = name.split(".")
    target = item
    for attrName in parents:
        target = getattr(target, attrName)
    setattr(target, leafName, value)
1311 
1312 
def getDottedAttr(item, name):
    """Get an attribute (like `getattr` but accepts hierarchical names
    such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be returned.
    name : `str`
        Name of the attribute to get.

    Returns
    -------
    itemAttr : obj
        If name is ``foo.bar.baz`` then the return value is
        ``item.foo.bar.baz``.
    """
    # Follow each dotted component in turn.
    result = item
    for part in name.split("."):
        result = getattr(result, part)
    return result
def getTaskDict(config, taskDict=None, baseName="")
def __init__(self, name=None, help="dataset type to process from input data repository", default=None)
def setDottedAttr(item, name, value)
def __call__(self, parser, namespace, values, option_string=None)
def obeyShowArgument(showOpts, config=None, exit=False)
std::string getPackageDir(std::string const &packageName)
def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer)
def __call__(self, parser, namespace, value, option_string)
def __init__(self, name, usage="%(prog)s input [options]", kwargs)
def __call__(self, parser, namespace, values, option_string)
def __call__(self, parser, namespace, values, option_string)
def __call__(self, parser, namespace, values, option_string)
def parse_args(self, config, args=None, log=None, override=None)
def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True, ContainerClass=DataIdContainer)