lsst.pipe.base  16.0-6-g44ca919+2
argumentParser.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2015 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division, print_function
23 import abc
24 import argparse
25 import collections
26 import fnmatch
27 import itertools
28 import os
29 import re
30 import shlex
31 import sys
32 import shutil
33 import textwrap
34 
35 from builtins import zip
36 from builtins import str
37 from builtins import range
38 from builtins import object
39 
40 import lsst.utils
41 import lsst.pex.config as pexConfig
43 import lsst.log as lsstLog
44 import lsst.daf.persistence as dafPersist
45 from future.utils import with_metaclass
46 
# Public API of this module; the Action classes are defined later in the file.
__all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
           "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]

# Environment variables that supply default root paths for the input,
# calib, and output repositories; see _fixPath for how they are applied.
DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
53 
54 
def _fixPath(defName, path):
    """Expand a repository path, applying an environment-variable root.

    Parameters
    ----------
    defName : `str`
        Name of the environment variable holding the default root path.
        If the variable is unset, ``path`` is interpreted relative to
        the current working directory.
    path : `str`
        Path relative to the default root path (may be `None`).

    Returns
    -------
    abspath : `str`
        Absolute, expanded path, or `None` when the environment variable
        is unset and ``path`` is `None`.
    """
    envRoot = os.environ.get(defName)
    if envRoot is not None:
        # Environment root present: join (treating None path as "") and expand.
        return os.path.abspath(os.path.join(envRoot, path or ""))
    if path is None:
        return None
    return os.path.abspath(path)
79 
80 
class DataIdContainer(object):
    """Hold data IDs and the data references derived from them.

    Parameters
    ----------
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        This class does not support `None`, but if it did, `None`
        would mean the level should not be restricted.

    Notes
    -----
    Subclass this container when data IDs need special handling before
    they can be converted to data references, and pass the subclass as
    ``ContainerClass`` to ``add_id_argument``.

    To skip data-reference construction entirely, pass
    ``doMakeDataRefList=False`` to ``add_id_argument``.
    """

    def __init__(self, level=None):
        self.datasetType = None
        """Dataset type of the data references (`str`).
        """
        self.level = level
        """See parameter ``level`` (`str`).
        """
        self.idList = []
        """Data IDs specified on the command line for the appropriate
        data ID argument (`list` of `dict`).
        """
        self.refList = []
        """Data references for the data IDs in ``idList``
        (`list` of `lsst.daf.persistence.ButlerDataRef`).
        Elements are omitted when the corresponding data is not found.
        The list remains empty after ``parse_args`` if
        ``doMakeDataRefList=False`` was given to ``add_id_argument``.
        """

    def setDatasetType(self, datasetType):
        """Record the actual dataset type, once it is known.

        Parameters
        ----------
        datasetType : `str`
            Dataset type.

        Notes
        -----
        Some subclasses only learn the dataset type after the command
        line has been parsed, so it cannot be a constructor argument;
        for uniformity it is always set after parsing.
        """
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """Validate data IDs and cast their values to the proper types,
        modifying ``idList`` in place.

        Values arrive from the command line as `str`, but the butler may
        require other types; for example "visit" values should be `int`.

        Parameters
        ----------
        butler : `lsst.daf.persistence.Butler`
            Data butler.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            idKeyTypeDict = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError as e:
            msg = "Cannot get keys for datasetType %s at level %s" % (self.datasetType, self.level)
            raise KeyError(msg) from e

        for idDict in self.idList:
            for key, rawValue in idDict.items():
                if key in idKeyTypeDict:
                    keyType = idKeyTypeDict[key]
                else:
                    # Unknown key: assume it is valid and that its value is a string
                    keyType = str
                    log = lsstLog.Log.getDefaultLogger()
                    log.warn("Unexpected ID %s; guessing type is \"%s\"" %
                             (key, 'str' if keyType == str else keyType))
                    idKeyTypeDict[key] = keyType

                if keyType != str:
                    try:
                        idDict[key] = keyType(rawValue)
                    except Exception:
                        raise TypeError("Cannot cast value %r to %s for ID key %r" % (rawValue, keyType, key,))

    def makeDataRefList(self, namespace):
        """Build ``refList`` from ``idList``.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Results of parsing the command line; its ``butler`` and
            ``log`` attributes must be set.

        Notes
        -----
        Not called if ``add_id_argument`` was given
        ``doMakeDataRefList=False``.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            found = dafPersist.searchDataRefs(butler, datasetType=self.datasetType,
                                              level=self.level, dataId=dataId)
            if found:
                self.refList += found
            else:
                namespace.log.warn("No data found for dataId=%s", dataId)
205 
206 
class DataIdArgument(object):
    """Data ID argument, used by `ArgumentParser.add_id_argument`.

    Parameters
    ----------
    name : `str`
        Name of identifier (argument name without dashes).
    datasetType : `str`
        Type of dataset; specify a string for a fixed dataset type
        or a `DatasetArgument` for a dynamic dataset type (e.g.
        one specified by a command-line argument).
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        Some container classes may also support `None`, which means
        the level should not be restricted; however the default class,
        `DataIdContainer`, does not support `None`.
    doMakeDataRefList : `bool`, optional
        If `True` (default), construct data references.
    ContainerClass : `class`, optional
        Class to contain data IDs and data references; the default class
        `DataIdContainer` will work for many, but not all, cases.
        For example if the dataset type is specified on the command line
        then use `DynamicDatasetType`.
    """

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        if name.startswith("-"):
            raise RuntimeError("Name %s must not start with -" % (name,))
        self.name = name
        self.datasetType = datasetType
        self.level = level
        self.doMakeDataRefList = bool(doMakeDataRefList)
        self.ContainerClass = ContainerClass
        self.argName = name.lstrip("-")

    @property
    def isDynamicDatasetType(self):
        # NOTE(review): the "def" line was missing in the extracted listing;
        # restored here so the @property decorator has a method to wrap.
        """`True` if the dataset type is dynamic (that is, specified
        on the command line).
        """
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """Get the dataset type as a string.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        if self.isDynamicDatasetType:
            return self.datasetType.getDatasetType(namespace)
        else:
            return self.datasetType
268 
269 
class DynamicDatasetType(with_metaclass(abc.ABCMeta, object)):
    """Abstract base class for a dataset type that is determined from
    the parsed command-line arguments.
    """

    def addArgument(self, parser, idName):
        """Optionally add a command-line argument used to specify the
        dataset type name.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser to add the argument to.
        idName : `str`
            Name of data ID argument, without the leading ``"--"``,
            e.g. ``"id"``.

        Notes
        -----
        This default implementation is a no-op; subclasses that need a
        command-line argument override it.
        """
        pass

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Return the dataset type as a string, based on the parsed
        command-line arguments.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        raise NotImplementedError("Subclasses must override")
304 
305 
class DatasetArgument(DynamicDatasetType):
    # NOTE(review): the "class" line was missing in the extracted listing;
    # restored here (DatasetArgument subclasses DynamicDatasetType, per __all__
    # and the DynamicDatasetType.__init__ call below).
    """Dataset type specified by a command-line argument.

    Parameters
    ----------
    name : `str`, optional
        Name of command-line argument (including leading "--",
        if appropriate) whose value is the dataset type.
        If `None`, uses ``--idName_dstype`` where idName
        is the name of the data ID argument (e.g. "id").
    help : `str`, optional
        Help string for the command-line argument.
    default : `object`, optional
        Default value. If `None`, then the command-line option is required.
        This argument is ignored if the command-line argument is positional
        (name does not start with "-") because positional arguments do
        not support default values.
    """

    def __init__(self,
                 name=None,
                 help="dataset type to process from input data repository",
                 default=None,
                 ):
        DynamicDatasetType.__init__(self)
        self.name = name
        self.help = help
        self.default = default

    def getDatasetType(self, namespace):
        """Get the dataset type as a string, from the appropriate
        command-line argument.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        argName = self.name.lstrip("-")
        return getattr(namespace, argName)

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify the dataset type name.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        idName : `str`
            Data ID.

        Notes
        -----
        Also sets the `name` attribute if it is currently `None`.
        """
        help = self.help if self.help else "dataset type for %s" % (idName,)
        if self.name is None:
            self.name = "--%s_dstype" % (idName,)
        requiredDict = dict()
        if self.name.startswith("-"):
            # Optional argument: required unless a default was supplied.
            requiredDict = dict(required=self.default is None)
        parser.add_argument(
            self.name,
            default=self.default,
            help=help,
            **requiredDict)
377 
378 
class ConfigDatasetType(DynamicDatasetType):
    # NOTE(review): the "class" line was missing in the extracted listing;
    # restored here (ConfigDatasetType subclasses DynamicDatasetType, per
    # __all__ and the DynamicDatasetType.__init__ call below).
    """Dataset type specified by a config parameter.

    Parameters
    ----------
    name : `str`
        Name of config option whose value is the dataset type.
    """

    def __init__(self, name):
        DynamicDatasetType.__init__(self)
        self.name = name

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate
        config field.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command.
        """
        # getattr does not work reliably if the config field name is
        # dotted, so step through one level at a time
        keyList = self.name.split(".")
        value = namespace.config
        for key in keyList:
            try:
                value = getattr(value, key)
            except KeyError:
                raise RuntimeError("Cannot find config parameter %r" % (self.name,))
        return value
411 
412 
class ArgumentParser(argparse.ArgumentParser):
    """Argument parser for command-line tasks that is based on
    `argparse.ArgumentParser`.

    Parameters
    ----------
    name : `str`
        Name of top-level task; used to identify camera-specific override
        files.
    usage : `str`, optional
        Command-line usage signature.
    **kwargs
        Additional keyword arguments for `argparse.ArgumentParser`.

    Notes
    -----
    Users may wish to add additional arguments before calling `parse_args`.
    """
    # I would prefer to check data ID keys and values as they are parsed,
    # but the required information comes from the butler, so I have to
    # construct a butler before I do this checking. Constructing a butler
    # is slow, so I only want do it once, after parsing the command line,
    # so as to catch syntax errors quickly.

    requireOutput = True
    """Require an output directory to be specified (`bool`)."""

    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        argparse.ArgumentParser.__init__(self,
                                         usage=usage,
                                         fromfile_prefix_chars='@',
                                         epilog=textwrap.dedent("""Notes:
            * --config, --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --configfile foo bar
                * wrong: --configfile=foo bar
            """),
                                         formatter_class=argparse.RawDescriptionHelpFormatter,
                                         **kwargs)
        self.add_argument(metavar='input', dest="rawInput",
                          help="path to input data repository, relative to $%s" % (DEFAULT_INPUT_NAME,))
        self.add_argument("--calib", dest="rawCalib",
                          help="path to input calibration repository, relative to $%s" %
                          (DEFAULT_CALIB_NAME,))
        self.add_argument("--output", dest="rawOutput",
                          help="path to output data repository (need not exist), relative to $%s" %
                          (DEFAULT_OUTPUT_NAME,))
        self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
                          help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
                               "optionally sets ROOT to ROOT/rerun/INPUT")
        self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
                          help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
        self.add_argument("-C", "--configfile", dest="configfile", nargs="*", action=ConfigFileAction,
                          help="config override file(s)")
        self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
                          help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
                          metavar="LEVEL|COMPONENT=LEVEL")
        self.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument("--doraise", action="store_true",
                          help="raise an exception on error (else log a message and continue)?")
        self.add_argument("--noExit", action="store_true",
                          help="Do not exit even upon failure (i.e. return a struct to the calling script)")
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument("--show", nargs="+", default=(),
                          help="display the specified information to stdout and quit "
                               "(unless run is specified).")
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument("-t", "--timeout", type=float,
                          help="Timeout for multiprocessing; maximum wall time (sec)")
        self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
                          help=("remove and re-create the output directory if it already exists "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
                          help=("backup and then overwrite existing config files instead of checking them "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
                          help="Don't copy config to file~N backup.")
        self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
                          help=("backup and then overwrite existing package versions instead of checking"
                                "them (safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
                          help="don't check package versions; useful for development")
        # Default console logging layout; --longlog replaces this in parse_args.
        lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
""")

    def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
                        ContainerClass=DataIdContainer):
        """Add a data ID argument.

        Parameters
        ----------
        name : `str`
            Data ID argument (including leading dashes, if wanted).
        datasetType : `str` or `DynamicDatasetType`-type
            Type of dataset. Supply a string for a fixed dataset type.
            For a dynamically determined dataset type, supply
            a `DynamicDatasetType`, such as a `DatasetArgument`.
        help : `str`
            Help string for the argument.
        level : `str`
            The lowest hierarchy level to descend to for this dataset type,
            for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
            Use `""` to use the mapper's default for the dataset type.
            Some container classes may also support `None`, which means
            the level should not be restricted; however the default class,
            `DataIdContainer`, does not support `None`.
        doMakeDataRefList : `bool`, optional
            If `True` (default), construct data references.
        ContainerClass : `class`, optional
            Class to contain data IDs and data references; the default class
            `DataIdContainer` will work for many, but not all, cases.
            For example if the dataset type is specified on the command line
            then use `DynamicDatasetType`.

        Notes
        -----
        If ``datasetType`` is an instance of `DatasetArgument`,
        then add a second argument to specify the dataset type.

        The associated data is put into ``namespace.<dataIdArgument.name>``
        as an instance of `ContainerClass`; the container includes fields:

        - ``idList``: a list of data ID dicts.
        - ``refList``: a list of `~lsst.daf.persistence.Butler`
          data references (empty if ``doMakeDataRefList`` is `False`).
        """
        argName = name.lstrip("-")

        if argName in self._dataIdArgDict:
            raise RuntimeError("Data ID argument %s already exists" % (name,))
        if argName in set(("camera", "config", "butler", "log", "obsPkg")):
            raise RuntimeError("Data ID argument %s is a reserved name" % (name,))

        self.add_argument(name, nargs="*", action=IdValueAction, help=help,
                          metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")

        dataIdArgument = DataIdArgument(
            name=argName,
            datasetType=datasetType,
            level=level,
            doMakeDataRefList=doMakeDataRefList,
            ContainerClass=ContainerClass,
        )

        # A dynamic dataset type adds its own command-line argument
        # (e.g. --id_dstype) so the user can name the dataset type.
        if dataIdArgument.isDynamicDatasetType:
            datasetType.addArgument(parser=self, idName=argName)

        self._dataIdArgDict[argName] = dataIdArgument

    def parse_args(self, config, args=None, log=None, override=None):
        """Parse arguments for a command-line task.

        Parameters
        ----------
        config : `lsst.pex.config.Config`
            Config for the task being run.
        args : `list`, optional
            Argument list; if `None` then ``sys.argv[1:]`` is used.
        log : `lsst.log.Log`, optional
            `~lsst.log.Log` instance; if `None` use the default log.
        override : callable, optional
            A config override function. It must take the root config object
            as its only argument and must modify the config in place.
            This function is called after camera-specific overrides files
            are applied, and before command-line config overrides
            are applied (thus allowing the user the final word).

        Returns
        -------
        namespace : `argparse.Namespace`
            A `~argparse.Namespace` instance containing fields:

            - ``camera``: camera name.
            - ``config``: the supplied config with all overrides applied,
              validated and frozen.
            - ``butler``: a `lsst.daf.persistence.Butler` for the data.
            - An entry for each of the data ID arguments registered by
              `add_id_argument`, the value of which is an instance of
              the ``ContainerClass`` supplied to `add_id_argument`
              (by default `DataIdContainer`), with public elements
              ``idList`` and ``refList``.
            - ``log``: a `lsst.log` Log.
            - An entry for each command-line argument,
              with the following exceptions:

              - config is the supplied config, suitably updated.
              - configfile, id and loglevel are all missing.
            - ``obsPkg``: name of the ``obs_`` package for this camera.
        """
        if args is None:
            args = sys.argv[1:]

        # The input repository must come first; anything starting with
        # "-" or "@" means the user skipped it, so show help and exit.
        if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                self.exit("%s: error: Must specify input as first argument" % self.prog)

        # Note that --rerun may change namespace.input, but if it does
        # we verify that the new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error("Error: input=%r not found" % (namespace.input,))

        namespace.config = config
        namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        self.handleCamera(namespace)

        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)

        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        butlerArgs = {}  # common arguments for butler elements
        if namespace.calib:
            butlerArgs = {'mapperArgs': {'calibRoot': namespace.calib}}
        if namespace.output:
            outputs = {'root': namespace.output, 'mode': 'rw'}
            inputs = {'root': namespace.input}
            inputs.update(butlerArgs)
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            outputs = {'root': namespace.input, 'mode': 'rw'}
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(outputs=outputs)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler,
        # hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict.keys():
                for dataRef in getattr(namespace, dataIdName).refList:
                    print("%s dataRef.dataId = %s" % (dataIdName, dataRef.dataId))

        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                sys.stderr.write("Warning: no 'debug' module found\n")
                namespace.debug = False

        del namespace.loglevel

        if namespace.longlog:
            lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddThh:mm:ss.sss} %c (%X{LABEL})(%F:%L)- %m%n
""")
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace

    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories.

        This allows for hacking the directories, e.g., to include a
        "rerun".
        Modifications are made to the 'namespace' object in-place.
        """
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument.
        # If rerun is specified as a colon separated value,
        # it will be parsed as an input and output.
        # The input value will be overridden if previously specified
        # (but a check is made to make sure both inputs use
        # the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                namespace.output = rerunDir[0]
                if os.path.exists(os.path.join(namespace.output, "_parent")):
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error("Error: invalid argument for --rerun: %s" % namespace.rerun)
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun

    def _processDataIds(self, namespace):
        """Process the parsed data for each data ID argument in an
        `~argparse.Namespace`.

        Processing includes:

        - Validate data ID keys.
        - Cast the data ID values to the correct type.
        - Compute data references from data IDs.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``butler``
            - ``log``
            - ``config``, if any dynamic dataset types are set by
              a config parameter.
            - Dataset type arguments (e.g. ``id_dstype``), if any dynamic
              dataset types are specified by such

            These attributes are modified:

            - ``<name>`` for each data ID argument registered using
              `add_id_argument` with name ``<name>``.
        """
        for dataIdArgument in self._dataIdArgDict.values():
            dataIdContainer = getattr(namespace, dataIdArgument.name)
            dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
            if dataIdArgument.doMakeDataRefList:
                try:
                    dataIdContainer.castDataIds(butler=namespace.butler)
                except (KeyError, TypeError) as e:
                    # failure of castDataIds indicates invalid command args
                    self.error(e)

                # failure of makeDataRefList indicates a bug
                # that wants a traceback
                dataIdContainer.makeDataRefList(namespace)

    def _applyInitialOverrides(self, namespace):
        """Apply obs-package-specific and camera-specific config
        override files, if found.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``obsPkg``

        Look in the package namespace.obsPkg for files:

        - ``config/<task_name>.py``
        - ``config/<camera_name>/<task_name>.py`` and load if found.
        """
        obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
        fileName = self._name + ".py"
        for filePath in (
            os.path.join(obsPkgDir, "config", fileName),
            os.path.join(obsPkgDir, "config", namespace.camera, fileName),
        ):
            if os.path.exists(filePath):
                # Fixed typo in log message: was "overrride"
                namespace.log.info("Loading config override file %r", filePath)
                namespace.config.load(filePath)
            else:
                namespace.log.debug("Config override file does not exist: %r", filePath)

    def handleCamera(self, namespace):
        """Perform camera-specific operations before parsing the command-line.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace with the following fields:

            - ``camera``: the camera name.
            - ``config``: the config passed to parse_args, with no overrides applied.
            - ``obsPkg``: the ``obs_`` package for this camera.
            - ``log``: a `lsst.log` Log.

        Notes
        -----
        The default implementation does nothing.
        """
        pass

    def convert_arg_line_to_args(self, arg_line):
        """Allow files of arguments referenced by ``@<path>`` to contain
        multiple values on each line.

        Parameters
        ----------
        arg_line : `str`
            Line of text read from an argument file.
        """
        arg_line = arg_line.strip()
        if not arg_line or arg_line.startswith("#"):
            return
        for arg in shlex.split(arg_line, comments=True, posix=True):
            if not arg.strip():
                continue
            yield arg

    def addReuseOption(self, choices):
        """Add a "--reuse-outputs-from SUBTASK" option to the argument
        parser.

        CmdLineTasks that can be restarted at an intermediate step using
        outputs from earlier (but still internal) steps should use this
        method to allow the user to control whether that happens when
        outputs from earlier steps are present.

        Parameters
        ----------
        choices : sequence
            A sequence of string names (by convention, top-level subtasks)
            that identify the steps that could be skipped when their
            outputs are already present. The list is ordered, so when the
            user specifies one step on the command line, all previous steps
            may be skipped as well. In addition to the choices provided,
            users may pass "all" to indicate that all steps may be thus
            skipped.

        When this method is called, the ``namespace`` object returned by
        ``parse_args`` will contain a ``reuse`` attribute containing
        a list of all steps that should be skipped if their outputs
        are already present.
        If no steps should be skipped, the ``reuse`` will be an empty list.
        """
        choices = list(choices)
        choices.append("all")
        self.add_argument("--reuse-outputs-from", dest="reuse", choices=choices,
                          default=[], action=ReuseAction,
                          help=("Skip the given subtask and its predecessors and reuse their outputs "
                                "if those outputs already exist. Use 'all' to specify all subtasks."))
911 
912 
914  """`ArgumentParser` for command-line tasks that don't write any output.
915  """
916 
917  requireOutput = False # We're not going to write anything
918 
919 
def getTaskDict(config, taskDict=None, baseName=""):
    """Get a dictionary of task info for all subtasks in a config.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    taskDict : `dict`, optional
        Users should not specify this argument. Supports recursion.
        If provided, taskDict is updated in place, else a new `dict`
        is started.
    baseName : `str`, optional
        Users should not specify this argument. It is only used for
        recursion: if a non-empty string then a period is appended
        and the result is used as a prefix for additional entries
        in taskDict; otherwise no prefix is used.

    Returns
    -------
    taskDict : `dict`
        Keys are config field names, values are task names.

    Notes
    -----
    This function is designed to be called recursively.
    The user should call with only a config (leaving taskDict and baseName
    at their default values).
    """
    if taskDict is None:
        taskDict = dict()
    for fieldName, field in config.items():
        # Only retargetable subtask fields expose both .value and .target.
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue
        subBaseName = "%s.%s" % (baseName, fieldName) if baseName else fieldName
        try:
            taskName = "%s.%s" % (field.target.__module__, field.target.__name__)
        except Exception:
            # Fall back to repr for targets without module/name attributes.
            taskName = repr(field.target)
        taskDict[subBaseName] = taskName
        getTaskDict(config=subConfig, taskDict=taskDict, baseName=subBaseName)
    return taskDict
962 
963 
def obeyShowArgument(showOpts, config=None, exit=False):
    """Process arguments specified with ``--show`` (but ignores
    ``"data"``).

    Parameters
    ----------
    showOpts : `list` of `str`
        List of options passed to ``--show``.
    config : `lsst.pex.config.Config`, optional
        The provided config.
    exit : `bool`, optional
        Exit if ``"run"`` isn't included in ``showOpts``.

    Notes
    -----
    Supports the following options in showOpts:

    - ``config[=PAT]``. Dump all the config entries, or just the ones that
      match the glob pattern.
    - ``history=PAT``. Show where the config entries that match the glob
      pattern were set.
    - ``tasks``. Show task hierarchy.
    - ``data``. Ignored; to be processed by caller.
    - ``run``. Keep going (the default behaviour is to exit if
      ``--show`` is specified).

    Calls ``sys.exit(1)`` if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        # Split "option=args" into its two halves; args defaults to "".
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            # Accept an optional leading "config." on the pattern.
            # NOTE(review): the "." in "(?:config.)?" is an unescaped regex
            # dot, so it matches any character after "config" — confirm
            # whether r"config\." was intended.
            matConfig = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matConfig.group(1)
            if pattern:
                class FilteredStream(object):
                    """A file object that only prints lines
                    that match the glob "pattern".

                    N.b. Newlines are silently discarded and reinserted;
                    crude but effective.
                    """

                    def __init__(self, pattern):
                        # obey case if pattern isn't lowercase or requests NOIGNORECASE
                        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

                        if mat:
                            pattern = mat.group(1)
                            self._pattern = re.compile(fnmatch.translate(pattern))
                        else:
                            if pattern != pattern.lower():
                                print(u"Matching \"%s\" without regard to case "
                                      "(append :NOIGNORECASE to prevent this)" % (pattern,), file=sys.stdout)
                            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

                    def write(self, showStr):
                        showStr = showStr.rstrip()
                        # Strip off doc string line(s) and cut off
                        # at "=" for string matching
                        matchStr = showStr.split("\n")[-1].split("=")[0]
                        if self._pattern.search(matchStr):
                            print(u"\n" + showStr)

                fd = FilteredStream(pattern)
            else:
                fd = sys.stdout

            config.saveToStream(fd, "config")
        elif showCommand == "history":
            # Accept an optional leading "config." on the field path.
            matHistory = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matHistory.group(1)
            if not pattern:
                print("Please provide a value with --show history (e.g. history=XXX)", file=sys.stderr)
                sys.exit(1)

            # Walk the dotted path down to the sub-config holding the field.
            pattern = pattern.split(".")
            cpath, cname = pattern[:-1], pattern[-1]
            hconfig = config  # the config that we're interested in
            for i, cpt in enumerate(cpath):
                try:
                    hconfig = getattr(hconfig, cpt)
                except AttributeError:
                    print("Error: configuration %s has no subconfig %s" %
                          (".".join(["config"] + cpath[:i]), cpt), file=sys.stderr)

                    sys.exit(1)

            try:
                print(pexConfig.history.format(hconfig, cname))
            except KeyError:
                print("Error: %s has no field %s" % (".".join(["config"] + cpath), cname), file=sys.stderr)
                sys.exit(1)

        elif showCommand == "data":
            # Handled by the caller, which has access to the butler.
            pass
        elif showCommand == "run":
            # Only meaningful at the end of the loop, via the check below.
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            print(u"Unknown value for show: %s (choose from '%s')" %
                  (what, "', '".join("config[=XXX] data history=XXX tasks run".split())), file=sys.stderr)
            sys.exit(1)

    # By default --show is dump-and-quit; "run" suppresses the exit.
    if exit and "run" not in showOpts:
        sys.exit(0)
1074 
1075 
def showTaskHierarchy(config):
    """Print task hierarchy to stdout.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    """
    print(u"Subtasks:")
    taskDict = getTaskDict(config=config)
    # Field names are unique keys, so sorting the items sorts by name.
    for fieldName, taskName in sorted(taskDict.items()):
        print(u"%s: %s" % (fieldName, taskName))
1091 
1092 
class ConfigValueAction(argparse.Action):
    """argparse action callback to override config parameters using
    name=value pairs from the command-line.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Override one or more config name value pairs.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The ``namespace.config`` attribute is updated.
        values : `list`
            A list of ``configItemName=value`` pairs.
        option_string : `str`
            Option value specified by the user.
        """
        # Nothing to override (e.g. config was suppressed); silently ignore.
        if namespace.config is None:
            return
        for nameValue in values:
            name, sep, valueStr = nameValue.partition("=")
            if not valueStr:
                parser.error("%s value %s must be in form name=value" % (option_string, nameValue))

            # see if setting the string value works; if not, try eval
            try:
                setDottedAttr(namespace.config, name, valueStr)
            except AttributeError:
                parser.error("no config field: %s" % (name,))
            except Exception:
                # The field rejected the raw string, so evaluate it as a
                # Python expression (numbers, lists, booleans, ...).
                # NOTE(review): eval of command-line text; acceptable for a
                # user-run tool, but never feed this untrusted input.
                try:
                    value = eval(valueStr, {})
                except Exception:
                    parser.error("cannot parse %r as a value for %s" % (valueStr, name))
                try:
                    setDottedAttr(namespace.config, name, value)
                except Exception as e:
                    parser.error("cannot set config.%s=%r: %s" % (name, value, e))
1133 
1134 
class ConfigFileAction(argparse.Action):
    """argparse action to load config overrides from one or more files.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Load one or more files of config overrides.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The following attributes are updated by this
            method: ``namespace.config``.
        values : `list`
            A list of data config file paths.
        option_string : `str`, optional
            Option value specified by the user.
        """
        config = namespace.config
        # No config to update (e.g. it was suppressed); silently ignore.
        if config is None:
            return
        for path in values:
            try:
                config.load(path)
            except Exception as exc:
                parser.error("cannot load config file %r: %s" % (path, exc))
1161 
1162 
class IdValueAction(argparse.Action):
    """argparse action callback to process a data ID into a dict.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Parse ``--id`` data and append results to
        ``namespace.<argument>.idList``.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command (an instance of argparse.Namespace).
            The following attributes are updated:

            - ``<idName>.idList``, where ``<idName>`` is the name of the
              ID argument, for instance ``"id"`` for ID argument ``--id``.
        values : `list`
            A list of data IDs; see Notes below.
        option_string : `str`
            Option value specified by the user.

        Notes
        -----
        The data format is::

            key1=value1_1[^value1_2[^value1_3...]
            key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. ``value1_1``) may either be a string,
        or of the form ``"int..int"`` (e.g. ``"1..3"``) which is
        interpreted as ``"1^2^3"`` (inclusive, unlike a python range).
        So ``"0^2..4^7..9"`` is equivalent to ``"0^2^3^4^7^8^9"``.
        You may also specify a stride: ``"1..5:2"`` is ``"1^3^5"``.

        The cross product is computed for keys with multiple values.
        For example::

            --id visit=1^2 ccd=1,1^2,2

        results in the following data ID dicts being appended to
        ``namespace.<argument>.idList`` (all values are strings, including
        expanded integer ranges)::

            {"visit":"1", "ccd":"1,1"}
            {"visit":"1", "ccd":"2,2"}
            {"visit":"2", "ccd":"1,1"}
            {"visit":"2", "ccd":"2,2"}
        """
        # No config means argument parsing is being short-circuited; ignore.
        if namespace.config is None:
            return
        idDict = collections.OrderedDict()
        for nameValue in values:
            name, sep, valueStr = nameValue.partition("=")
            if name in idDict:
                parser.error("%s appears multiple times in one ID argument: %s" % (name, option_string))
            idDict[name] = []
            for v in valueStr.split("^"):
                mat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", v)
                if mat:
                    # Expand "start..stop[:stride]" inclusively, as strings.
                    start = int(mat.group(1))
                    stop = int(mat.group(2))
                    strideStr = mat.group(3)
                    stride = int(strideStr) if strideStr else 1
                    # Use a distinct loop variable; the original shadowed v.
                    for num in range(start, stop + 1, stride):
                        idDict[name].append(str(num))
                else:
                    idDict[name].append(v)

        # Cross product over all keys, preserving insertion order of keys.
        iterList = list(idDict.values())
        idDictList = [collections.OrderedDict(zip(idDict.keys(), valList))
                      for valList in itertools.product(*iterList)]

        # "--id" updates namespace.id, "--selectId" namespace.selectId, etc.
        argName = option_string.lstrip("-")
        ident = getattr(namespace, argName)
        ident.idList += idDictList
1239 
1240 
class LogLevelAction(argparse.Action):
    """argparse action to set log level.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Set trace level.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. This argument is not used.
        values : `list`
            List of trace levels; each item must be of the form
            ``component_name=level`` or ``level``, where ``level``
            is a keyword (not case sensitive) or an integer.
        option_string : `str`
            Option value specified by the user.
        """
        allowedLevels = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        allowedSet = set(allowedLevels)
        for spec in values:
            component, sep, levelName = spec.partition("=")
            if not levelName:
                # A bare level (no "=") applies to the root logger.
                levelName, component = component, None
            upperName = levelName.upper()
            if upperName in allowedSet:
                level = getattr(lsstLog.Log, upperName)
            else:
                parser.error("loglevel=%r not one of %s" % (levelName, allowedLevels))
            if component is None:
                namespace.log.setLevel(level)
            else:
                lsstLog.Log.getLogger(component).setLevel(level)
1276 
1277 
class ReuseAction(argparse.Action):
    """argparse action associated with ArgumentParser.addReuseOption."""

    def __call__(self, parser, namespace, value, option_string):
        # "all" is shorthand for the last real step (the entry just
        # before the "all" sentinel appended by addReuseOption).
        selected = self.choices[-2] if value == "all" else value
        # Reuse the chosen step and every step that precedes it.
        cut = self.choices.index(selected) + 1
        namespace.reuse = self.choices[:cut]
1286 
1287 
def setDottedAttr(item, name, value):
    """Set an instance attribute (like `setattr` but accepting
    hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be set.
    name : `str`
        Name of attribute to set.
    value : obj
        New value for the attribute.

    Notes
    -----
    For example if name is ``foo.bar.baz`` then ``item.foo.bar.baz``
    is set to the specified value.
    """
    parts = name.split(".")
    # Walk down to the object owning the final attribute.
    target = item
    for attrName in parts[:-1]:
        target = getattr(target, attrName)
    setattr(target, parts[-1], value)
1311 
1312 
def getDottedAttr(item, name):
    """Get an attribute (like `getattr` but accepts hierarchical names
    such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be returned.
    name : `str`
        Name of the attribute to get.

    Returns
    -------
    itemAttr : obj
        If name is ``foo.bar.baz`` then the return value is
        ``item.foo.bar.baz``.
    """
    # Follow each dotted component in turn.
    current = item
    for fieldName in name.split("."):
        current = getattr(current, fieldName)
    return current
def getTaskDict(config, taskDict=None, baseName="")
def __init__(self, name=None, help="dataset type to process from input data repository", default=None)
def setDottedAttr(item, name, value)
def __call__(self, parser, namespace, values, option_string=None)
def obeyShowArgument(showOpts, config=None, exit=False)
std::string getPackageDir(std::string const &packageName)
def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer)
def __call__(self, parser, namespace, value, option_string)
def __init__(self, name, usage="%(prog)s input [options]", kwargs)
def __call__(self, parser, namespace, values, option_string)
def __call__(self, parser, namespace, values, option_string)
def __call__(self, parser, namespace, values, option_string)
def parse_args(self, config, args=None, log=None, override=None)
def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True, ContainerClass=DataIdContainer)