lsst.pipe.base  19.0.0-10-ged17d6e
argumentParser.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2015 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 __all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
23  "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]
24 
25 import abc
26 import argparse
27 import collections
28 import fnmatch
29 import itertools
30 import logging
31 import os
32 import re
33 import shlex
34 import sys
35 import shutil
36 import textwrap
37 
38 import lsst.utils
39 import lsst.pex.config as pexConfig
40 import lsst.pex.config.history
41 import lsst.log as lsstLog
42 import lsst.daf.persistence as dafPersist
43 
44 DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
45 DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
46 DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
47 
48 
def _fixPath(defName, path):
    """Expand ``path`` against an environment-variable root, if one is set.

    Parameters
    ----------
    defName : `str`
        Name of the environment variable holding the default root path;
        if the variable is unset the path is taken relative to the
        current working directory.
    path : `str`
        Path relative to the default root path.

    Returns
    -------
    abspath : `str`
        Expanded absolute path, or `None` if the environment variable
        is unset and ``path`` is `None`.
    """
    root = os.environ.get(defName)
    if root is not None:
        # An empty string keeps os.path.join happy when path is None.
        return os.path.abspath(os.path.join(root, path if path else ""))
    if path is None:
        return None
    return os.path.abspath(path)
73 
74 
76  """Container for data IDs and associated data references.
77 
78  Parameters
79  ----------
80  level : `str`
81  The lowest hierarchy level to descend to for this dataset type,
82  for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
83  Use `""` to use the mapper's default for the dataset type.
84  This class does not support `None`, but if it did, `None`
85  would mean the level should not be restricted.
86 
87  Notes
88  -----
89  Override this class for data IDs that require special handling to be
90  converted to ``data references``, and specify the override class
91  as ``ContainerClass`` for ``add_id_argument``.
92 
93  If you don't want the argument parser to compute data references,
94  specify ``doMakeDataRefList=False`` in ``add_id_argument``.
95  """
96 
97  def __init__(self, level=None):
98  self.datasetType = None
99  """Dataset type of the data references (`str`).
100  """
101  self.level = level
102  """See parameter ``level`` (`str`).
103  """
104  self.idList = []
105  """List of data IDs specified on the command line for the
106  appropriate data ID argument (`list` of `dict`).
107  """
108  self.refList = []
109  """List of data references for the data IDs in ``idList``
110  (`list` of `lsst.daf.persistence.ButlerDataRef`).
111  Elements will be omitted if the corresponding data is not found.
112  The list will be empty when returned by ``parse_args`` if
113  ``doMakeDataRefList=False`` was specified in ``add_id_argument``.
114  """
115 
116  def setDatasetType(self, datasetType):
117  """Set actual dataset type, once it is known.
118 
119  Parameters
120  ----------
121  datasetType : `str`
122  Dataset type.
123 
124  Notes
125  -----
126  The reason ``datasetType`` is not a constructor argument is that
127  some subclasses do not know the dataset type until the command
128  is parsed. Thus, to reduce special cases in the code,
129  ``datasetType`` is always set after the command is parsed.
130  """
131  self.datasetType = datasetType
132 
    def castDataIds(self, butler):
        """Validate data IDs and cast them to the correct type
        (modify idList in place).

        This code casts the values in the data IDs dicts in `dataIdList`
        to the type required by the butler. Data IDs are read from the
        command line as `str`, but the butler requires some values to be
        other types. For example "visit" values should be `int`.

        Parameters
        ----------
        butler : `lsst.daf.persistence.Butler`
            Data butler.

        Raises
        ------
        RuntimeError
            If `setDatasetType` has not been called yet.
        KeyError
            If the butler cannot supply keys for ``datasetType`` at
            ``level``.
        TypeError
            If a data ID value cannot be cast to its required type.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            idKeyTypeDict = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError as e:
            # Re-raise with a message naming the dataset type and level,
            # preserving the original exception as the cause.
            msg = "Cannot get keys for datasetType %s at level %s" % (self.datasetType, self.level)
            raise KeyError(msg) from e

        for dataDict in self.idList:
            for key, strVal in dataDict.items():
                try:
                    keyType = idKeyTypeDict[key]
                except KeyError:
                    # OK, assume that it's a valid key and guess that it's a string,
                    # warning the user and remembering the guess so the
                    # warning is emitted only once per unexpected key.
                    keyType = str

                    log = lsstLog.Log.getDefaultLogger()
                    log.warn("Unexpected ID %s; guessing type is \"%s\"" %
                             (key, 'str' if keyType == str else keyType))
                    idKeyTypeDict[key] = keyType

                if keyType != str:
                    # Command-line values arrive as `str`; cast in place.
                    try:
                        castVal = keyType(strVal)
                    except Exception:
                        raise TypeError("Cannot cast value %r to %s for ID key %r" % (strVal, keyType, key,))
                    dataDict[key] = castVal
174 
    def makeDataRefList(self, namespace):
        """Compute refList based on idList.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Results of parsing command-line. The ``butler`` and ``log``
            elements must be set.

        Raises
        ------
        RuntimeError
            If `setDatasetType` has not been called yet.

        Notes
        -----
        Not called if ``add_id_argument`` was called with
        ``doMakeDataRefList=False``.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            refList = dafPersist.searchDataRefs(butler, datasetType=self.datasetType,
                                                level=self.level, dataId=dataId)
            # Missing data is only a warning, not an error: the
            # corresponding entries are simply omitted from refList.
            if not refList:
                namespace.log.warn("No data found for dataId=%s", dataId)
                continue
            self.refList += refList
199 
200 
202  """data ID argument, used by `ArgumentParser.add_id_argument`.
203 
204  Parameters
205  ----------
206  name : `str`
207  Name of identifier (argument name without dashes).
208  datasetType : `str`
209  Type of dataset; specify a string for a fixed dataset type
210  or a `DatasetArgument` for a dynamic dataset type (e.g.
211  one specified by a command-line argument).
212  level : `str`
213  The lowest hierarchy level to descend to for this dataset type,
214  for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
215  Use `""` to use the mapper's default for the dataset type.
216  Some container classes may also support `None`, which means
217  the level should not be restricted; however the default class,
218  `DataIdContainer`, does not support `None`.
219  doMakeDataRefList : `bool`, optional
220  If `True` (default), construct data references.
221  ContainerClass : `class`, optional
222  Class to contain data IDs and data references; the default class
223  `DataIdContainer` will work for many, but not all, cases.
224  For example if the dataset type is specified on the command line
225  then use `DynamicDatasetType`.
226  """
227 
228  def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
229  if name.startswith("-"):
230  raise RuntimeError("Name %s must not start with -" % (name,))
231  self.name = name
232  self.datasetType = datasetType
233  self.level = level
234  self.doMakeDataRefList = bool(doMakeDataRefList)
235  self.ContainerClass = ContainerClass
236  self.argName = name.lstrip("-")
237 
238  @property
240  """`True` if the dataset type is dynamic (that is, specified
241  on the command line).
242  """
243  return isinstance(self.datasetType, DynamicDatasetType)
244 
245  def getDatasetType(self, namespace):
246  """Get the dataset type as a string.
247 
248  Parameters
249  ----------
250  namespace
251  Parsed command.
252 
253  Returns
254  -------
255  datasetType : `str`
256  Dataset type.
257  """
258  if self.isDynamicDatasetType:
259  return self.datasetType.getDatasetType(namespace)
260  else:
261  return self.datasetType
262 
263 
class DynamicDatasetType(metaclass=abc.ABCMeta):
    """Abstract base class for a dataset type determined from parsed
    command-line arguments.
    """

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify the dataset type name,
        if one is wanted.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser to add the argument to.
        idName : `str`
            Name of data ID argument, without the leading ``"--"``,
            e.g. ``"id"``.

        Notes
        -----
        This default implementation is a no-op; subclasses that need a
        dedicated command-line argument override it.
        """
        pass

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Return the dataset type as a string, based on parsed
        command-line arguments.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        raise NotImplementedError("Subclasses must override")
298 
299 
301  """Dataset type specified by a command-line argument.
302 
303  Parameters
304  ----------
305  name : `str`, optional
306  Name of command-line argument (including leading "--",
307  if appropriate) whose value is the dataset type.
308  If `None`, uses ``--idName_dstype`` where idName
309  is the name of the data ID argument (e.g. "id").
310  help : `str`, optional
311  Help string for the command-line argument.
312  default : `object`, optional
313  Default value. If `None`, then the command-line option is required.
314  This argument is ignored if the command-line argument is positional
315  (name does not start with "-") because positional arguments do
316  not support default values.
317  """
318 
319  def __init__(self,
320  name=None,
321  help="dataset type to process from input data repository",
322  default=None,
323  ):
324  DynamicDatasetType.__init__(self)
325  self.name = name
326  self.help = help
327  self.default = default
328 
329  def getDatasetType(self, namespace):
330  """Get the dataset type as a string, from the appropriate
331  command-line argument.
332 
333  Parameters
334  ----------
335  namespace :
336  Parsed command.
337 
338  Returns
339  -------
340  datasetType : `str`
341  Dataset type.
342  """
343  argName = self.name.lstrip("-")
344  return getattr(namespace, argName)
345 
346  def addArgument(self, parser, idName):
347  """Add a command-line argument to specify the dataset type name.
348 
349  Parameters
350  ----------
351  parser : `ArgumentParser`
352  Argument parser.
353  idName : `str`
354  Data ID.
355 
356  Notes
357  -----
358  Also sets the `name` attribute if it is currently `None`.
359  """
360  help = self.help if self.help else "dataset type for %s" % (idName,)
361  if self.name is None:
362  self.name = "--%s_dstype" % (idName,)
363  requiredDict = dict()
364  if self.name.startswith("-"):
365  requiredDict = dict(required=self.default is None)
366  parser.add_argument(
367  self.name,
368  default=self.default,
369  help=help,
370  **requiredDict)
371 
372 
374  """Dataset type specified by a config parameter.
375 
376  Parameters
377  ----------
378  name : `str`
379  Name of config option whose value is the dataset type.
380  """
381 
382  def __init__(self, name):
383  DynamicDatasetType.__init__(self)
384  self.name = name
385 
386  def getDatasetType(self, namespace):
387  """Return the dataset type as a string, from the appropriate
388  config field.
389 
390  Parameters
391  ----------
392  namespace : `argparse.Namespace`
393  Parsed command.
394  """
395  # getattr does not work reliably if the config field name is
396  # dotted, so step through one level at a time
397  keyList = self.name.split(".")
398  value = namespace.config
399  for key in keyList:
400  try:
401  value = getattr(value, key)
402  except KeyError:
403  raise RuntimeError("Cannot find config parameter %r" % (self.name,))
404  return value
405 
406 
class ArgumentParser(argparse.ArgumentParser):
    """Argument parser for command-line tasks that is based on
    `argparse.ArgumentParser`.

    Parameters
    ----------
    name : `str`
        Name of top-level task; used to identify camera-specific override
        files.
    usage : `str`, optional
        Command-line usage signature.
    **kwargs
        Additional keyword arguments for `argparse.ArgumentParser`.

    Notes
    -----
    Users may wish to add additional arguments before calling `parse_args`.
    """
    # I would prefer to check data ID keys and values as they are parsed,
    # but the required information comes from the butler, so I have to
    # construct a butler before I do this checking. Constructing a butler
    # is slow, so I only want do it once, after parsing the command line,
    # so as to catch syntax errors quickly.

    # Require an output directory to be specified (`bool`); subclasses
    # (e.g. InputOnlyArgumentParser) may override.
    requireOutput = True
    """Require an output directory to be specified (`bool`)."""
433 
    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        """Construct the parser and register the command-line arguments
        common to all command-line tasks.

        Parameters
        ----------
        name : `str`
            Name of the top-level task; used to locate camera-specific
            config override files.
        usage : `str`, optional
            Command-line usage signature.
        **kwargs
            Additional keyword arguments for `argparse.ArgumentParser`.
        """
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        argparse.ArgumentParser.__init__(self,
                                         usage=usage,
                                         fromfile_prefix_chars='@',
                                         epilog=textwrap.dedent("""Notes:
            * --config, --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --configfile foo bar
                * wrong: --configfile=foo bar
            """),
                                         formatter_class=argparse.RawDescriptionHelpFormatter,
                                         **kwargs)
        # The raw* dests are post-processed by _parseDirectories and
        # replaced with input/calib/output/rerun on the namespace.
        self.add_argument(metavar='input', dest="rawInput",
                          help="path to input data repository, relative to $%s" % (DEFAULT_INPUT_NAME,))
        self.add_argument("--calib", dest="rawCalib",
                          help="path to input calibration repository, relative to $%s" %
                          (DEFAULT_CALIB_NAME,))
        self.add_argument("--output", dest="rawOutput",
                          help="path to output data repository (need not exist), relative to $%s" %
                          (DEFAULT_OUTPUT_NAME,))
        self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
                          help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
                               "optionally sets ROOT to ROOT/rerun/INPUT")
        self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
                          help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
        self.add_argument("-C", "--configfile", dest="configfile", nargs="*", action=ConfigFileAction,
                          help="config override file(s)")
        self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
                          help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
                          metavar="LEVEL|COMPONENT=LEVEL")
        self.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument("--doraise", action="store_true",
                          help="raise an exception on error (else log a message and continue)?")
        self.add_argument("--noExit", action="store_true",
                          help="Do not exit even upon failure (i.e. return a struct to the calling script)")
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument("--show", nargs="+", default=(),
                          help="display the specified information to stdout and quit "
                               "(unless run is specified); information is "
                               "(config[=PATTERN]|history=PATTERN|tasks|data|run)")
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument("-t", "--timeout", type=float,
                          help="Timeout for multiprocessing; maximum wall time (sec)")
        self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
                          help=("remove and re-create the output directory if it already exists "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
                          help=("backup and then overwrite existing config files instead of checking them "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
                          help="Don't copy config to file~N backup.")
        self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
                          help=("backup and then overwrite existing package versions instead of checking"
                                "them (safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
                          help="don't check package versions; useful for development")
        # Default logging configuration: INFO-level, terse format, to stdout.
        lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
""")

        # Forward all Python logging to lsst.log
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        lgr.addHandler(lsstLog.LogHandler())
510 
511  def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
512  ContainerClass=DataIdContainer):
513  """Add a data ID argument.
514 
515 
516  Parameters
517  ----------
518  name : `str`
519  Data ID argument (including leading dashes, if wanted).
520  datasetType : `str` or `DynamicDatasetType`-type
521  Type of dataset. Supply a string for a fixed dataset type.
522  For a dynamically determined dataset type, supply
523  a `DynamicDatasetType`, such a `DatasetArgument`.
524  help : `str`
525  Help string for the argument.
526  level : `str`
527  The lowest hierarchy level to descend to for this dataset type,
528  for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
529  Use `""` to use the mapper's default for the dataset type.
530  Some container classes may also support `None`, which means
531  the level should not be restricted; however the default class,
532  `DataIdContainer`, does not support `None`.
533  doMakeDataRefList : bool, optional
534  If `True` (default), construct data references.
535  ContainerClass : `class`, optional
536  Class to contain data IDs and data references; the default class
537  `DataIdContainer` will work for many, but not all, cases.
538  For example if the dataset type is specified on the command line
539  then use `DynamicDatasetType`.
540 
541  Notes
542  -----
543  If ``datasetType`` is an instance of `DatasetArgument`,
544  then add a second argument to specify the dataset type.
545 
546  The associated data is put into ``namespace.<dataIdArgument.name>``
547  as an instance of `ContainerClass`; the container includes fields:
548 
549  - ``idList``: a list of data ID dicts.
550  - ``refList``: a list of `~lsst.daf.persistence.Butler`
551  data references (empty if ``doMakeDataRefList`` is `False`).
552  """
553  argName = name.lstrip("-")
554 
555  if argName in self._dataIdArgDict:
556  raise RuntimeError("Data ID argument %s already exists" % (name,))
557  if argName in set(("camera", "config", "butler", "log", "obsPkg")):
558  raise RuntimeError("Data ID argument %s is a reserved name" % (name,))
559 
560  self.add_argument(name, nargs="*", action=IdValueAction, help=help,
561  metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")
562 
563  dataIdArgument = DataIdArgument(
564  name=argName,
565  datasetType=datasetType,
566  level=level,
567  doMakeDataRefList=doMakeDataRefList,
568  ContainerClass=ContainerClass,
569  )
570 
571  if dataIdArgument.isDynamicDatasetType:
572  datasetType.addArgument(parser=self, idName=argName)
573 
574  self._dataIdArgDict[argName] = dataIdArgument
575 
    def parse_args(self, config, args=None, log=None, override=None):
        """Parse arguments for a command-line task.

        Parameters
        ----------
        config : `lsst.pex.config.Config`
            Config for the task being run.
        args : `list`, optional
            Argument list; if `None` then ``sys.argv[1:]`` is used.
        log : `lsst.log.Log`, optional
            `~lsst.log.Log` instance; if `None` use the default log.
        override : callable, optional
            A config override function. It must take the root config object
            as its only argument and must modify the config in place.
            This function is called after camera-specific overrides files
            are applied, and before command-line config overrides
            are applied (thus allowing the user the final word).

        Returns
        -------
        namespace : `argparse.Namespace`
            A `~argparse.Namespace` instance containing fields:

            - ``camera``: camera name.
            - ``config``: the supplied config with all overrides applied,
              validated and frozen.
            - ``butler``: a `lsst.daf.persistence.Butler` for the data.
            - An entry for each of the data ID arguments registered by
              `add_id_argument`, of the type passed to its ``ContainerClass``
              keyword (`~lsst.pipe.base.DataIdContainer` by default). It
              includes public elements ``idList`` and ``refList``.
            - ``log``: a `lsst.log` Log.
            - An entry for each command-line argument,
              with the following exceptions:

              - config is the supplied config, suitably updated.
              - configfile, id and loglevel are all missing.
            - ``obsPkg``: name of the ``obs_`` package for this camera.
        """
        if args is None:
            args = sys.argv[1:]

        # The input repository must be the first positional argument;
        # print help and exit if it is missing or looks like an option.
        if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                self.exit("%s: error: Must specify input as first argument" % self.prog)

        # Note that --rerun may change namespace.input, but if it does
        # we verify that the new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error("Error: input=%r not found" % (namespace.input,))

        namespace.config = config
        namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        if mapperClass is None:
            self.error("Error: no mapper specified for input repo %r" % (namespace.input,))

        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        # Camera hooks and override files run before the command-line
        # overrides (applied during parse_args below), so the command
        # line has the final word.
        self.handleCamera(namespace)

        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)

        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        butlerArgs = {}  # common arguments for butler elements
        if namespace.calib:
            butlerArgs = {'mapperArgs': {'calibRoot': namespace.calib}}
        if namespace.output:
            outputs = {'root': namespace.output, 'mode': 'rw'}
            inputs = {'root': namespace.input}
            inputs.update(butlerArgs)
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            # No separate output repo: open the input read-write.
            outputs = {'root': namespace.input, 'mode': 'rw'}
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(outputs=outputs)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler,
        # hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict.keys():
                for dataRef in getattr(namespace, dataIdName).refList:
                    print("%s dataRef.dataId = %s" % (dataIdName, dataRef.dataId))

        # --show without "run" means: display the information and quit.
        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                sys.stderr.write("Warning: no 'debug' module found\n")
                namespace.debug = False

        del namespace.loglevel

        if namespace.longlog:
            # Switch to a more verbose log format including timestamps
            # and source locations.
            lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n
""")
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace
728 
    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories.

        This allows for hacking the directories, e.g., to include a
        "rerun".
        Modifications are made to the 'namespace' object in-place.

        Reads the ``rawInput``, ``rawCalib``, ``rawOutput`` and
        ``rawRerun`` attributes set by the command-line arguments,
        replaces them with ``input``, ``calib``, ``output`` and
        ``rerun``, and deletes the raw attributes.
        """
        # Remember the input repo's mapper so a --rerun-modified input
        # can be checked against it below.
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument.
        # If rerun is specified as a colon separated value,
        # it will be parsed as an input and output.
        # The input value will be overridden if previously specified
        # (but a check is made to make sure both inputs use
        # the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                # INPUT:OUTPUT form — both are rerun directories.
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                # OUTPUT-only form; follow an existing _parent link to
                # find the actual input repository.
                namespace.output = rerunDir[0]
                if os.path.exists(os.path.join(namespace.output, "_parent")):
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error("Error: invalid argument for --rerun: %s" % namespace.rerun)
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun
775 
    def _processDataIds(self, namespace):
        """Process the parsed data for each data ID argument in an
        `~argparse.Namespace`.

        Processing includes:

        - Validate data ID keys.
        - Cast the data ID values to the correct type.
        - Compute data references from data IDs.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``butler``
            - ``log``
            - ``config``, if any dynamic dataset types are set by
              a config parameter.
            - Dataset type arguments (e.g. ``id_dstype``), if any dynamic
              dataset types are specified by such

            These attributes are modified:

            - ``<name>`` for each data ID argument registered using
              `add_id_argument` with name ``<name>``.
        """
        for dataIdArgument in self._dataIdArgDict.values():
            dataIdContainer = getattr(namespace, dataIdArgument.name)
            dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
            if dataIdArgument.doMakeDataRefList:
                try:
                    dataIdContainer.castDataIds(butler=namespace.butler)
                except (KeyError, TypeError) as e:
                    # failure of castDataIds indicates invalid command args
                    self.error(e)

                # failure of makeDataRefList indicates a bug
                # that wants a traceback
                dataIdContainer.makeDataRefList(namespace)
816 
817  def _applyInitialOverrides(self, namespace):
818  """Apply obs-package-specific and camera-specific config
819  override files, if found
820 
821  Parameters
822  ----------
823  namespace : `argparse.Namespace`
824  Parsed namespace. These attributes are read:
825 
826  - ``obsPkg``
827 
828  Look in the package namespace.obsPkg for files:
829 
830  - ``config/<task_name>.py``
831  - ``config/<camera_name>/<task_name>.py`` and load if found.
832  """
833  obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
834  fileName = self._name + ".py"
835  for filePath in (
836  os.path.join(obsPkgDir, "config", fileName),
837  os.path.join(obsPkgDir, "config", namespace.camera, fileName),
838  ):
839  if os.path.exists(filePath):
840  namespace.log.info("Loading config overrride file %r", filePath)
841  namespace.config.load(filePath)
842  else:
843  namespace.log.debug("Config override file does not exist: %r", filePath)
844 
845  def handleCamera(self, namespace):
846  """Perform camera-specific operations before parsing the command-line.
847 
848  Parameters
849  ----------
850  namespace : `argparse.Namespace`
851  Namespace (an ) with the following fields:
852 
853  - ``camera``: the camera name.
854  - ``config``: the config passed to parse_args, with no overrides applied.
855  - ``obsPkg``: the ``obs_`` package for this camera.
856  - ``log``: a `lsst.log` Log.
857 
858  Notes
859  -----
860  The default implementation does nothing.
861  """
862  pass
863 
864  def convert_arg_line_to_args(self, arg_line):
865  """Allow files of arguments referenced by ``@<path>`` to contain
866  multiple values on each line.
867 
868  Parameters
869  ----------
870  arg_line : `str`
871  Line of text read from an argument file.
872  """
873  arg_line = arg_line.strip()
874  if not arg_line or arg_line.startswith("#"):
875  return
876  for arg in shlex.split(arg_line, comments=True, posix=True):
877  if not arg.strip():
878  continue
879  yield arg
880 
881  def addReuseOption(self, choices):
882  """Add a "--reuse-outputs-from SUBTASK" option to the argument
883  parser.
884 
885  CmdLineTasks that can be restarted at an intermediate step using
886  outputs from earlier (but still internal) steps should use this
887  method to allow the user to control whether that happens when
888  outputs from earlier steps are present.
889 
890  Parameters
891  ----------
892  choices : sequence
893  A sequence of string names (by convention, top-level subtasks)
894  that identify the steps that could be skipped when their
895  outputs are already present. The list is ordered, so when the
896  user specifies one step on the command line, all previous steps
897  may be skipped as well. In addition to the choices provided,
898  users may pass "all" to indicate that all steps may be thus
899  skipped.
900 
901  When this method is called, the ``namespace`` object returned by
902  ``parse_args`` will contain a ``reuse`` attribute containing
903  a list of all steps that should be skipped if their outputs
904  are already present.
905  If no steps should be skipped, the ``reuse`` will be an empty list.
906  """
907  choices = list(choices)
908  choices.append("all")
909  self.add_argument("--reuse-outputs-from", dest="reuse", choices=choices,
910  default=[], action=ReuseAction,
911  help=("Skip the given subtask and its predecessors and reuse their outputs "
912  "if those outputs already exist. Use 'all' to specify all subtasks."))
913 
914 
class InputOnlyArgumentParser(ArgumentParser):
    """`ArgumentParser` for command-line tasks that don't write any output.
    """

    requireOutput = False  # We're not going to write anything

    # NOTE(review): the ``class`` statement line was lost by the source
    # extraction; it is reconstructed here from the module's ``__all__``
    # list and the docstring -- confirm against the original file.
920 
921 
def getTaskDict(config, taskDict=None, baseName=""):
    """Get a dictionary of task info for all subtasks in a config.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    taskDict : `dict`, optional
        Users should not specify this argument. Supports recursion.
        If provided, taskDict is updated in place, else a new `dict`
        is started.
    baseName : `str`, optional
        Users should not specify this argument. It is only used for
        recursion: if a non-empty string then a period is appended
        and the result is used as a prefix for additional entries
        in taskDict; otherwise no prefix is used.

    Returns
    -------
    taskDict : `dict`
        Keys are config field names, values are task names.

    Notes
    -----
    This function is designed to be called recursively.
    The user should call with only a config (leaving taskDict and baseName
    at their default values).
    """
    if taskDict is None:
        taskDict = {}
    for fieldName, field in config.items():
        # Only ConfigurableField-like entries expose both "value" and "target".
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue
        subBaseName = fieldName if not baseName else "%s.%s" % (baseName, fieldName)
        try:
            taskName = "%s.%s" % (field.target.__module__, field.target.__name__)
        except Exception:
            # Fall back to repr for targets without module/name attributes.
            taskName = repr(field.target)
        taskDict[subBaseName] = taskName
        # Recurse to collect this subtask's own subtasks.
        getTaskDict(config=subConfig, taskDict=taskDict, baseName=subBaseName)
    return taskDict
964 
965 
def obeyShowArgument(showOpts, config=None, exit=False):
    """Process arguments specified with ``--show`` (but ignores
    ``"data"``).

    Parameters
    ----------
    showOpts : `list` of `str`
        List of options passed to ``--show``.
    config : optional
        The provided config.
    exit : bool, optional
        Exit if ``"run"`` isn't included in ``showOpts``.

    Notes
    -----
    Supports the following options in showOpts:

    - ``config[=PAT]``. Dump all the config entries, or just the ones that
      match the glob pattern.
    - ``history=PAT``. Show where the config entries that match the glob
      pattern were set.
    - ``tasks``. Show task hierarchy.
    - ``data``. Ignored; to be processed by caller.
    - ``run``. Keep going (the default behaviour is to exit if
      ``--show`` is specified).

    Calls ``sys.exit(1)`` if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        # Split "option=args"; an option without "=" gets empty args.
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            # Accept an optional leading "config." on the pattern.
            matConfig = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matConfig.group(1)
            if pattern:
                class FilteredStream:
                    """A file object that only prints lines
                    that match the glob "pattern".

                    N.b. Newlines are silently discarded and reinserted;
                    crude but effective.
                    """

                    def __init__(self, pattern):
                        # obey case if pattern isn't lowercase or requests NOIGNORECASE
                        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

                        if mat:
                            pattern = mat.group(1)
                            self._pattern = re.compile(fnmatch.translate(pattern))
                        else:
                            if pattern != pattern.lower():
                                print(u"Matching \"%s\" without regard to case "
                                      "(append :NOIGNORECASE to prevent this)" % (pattern,), file=sys.stdout)
                            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

                    def write(self, showStr):
                        showStr = showStr.rstrip()
                        # Strip off doc string line(s) and cut off
                        # at "=" for string matching
                        matchStr = showStr.split("\n")[-1].split("=")[0]
                        if self._pattern.search(matchStr):
                            print(u"\n" + showStr)

                fd = FilteredStream(pattern)
            else:
                fd = sys.stdout

            config.saveToStream(fd, "config")
        elif showCommand == "history":
            matHistory = re.search(r"^(?:config.)?(.+)?", showArgs)
            globPattern = matHistory.group(1)
            if not globPattern:
                print("Please provide a value with --show history (e.g. history=*.doXXX)", file=sys.stderr)
                sys.exit(1)

            error = False
            for i, pattern in enumerate(fnmatch.filter(config.names(), globPattern)):
                if i > 0:
                    print("")

                pattern = pattern.split(".")
                cpath, cname = pattern[:-1], pattern[-1]
                hconfig = config  # the config that we're interested in
                # NOTE(review): this inner loop reuses "i" from the enumerate
                # above; harmless here (the outer "i" is not used afterwards)
                # but worth renaming at the next refactor.
                for i, cpt in enumerate(cpath):
                    try:
                        hconfig = getattr(hconfig, cpt)
                    except AttributeError:
                        print("Error: configuration %s has no subconfig %s" %
                              (".".join(["config"] + cpath[:i]), cpt), file=sys.stderr)
                        error = True

                try:
                    print(pexConfig.history.format(hconfig, cname))
                except KeyError:
                    print("Error: %s has no field %s" % (".".join(["config"] + cpath), cname),
                          file=sys.stderr)
                    error = True

            if error:
                sys.exit(1)

        elif showCommand == "data":
            # Ignored here; the caller handles "data".
            pass
        elif showCommand == "run":
            # "run" merely suppresses the exit below.
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            print(u"Unknown value for show: %s (choose from '%s')" %
                  (what, "', '".join("config[=XXX] data history=XXX tasks run".split())), file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
1084 
1085 
def showTaskHierarchy(config):
    """Print task hierarchy to stdout.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    """
    print(u"Subtasks:")
    # Collect the full subtask map once, then emit entries in sorted order.
    taskDict = getTaskDict(config=config)
    for fieldName in sorted(taskDict):
        print(u"%s: %s" % (fieldName, taskDict[fieldName]))
1101 
1102 
class ConfigValueAction(argparse.Action):
    """argparse action callback to override config parameters using
    name=value pairs from the command-line.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Override one or more config name value pairs.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The ``namespace.config`` attribute is updated.
        values : `list`
            A list of ``configItemName=value`` pairs.
        option_string : `str`
            Option value specified by the user.
        """
        if namespace.config is None:
            # No config to modify (e.g. while only showing help).
            return
        for pair in values:
            fieldName, sep, valueStr = pair.partition("=")
            if not valueStr:
                parser.error("%s value %s must be in form name=value" % (option_string, pair))

            # see if setting the string value works; if not, try eval
            try:
                setDottedAttr(namespace.config, fieldName, valueStr)
            except AttributeError:
                parser.error("no config field: %s" % (fieldName,))
            except Exception:
                try:
                    # NOTE: eval of a command-line-supplied string; the
                    # command line is assumed trusted here.
                    parsedValue = eval(valueStr, {})
                except Exception:
                    parser.error("cannot parse %r as a value for %s" % (valueStr, fieldName))
                try:
                    setDottedAttr(namespace.config, fieldName, parsedValue)
                except Exception as e:
                    parser.error("cannot set config.%s=%r: %s" % (fieldName, parsedValue, e))
1143 
1144 
class ConfigFileAction(argparse.Action):
    """argparse action to load config overrides from one or more files.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Load one or more files of config overrides.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The following attributes are updated by this
            method: ``namespace.config``.
        values : `list`
            A list of data config file paths.
        option_string : `str`, optional
            Option value specified by the user.
        """
        if namespace.config is None:
            # No config to load into (e.g. while only showing help).
            return
        # Apply files in the order given so later files win.
        for configfile in values:
            try:
                namespace.config.load(configfile)
            except Exception as e:
                parser.error("cannot load config file %r: %s" % (configfile, e))
1171 
1172 
class IdValueAction(argparse.Action):
    """argparse action callback to process a data ID into a dict.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Parse ``--id`` data and append results to
        ``namespace.<argument>.idList``.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command (an instance of argparse.Namespace).
            The following attributes are updated:

            - ``<idName>.idList``, where ``<idName>`` is the name of the
              ID argument, for instance ``"id"`` for ID argument ``--id``.
        values : `list`
            A list of data IDs; see Notes below.
        option_string : `str`
            Option value specified by the user.

        Notes
        -----
        The data format is::

            key1=value1_1[^value1_2[^value1_3...]
            key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. ``value1_1``) may either be a string,
        or of the form ``"int..int"`` (e.g. ``"1..3"``) which is
        interpreted as ``"1^2^3"`` (inclusive, unlike a python range).
        So ``"0^2..4^7..9"`` is equivalent to ``"0^2^3^4^7^8^9"``.
        You may also specify a stride: ``"1..5:2"`` is ``"1^3^5"``.

        The cross product is computed for keys with multiple values.
        For example::

            --id visit 1^2 ccd 1,1^2,2

        results in the following data ID dicts being appended to
        ``namespace.<argument>.idList``:

            {"visit":1, "ccd":"1,1"}
            {"visit":2, "ccd":"1,1"}
            {"visit":1, "ccd":"2,2"}
            {"visit":2, "ccd":"2,2"}
        """
        if namespace.config is None:
            # No config means nothing is being run; nothing to parse.
            return
        idDict = collections.OrderedDict()
        for nameValue in values:
            name, sep, valueStr = nameValue.partition("=")
            if name in idDict:
                parser.error("%s appears multiple times in one ID argument: %s" % (name, option_string))
            expanded = []
            for token in valueStr.split("^"):
                # "A..B[:S]" expands to the inclusive range A..B with stride S.
                mat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", token)
                if mat is None:
                    # Not a range; keep the raw string value.
                    expanded.append(token)
                else:
                    start = int(mat.group(1))
                    stop = int(mat.group(2))
                    strideStr = mat.group(3)
                    stride = int(strideStr) if strideStr else 1
                    expanded.extend(str(value) for value in range(start, stop + 1, stride))
            idDict[name] = expanded

        # Cross product of all per-key value lists, preserving key order.
        keyList = list(idDict.keys())
        valueLists = [idDict[key] for key in keyList]
        idDictList = [collections.OrderedDict(zip(keyList, combo))
                      for combo in itertools.product(*valueLists)]

        argName = option_string.lstrip("-")
        ident = getattr(namespace, argName)
        ident.idList += idDictList
1249 
1250 
class LogLevelAction(argparse.Action):
    """argparse action to set log level.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Set trace level.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. This argument is not used.
        values : `list`
            List of trace levels; each item must be of the form
            ``component_name=level`` or ``level``, where ``level``
            is a keyword (not case sensitive) or an integer.
        option_string : `str`
            Option value specified by the user.
        """
        permittedLevelList = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        permittedLevelSet = set(permittedLevelList)
        for componentLevel in values:
            component, sep, levelStr = componentLevel.partition("=")
            if not levelStr:
                # A bare "LEVEL" (no "=") applies to the root log.
                component, levelStr = None, component
            levelName = levelStr.upper()
            if levelName not in permittedLevelSet:
                parser.error("loglevel=%r not one of %s" % (levelStr, permittedLevelList))
            logLevel = getattr(lsstLog.Log, levelName)
            if component is None:
                namespace.log.setLevel(logLevel)
            else:
                lsstLog.Log.getLogger(component).setLevel(logLevel)
            # Mirror the level onto the equivalent Python logging logger
            # (component None selects the root Python logger).
            pyLevel = lsstLog.LevelTranslator.lsstLog2logging(logLevel)
            logging.getLogger(component).setLevel(pyLevel)
1289 
1290 
class ReuseAction(argparse.Action):
    """argparse action associated with ArgumentParser.addReuseOption."""

    def __call__(self, parser, namespace, value, option_string):
        # "all" is always appended last by addReuseOption, so the last
        # real subtask sits just before it.
        if value == "all":
            value = self.choices[-2]
        lastIndex = self.choices.index(value)
        # Reuse the named subtask and every one that precedes it.
        namespace.reuse = self.choices[:lastIndex + 1]
1299 
1300 
def setDottedAttr(item, name, value):
    """Set an instance attribute (like `setattr` but accepting
    hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be set.
    name : `str`
        Name of attribute to set.
    value : obj
        New value for the attribute.

    Notes
    -----
    For example if name is ``foo.bar.baz`` then ``item.foo.bar.baz``
    is set to the specified value.
    """
    # Walk down to the parent of the leaf attribute, then set the leaf.
    *parentNames, leafName = name.split(".")
    target = item
    for parentName in parentNames:
        target = getattr(target, parentName)
    setattr(target, leafName, value)
1324 
1325 
def getDottedAttr(item, name):
    """Get an attribute (like `getattr` but accepts hierarchical names
    such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be returned.
    name : `str`
        Name of the attribute to get.

    Returns
    -------
    itemAttr : obj
        If name is ``foo.bar.baz`` then the return value is
        ``item.foo.bar.baz``.
    """
    # Follow each dotted component in turn.
    attr = item
    for part in name.split("."):
        attr = getattr(attr, part)
    return attr
def getTaskDict(config, taskDict=None, baseName="")
def __init__(self, name=None, help="dataset type to process from input data repository", default=None)
def setDottedAttr(item, name, value)
def __call__(self, parser, namespace, values, option_string=None)
def obeyShowArgument(showOpts, config=None, exit=False)
std::string getPackageDir(std::string const &packageName)
def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer)
def __call__(self, parser, namespace, value, option_string)
def __init__(self, name, usage="%(prog)s input [options]", kwargs)
def __call__(self, parser, namespace, values, option_string)
def __call__(self, parser, namespace, values, option_string)
def __call__(self, parser, namespace, values, option_string)
def parse_args(self, config, args=None, log=None, override=None)
def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True, ContainerClass=DataIdContainer)