lsst.pipe.base  13.0-4-g69476a5+1
 All Classes Namespaces Files Functions Variables Pages
argumentParser.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2015 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division, print_function
23 import abc
24 import argparse
25 import collections
26 import fnmatch
27 import itertools
28 import os
29 import re
30 import shlex
31 import sys
32 import shutil
33 import textwrap
34 
35 from builtins import zip
36 from builtins import str
37 from builtins import range
38 from builtins import object
39 
40 import lsst.utils
41 import lsst.pex.config as pexConfig
42 import lsst.pex.config.history
43 import lsst.log as lsstLog
44 import lsst.daf.persistence as dafPersist
45 from future.utils import with_metaclass
46 
__all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
           "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]

# Environment variables consulted by _fixPath for the default root of the
# input, calib and output data repositories, respectively.
DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
53 
54 
def _fixPath(defName, path):
    """!Apply environment variable as default root, if present, and abspath

    @param[in] defName  name of environment variable containing default root path;
        if the environment variable does not exist then the path is relative
        to the current working directory
    @param[in] path  path relative to default root path
    @return abspath: path that has been expanded, or None if the environment variable does not exist
        and path is None
    """
    root = os.environ.get(defName)
    if root is None:
        # no default root: the path stands on its own (or there is no path at all)
        return None if path is None else os.path.abspath(path)
    # join path (or nothing, if path is falsy) onto the default root
    return os.path.abspath(os.path.join(root, path or ""))
71 
72 
class DataIdContainer(object):
    """!A container for data IDs and associated data references

    Override for data IDs that require special handling to be converted to data references,
    and specify the override class as ContainerClass for add_id_argument.
    (If you don't want the argument parser to compute data references, you may use this class
    and specify doMakeDataRefList=False in add_id_argument.)
    """

    def __init__(self, level=None):
        """!Construct a DataIdContainer

        @param[in] level  level of dataset, for butler (None for the mapper default)
        """
        self.datasetType = None  # the actual dataset type, as specified on the command line (if dynamic)
        self.level = level
        self.idList = []   # list of data ID dicts parsed from the command line
        self.refList = []  # list of butler data references computed from idList

    def setDatasetType(self, datasetType):
        """!Set actual dataset type, once it is known

        @param[in] datasetType  dataset type name (a str)
        """
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """!Validate data IDs and cast them to the correct type (modify idList in place).

        @param[in] butler  data butler (a \ref lsst.daf.persistence.butler.Butler
            "lsst.daf.persistence.Butler")
        @throw RuntimeError if setDatasetType was not called first
        @throw KeyError if the butler cannot supply keys for the dataset type
        @throw TypeError if a data ID value cannot be cast to the expected type
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            idKeyTypeDict = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError:
            raise KeyError("Cannot get keys for datasetType %s at level %s" % (self.datasetType, self.level))

        for dataDict in self.idList:
            for key, strVal in dataDict.items():
                try:
                    keyType = idKeyTypeDict[key]
                except KeyError:
                    # OK, assume that it's a valid key and guess that it's a string
                    keyType = str

                    log = lsstLog.Log.getDefaultLogger()
                    log.warn("Unexpected ID %s; guessing type is \"%s\"" %
                             (key, 'str' if keyType == str else keyType))
                    # record the guess so we only warn once per unexpected key
                    idKeyTypeDict[key] = keyType

                if keyType != str:
                    try:
                        castVal = keyType(strVal)
                    except Exception:
                        raise TypeError("Cannot cast value %r to %s for ID key %r" % (strVal, keyType, key,))
                    dataDict[key] = castVal

    def makeDataRefList(self, namespace):
        """!Compute refList based on idList

        Not called if add_id_argument called with doMakeDataRefList=False

        @param[in] namespace  results of parsing command-line (with 'butler' and 'log' elements)
        @throw RuntimeError if setDatasetType was not called first
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            refList = list(butler.subset(datasetType=self.datasetType, level=self.level, dataId=dataId))
            # exclude nonexistent data
            # this is a recursive test, e.g. for the sake of "raw" data
            # NOTE(review): dataExists appears to be a module-level helper defined
            # elsewhere in this file — confirm it is in scope
            refList = [dr for dr in refList if dataExists(butler=butler, datasetType=self.datasetType,
                                                          dataRef=dr)]
            if not refList:
                namespace.log.warn("No data found for dataId=%s", dataId)
                continue
            self.refList += refList
146 
147 
class DataIdArgument(object):
    """!Glorified struct for data about id arguments, used by ArgumentParser.add_id_argument"""

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        """!Constructor

        @param[in] name  name of identifier (argument name without dashes)
        @param[in] datasetType  type of dataset; specify a string for a fixed dataset type
            or a DatasetArgument for a dynamic dataset type (e.g. one specified by a command-line argument)
        @param[in] level  level of dataset, for butler
        @param[in] doMakeDataRefList  construct data references?
        @param[in] ContainerClass  class to contain data IDs and data references;
            the default class will work for many kinds of data, but you may have to override
            to compute some kinds of data references.
        @throw RuntimeError if name starts with "-"
        """
        if name.startswith("-"):
            raise RuntimeError("Name %s must not start with -" % (name,))
        self.name = name
        self.datasetType = datasetType
        self.level = level
        self.doMakeDataRefList = bool(doMakeDataRefList)
        self.ContainerClass = ContainerClass
        self.argName = name.lstrip("-")

    # Restored method header: the extraction dropped this line, leaving an
    # orphaned @property over a bare docstring and return statement.
    @property
    def isDynamicDatasetType(self):
        """!Is the dataset type dynamic (specified on the command line)?"""
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """!Return the dataset type as a string

        @param[in] namespace  parsed command
        """
        if self.isDynamicDatasetType:
            return self.datasetType.getDatasetType(namespace)
        else:
            return self.datasetType
186 
187 
class DynamicDatasetType(with_metaclass(abc.ABCMeta, object)):
    """!Abstract base class for a dataset type determined from parsed command-line arguments
    """

    def addArgument(self, parser, idName):
        """!Add a command-line argument to specify dataset type name, if wanted

        @param[in] parser  argument parser to which to add argument
        @param[in] idName  name of data ID argument, without the leading "--", e.g. "id"

        The default implementation does nothing
        """
        pass

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Return the dataset type as a string, based on parsed command-line arguments

        @param[in] namespace  parsed command
        """
        # unreachable when subclasses obey the ABC contract; kept as a safety net
        raise NotImplementedError("Subclasses must override")
209 
210 
212  """!A dataset type specified by a command-line argument.
213  """
214 
215  def __init__(self,
216  name=None,
217  help="dataset type to process from input data repository",
218  default=None,
219  ):
220  """!Construct a DatasetArgument
221 
222  @param[in] name name of command-line argument (including leading "--", if appropriate)
223  whose value is the dataset type; if None, uses --idName_dstype
224  where idName is the name of the data ID argument (e.g. "id")
225  @param[in] help help string for the command-line argument
226  @param[in] default default value; if None, then the command-line option is required;
227  ignored if the argument is positional (name does not start with "-")
228  because positional argument do not support default values
229  """
230  DynamicDatasetType.__init__(self)
231  self.name = name
232  self.help = help
233  self.default = default
234 
235  def getDatasetType(self, namespace):
236  """Return the dataset type as a string, from the appropriate command-line argument
237 
238  @param[in] namespace parsed command
239  """
240  argName = self.name.lstrip("-")
241  return getattr(namespace, argName)
242 
243  def addArgument(self, parser, idName):
244  """!Add a command-line argument to specify dataset type name
245 
246  Also set self.name if it is None
247  """
248  help = self.help if self.help else "dataset type for %s" % (idName,)
249  if self.name is None:
250  self.name = "--%s_dstype" % (idName,)
251  requiredDict = dict()
252  if self.name.startswith("-"):
253  requiredDict = dict(required=self.default is None)
254  parser.add_argument(
255  self.name,
256  default=self.default,
257  help=help,
258  **requiredDict) # cannot specify required=None for positional arguments
259 
260 
262  """!A dataset type specified by a config parameter
263  """
264 
265  def __init__(self, name):
266  """!Construct a ConfigDatasetType
267 
268  @param[in] name name of config option whose value is the dataset type
269  """
270  DynamicDatasetType.__init__(self)
271  self.name = name
272 
273  def getDatasetType(self, namespace):
274  """Return the dataset type as a string, from the appropriate config field
275 
276  @param[in] namespace parsed command
277  """
278  # getattr does not work reliably if the config field name is dotted,
279  # so step through one level at a time
280  keyList = self.name.split(".")
281  value = namespace.config
282  for key in keyList:
283  try:
284  value = getattr(value, key)
285  except KeyError:
286  raise RuntimeError("Cannot find config parameter %r" % (self.name,))
287  return value
288 
289 
class ArgumentParser(argparse.ArgumentParser):
    """!An argument parser for pipeline tasks that is based on argparse.ArgumentParser

    Users may wish to add additional arguments before calling parse_args.

    @note
    - I would prefer to check data ID keys and values as they are parsed,
      but the required information comes from the butler, so I have to construct a butler
      before I do this checking. Constructing a butler is slow, so I only want do it once,
      after parsing the command line, so as to catch syntax errors quickly.
    """
    requireOutput = True  # Require an output directory to be specified?

    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        """!Construct an ArgumentParser

        @param[in] name  name of top-level task; used to identify camera-specific override files
        @param[in] usage  usage string
        @param[in] **kwargs  additional keyword arguments for argparse.ArgumentParser
        """
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        argparse.ArgumentParser.__init__(self,
                                         usage=usage,
                                         fromfile_prefix_chars='@',
                                         epilog=textwrap.dedent("""Notes:
            * --config, --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --configfile foo bar
                * wrong: --configfile=foo bar
            """),
                                         formatter_class=argparse.RawDescriptionHelpFormatter,
                                         **kwargs)
        # positional argument: the input repository root
        self.add_argument(metavar='input', dest="rawInput",
                          help="path to input data repository, relative to $%s" % (DEFAULT_INPUT_NAME,))
        self.add_argument("--calib", dest="rawCalib",
                          help="path to input calibration repository, relative to $%s" %
                          (DEFAULT_CALIB_NAME,))
        self.add_argument("--output", dest="rawOutput",
                          help="path to output data repository (need not exist), relative to $%s" %
                          (DEFAULT_OUTPUT_NAME,))
        self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
                          help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
                               "optionally sets ROOT to ROOT/rerun/INPUT")
        # ConfigValueAction, ConfigFileAction and LogLevelAction are argparse
        # Action subclasses defined elsewhere in this module
        self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
                          help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
        self.add_argument("-C", "--configfile", dest="configfile", nargs="*", action=ConfigFileAction,
                          help="config override file(s)")
        self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
                          help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
                          metavar="LEVEL|COMPONENT=LEVEL")
        self.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument("--doraise", action="store_true",
                          help="raise an exception on error (else log a message and continue)?")
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument("--show", nargs="+", default=(),
                          help="display the specified information to stdout and quit "
                               "(unless run is specified).")
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument("-t", "--timeout", type=float,
                          help="Timeout for multiprocessing; maximum wall time (sec)")
        self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
                          help=("remove and re-create the output directory if it already exists "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
                          help=("backup and then overwrite existing config files instead of checking them "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
                          help="Don't copy config to file~N backup.")
        self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
                          help=("backup and then overwrite existing package versions instead of checking"
                                "them (safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
                          help="don't check package versions; useful for development")
        # default console logging configuration (INFO to stderr); --longlog
        # replaces it with a more verbose pattern in parse_args
        lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
""")

    def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
                        ContainerClass=DataIdContainer):
        """!Add a data ID argument

        Add an argument to specify data IDs. If datasetType is an instance of DatasetArgument,
        then add a second argument to specify the dataset type.

        @param[in] name  data ID argument (including leading dashes, if wanted)
        @param[in] datasetType  type of dataset; supply a string for a fixed dataset type,
            or a DynamicDatasetType, such as DatasetArgument, for a dynamically determined dataset type
        @param[in] help  help string for the argument
        @param[in] level  level of dataset, for butler
        @param[in] doMakeDataRefList  construct data references?
        @param[in] ContainerClass  data ID container class to use to contain results;
            override the default if you need a special means of computing data references from data IDs
        @throw RuntimeError if the argument name is already registered or is a reserved name

        The associated data is put into namespace.<dataIdArgument.name> as an instance of ContainerClass;
        the container includes fields:
        - idList: a list of data ID dicts
        - refList: a list of butler data references (empty if doMakeDataRefList false)
        """
        argName = name.lstrip("-")

        if argName in self._dataIdArgDict:
            raise RuntimeError("Data ID argument %s already exists" % (name,))
        if argName in set(("camera", "config", "butler", "log", "obsPkg")):
            # these names are set directly on the namespace by parse_args
            raise RuntimeError("Data ID argument %s is a reserved name" % (name,))

        self.add_argument(name, nargs="*", action=IdValueAction, help=help,
                          metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")

        dataIdArgument = DataIdArgument(
            name=argName,
            datasetType=datasetType,
            level=level,
            doMakeDataRefList=doMakeDataRefList,
            ContainerClass=ContainerClass,
        )

        if dataIdArgument.isDynamicDatasetType:
            # dataset type is determined at parse time; let it register its own argument
            datasetType.addArgument(parser=self, idName=argName)

        self._dataIdArgDict[argName] = dataIdArgument

    def parse_args(self, config, args=None, log=None, override=None):
        """!Parse arguments for a pipeline task

        @param[in,out] config  config for the task being run
        @param[in] args  argument list; if None use sys.argv[1:]
        @param[in] log  log (instance lsst.log Log); if None use the default log
        @param[in] override  a config override function; it must take the root config object
            as its only argument and must modify the config in place.
            This function is called after camera-specific overrides files are applied, and before
            command-line config overrides are applied (thus allowing the user the final word).

        @return namespace: an argparse.Namespace containing many useful fields including:
        - camera: camera name
        - config: the supplied config with all overrides applied, validated and frozen
        - butler: a butler for the data
        - an entry for each of the data ID arguments registered by add_id_argument(),
          the value of which is a DataIdArgument that includes public elements 'idList' and 'refList'
        - log: a lsst.log Log
        - an entry for each command-line argument, with the following exceptions:
          - config is the supplied config, suitably updated
          - configfile, id and loglevel are all missing
        - obsPkg: name of obs_ package for this camera
        """
        if args is None:
            args = sys.argv[1:]

        # the first argument must be the input repository path, not an option
        if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                self.exit("%s: error: Must specify input as first argument" % self.prog)

        # Note that --rerun may change namespace.input, but if it does we verify that the
        # new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error("Error: input=%r not found" % (namespace.input,))

        namespace.config = config
        namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        # hook for camera-specific subclass behavior, before overrides are applied
        self.handleCamera(namespace)

        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)

        # handle --show options other than "data" (exit deferred until after data is shown)
        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        if namespace.output:
            outputs = {'root': namespace.output, 'mode': 'rw'}
            inputs = {'root': namespace.input}
            if namespace.calib:
                inputs['mapperArgs'] = {'calibRoot': namespace.calib}
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            # no separate output repo: open the input repo read-write
            outputs = {'root': namespace.input, 'mode': 'rw'}
            if namespace.calib:
                outputs['mapperArgs'] = {'calibRoot': namespace.calib}
            namespace.butler = dafPersist.Butler(outputs=outputs)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler, hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict.keys():
                for dataRef in getattr(namespace, dataIdName).refList:
                    print("%s dataRef.dataId = %s" % (dataIdName, dataRef.dataId))

        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                sys.stderr.write("Warning: no 'debug' module found\n")
                namespace.debug = False

        del namespace.loglevel

        if namespace.longlog:
            lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddThh:mm:ss.sss} %c (%X{LABEL})(%F:%L)- %m%n
""")
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace

    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories

        This allows for hacking the directories, e.g., to include a "rerun".
        Modifications are made to the 'namespace' object in-place.
        """
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument, if rerun is specified as a colon separated
        # value, it will be parsed as an input and output. The input value will be overridden if
        # previously specified (but a check is made to make sure both inputs use the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                namespace.output = rerunDir[0]
                # follow the _parent link of an existing rerun to find its input
                if os.path.exists(os.path.join(namespace.output, "_parent")):
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error("Error: invalid argument for --rerun: %s" % namespace.rerun)
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        # raw* attributes have served their purpose; remove them from the namespace
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun

    def _processDataIds(self, namespace):
        """!Process the parsed data for each data ID argument

        Processing includes:
        - Validate data ID keys
        - Cast the data ID values to the correct type
        - Compute data references from data IDs

        @param[in,out] namespace  parsed namespace (an argparse.Namespace);
            reads these attributes:
            - butler
            - log
            - config, if any dynamic dataset types are set by a config parameter
            - dataset type arguments (e.g. id_dstype), if any dynamic dataset types are specified by such
            and modifies these attributes:
            - <name> for each data ID argument registered using add_id_argument
        """
        for dataIdArgument in self._dataIdArgDict.values():
            dataIdContainer = getattr(namespace, dataIdArgument.name)
            dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
            if dataIdArgument.doMakeDataRefList:
                try:
                    dataIdContainer.castDataIds(butler=namespace.butler)
                except (KeyError, TypeError) as e:
                    # failure of castDataIds indicates invalid command args
                    self.error(e)

                # failure of makeDataRefList indicates a bug that wants a traceback
                dataIdContainer.makeDataRefList(namespace)

    def _applyInitialOverrides(self, namespace):
        """!Apply obs-package-specific and camera-specific config override files, if found

        @param[in] namespace  parsed namespace (an argparse.Namespace);
            reads these attributes:
            - obsPkg

        Look in the package namespace.obsPkg for files:
        - config/<task_name>.py
        - config/<camera_name>/<task_name>.py
        and load if found
        """
        obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
        fileName = self._name + ".py"
        for filePath in (
            os.path.join(obsPkgDir, "config", fileName),
            os.path.join(obsPkgDir, "config", namespace.camera, fileName),
        ):
            if os.path.exists(filePath):
                namespace.log.info("Loading config overrride file %r", filePath)
                namespace.config.load(filePath)
            else:
                namespace.log.debug("Config override file does not exist: %r", filePath)

    def handleCamera(self, namespace):
        """!Perform camera-specific operations before parsing the command line.

        The default implementation does nothing.

        @param[in,out] namespace  namespace (an argparse.Namespace) with the following fields:
            - camera: the camera name
            - config: the config passed to parse_args, with no overrides applied
            - obsPkg: the obs_ package for this camera
            - log: a lsst.log Log
        """
        pass

    def convert_arg_line_to_args(self, arg_line):
        """!Allow files of arguments referenced by `@<path>` to contain multiple values on each line

        @param[in] arg_line  line of text read from an argument file
        """
        arg_line = arg_line.strip()
        # skip blank lines and whole-line comments
        if not arg_line or arg_line.startswith("#"):
            return
        for arg in shlex.split(arg_line, comments=True, posix=True):
            if not arg.strip():
                continue
            yield arg
675 
676 
678  """An ArgumentParser for pipeline tasks that don't write any output"""
679  requireOutput = False # We're not going to write anything
680 
681 
def getTaskDict(config, taskDict=None, baseName=""):
    """!Get a dictionary of task info for all subtasks in a config

    Designed to be called recursively; the user should call with only a config
    (leaving taskDict and baseName at their default values).

    @param[in] config  configuration to process, an instance of lsst.pex.config.Config
    @param[in,out] taskDict  users should not specify this argument;
        (supports recursion; if provided, taskDict is updated in place, else a new dict is started)
    @param[in] baseName  users should not specify this argument.
        (supports recursion: if a non-empty string then a period is appended and the result is used
        as a prefix for additional entries in taskDict; otherwise no prefix is used)
    @return taskDict: a dict of config field name: task name
    """
    if taskDict is None:
        taskDict = {}
    for fieldName, field in config.items():
        # only ConfigurableField-like entries carry both a value and a target
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue
        subBaseName = "%s.%s" % (baseName, fieldName) if baseName else fieldName
        try:
            taskName = "%s.%s" % (field.target.__module__, field.target.__name__)
        except Exception:
            # fall back to repr for targets without the usual attributes
            taskName = repr(field.target)
        taskDict[subBaseName] = taskName
        # recurse into the subtask's config
        getTaskDict(config=subConfig, taskDict=taskDict, baseName=subBaseName)
    return taskDict
710 
711 
def obeyShowArgument(showOpts, config=None, exit=False):
    """!Process arguments specified with --show (but ignores "data")

    @param showOpts  List of options passed to --show
    @param config  The provided config
    @param exit  Exit if "run" isn't included in showOpts

    Supports the following options in showOpts:
    - config[=PAT] Dump all the config entries, or just the ones that match the glob pattern
    - history=PAT Show where the config entries that match the glob pattern were set
    - tasks Show task hierarchy
    - data Ignored; to be processed by caller
    - run Keep going (the default behaviour is to exit if --show is specified)

    Calls sys.exit(1) if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        # split "command=args"; args is "" when there is no "="
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            # strip an optional leading "config." from the pattern
            matConfig = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matConfig.group(1)
            if pattern:
                class FilteredStream(object):
                    """A file object that only prints lines that match the glob "pattern"

                    N.b. Newlines are silently discarded and reinserted; crude but effective.
                    """

                    def __init__(self, pattern):
                        # obey case if pattern isn't lowercase or requests NOIGNORECASE
                        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

                        if mat:
                            pattern = mat.group(1)
                            self._pattern = re.compile(fnmatch.translate(pattern))
                        else:
                            if pattern != pattern.lower():
                                print(u"Matching \"%s\" without regard to case "
                                      "(append :NOIGNORECASE to prevent this)" % (pattern,), file=sys.stdout)
                            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

                    def write(self, showStr):
                        showStr = showStr.rstrip()
                        # Strip off doc string line(s) and cut off at "=" for string matching
                        matchStr = showStr.split("\n")[-1].split("=")[0]
                        if self._pattern.search(matchStr):
                            print(u"\n" + showStr)

                fd = FilteredStream(pattern)
            else:
                fd = sys.stdout

            config.saveToStream(fd, "config")
        elif showCommand == "history":
            matHistory = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matHistory.group(1)
            if not pattern:
                print("Please provide a value with --show history (e.g. history=XXX)", file=sys.stderr)
                sys.exit(1)

            # walk the dotted path down to the parent config of the named field
            pattern = pattern.split(".")
            cpath, cname = pattern[:-1], pattern[-1]
            hconfig = config  # the config that we're interested in
            for i, cpt in enumerate(cpath):
                try:
                    hconfig = getattr(hconfig, cpt)
                except AttributeError:
                    print("Error: configuration %s has no subconfig %s" %
                          (".".join(["config"] + cpath[:i]), cpt), file=sys.stderr)

                    sys.exit(1)

            try:
                print(pexConfig.history.format(hconfig, cname))
            except KeyError:
                print("Error: %s has no field %s" % (".".join(["config"] + cpath), cname), file=sys.stderr)
                sys.exit(1)

        elif showCommand == "data":
            # handled by the caller (needs the butler)
            pass
        elif showCommand == "run":
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            print(u"Unknown value for show: %s (choose from '%s')" %
                  (what, "', '".join("config[=XXX] data history=XXX tasks run".split())), file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
807 
808 
def showTaskHierarchy(config):
    """!Print the task hierarchy contained in a config to stdout

    @param[in] config: configuration to process (an lsst.pex.config.Config)
    """
    print(u"Subtasks:")
    taskDict = getTaskDict(config=config)

    # print one "fieldName: taskName" line per subtask, sorted by field name
    for fieldName in sorted(taskDict):
        print(u"%s: %s" % (fieldName, taskDict[fieldName]))
821 
822 
class ConfigValueAction(argparse.Action):
    """!argparse action callback to override config parameters using name=value pairs from the command line
    """

    def __call__(self, parser, namespace, values, option_string):
        """!Override one or more config name value pairs

        @param[in] parser argument parser (instance of ArgumentParser)
        @param[in,out] namespace parsed command (an instance of argparse.Namespace);
            updated values:
            - namespace.config
        @param[in] values a list of configItemName=value pairs
        @param[in] option_string option value specified by the user (a str)
        """
        if namespace.config is None:
            return
        for nameValue in values:
            name, sep, valueStr = nameValue.partition("=")
            if not valueStr:
                parser.error("%s value %s must be in form name=value" % (option_string, nameValue))

            # First try the raw string; many config fields accept strings directly.
            try:
                setDottedAttr(namespace.config, name, valueStr)
                continue
            except AttributeError:
                parser.error("no config field: %s" % (name,))
            except Exception:
                pass  # fall through and try to evaluate the string as Python

            # NOTE: eval of a command-line-supplied string; acceptable here because
            # the operator already controls the process, but do not reuse for
            # untrusted input.
            try:
                value = eval(valueStr, {})
            except Exception:
                parser.error("cannot parse %r as a value for %s" % (valueStr, name))
            try:
                setDottedAttr(namespace.config, name, value)
            except Exception as e:
                parser.error("cannot set config.%s=%r: %s" % (name, value, e))
858 
859 
class ConfigFileAction(argparse.Action):
    """!argparse action to load config overrides from one or more files
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """!Load one or more files of config overrides

        @param[in] parser argument parser (instance of ArgumentParser)
        @param[in,out] namespace parsed command (an instance of argparse.Namespace);
            updated values:
            - namespace.config
        @param[in] values a list of data config file paths
        @param[in] option_string option value specified by the user (a str)
        """
        if namespace.config is None:
            return
        # apply each override file in the order given on the command line
        for path in values:
            try:
                namespace.config.load(path)
            except Exception as e:
                parser.error("cannot load config file %r: %s" % (path, e))
881 
882 
class IdValueAction(argparse.Action):
    """!argparse action callback to process a data ID into a dict
    """

    def __call__(self, parser, namespace, values, option_string):
        """!Parse --id data and append results to namespace.<argument>.idList

        @param[in] parser argument parser (instance of ArgumentParser)
        @param[in,out] namespace parsed command (an instance of argparse.Namespace);
            updated values:
            - <idName>.idList, where <idName> is the name of the ID argument,
              for instance "id" for ID argument --id
        @param[in] values a list of data IDs; see data format below
        @param[in] option_string option value specified by the user (a str)

        The data format is:
        key1=value1_1[^value1_2[^value1_3...] key2=value2_1[^value2_2[^value2_3...]...

        Each value may be a plain string, or a range "int..int" (inclusive at both
        ends, unlike a python range), optionally with a stride ":int". For example
        "0^2..4^7..9" expands to "0^2^3^4^7^8^9" and "1..5:2" to "1^3^5".

        The cross product is computed for keys with multiple values, so
        --id visit 1^2 ccd 1,1^2,2 appends these dicts to namespace.<argument>.idList:
        {"visit":1, "ccd":"1,1"}
        {"visit":2, "ccd":"1,1"}
        {"visit":1, "ccd":"2,2"}
        {"visit":2, "ccd":"2,2"}
        """
        if namespace.config is None:
            return
        idDict = collections.OrderedDict()
        for pair in values:
            key, sep, valueStr = pair.partition("=")
            if key in idDict:
                parser.error("%s appears multiple times in one ID argument: %s" % (key, option_string))
            valueList = []
            for item in valueStr.split("^"):
                mat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", item)
                if mat is None:
                    valueList.append(item)
                else:
                    # expand "start..stop[:stride]" into individual string values
                    start = int(mat.group(1))
                    stop = int(mat.group(2))
                    strideStr = mat.group(3)
                    stride = int(strideStr) if strideStr else 1
                    valueList += [str(num) for num in range(start, stop + 1, stride)]
            idDict[key] = valueList

        # cross product over all keys, preserving key order in each result dict
        keyList = list(idDict.keys())
        idDictList = [collections.OrderedDict(zip(keyList, combo))
                      for combo in itertools.product(*idDict.values())]

        ident = getattr(namespace, option_string.lstrip("-"))
        ident.idList += idDictList
940 
941 
class LogLevelAction(argparse.Action):
    """!argparse action to set log level
    """

    def __call__(self, parser, namespace, values, option_string):
        """!Set trace level

        @param[in] parser argument parser (instance of ArgumentParser)
        @param[in] namespace parsed command (an instance of argparse.Namespace); ignored
        @param[in] values a list of trace levels;
            each item must be of the form 'component_name=level' or 'level',
            where level is a keyword (not case sensitive) or an integer
        @param[in] option_string option value specified by the user (a str)
        """
        levelNames = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        for spec in values:
            component, sep, levelStr = spec.partition("=")
            if not levelStr:
                # bare "level" with no component: applies to the root logger
                component, levelStr = None, component
            levelUpr = levelStr.upper()
            if levelUpr in levelNames:
                logLevel = getattr(lsstLog.Log, levelUpr)
            else:
                parser.error("loglevel=%r not one of %s" % (levelStr, levelNames))
            if component is None:
                namespace.log.setLevel(logLevel)
            else:
                lsstLog.Log.getLogger(component).setLevel(logLevel)
971 
972 
def setDottedAttr(item, name, value):
    """!Like setattr, but accepts hierarchical names, e.g. foo.bar.baz

    @param[in,out] item object whose attribute is to be set
    @param[in] name name of item to set
    @param[in] value new value for the item

    For example if name is foo.bar.baz then item.foo.bar.baz is set to the specified value.
    """
    # split off the final component; everything before it is the parent path
    parentPath, sep, leafName = name.rpartition(".")
    target = item
    if sep:
        for part in parentPath.split("."):
            target = getattr(target, part)
    setattr(target, leafName, value)
987 
988 
def getDottedAttr(item, name):
    """!Like getattr, but accepts hierarchical names, e.g. foo.bar.baz

    @param[in] item object whose attribute is to be returned
    @param[in] name name of item to get

    For example if name is foo.bar.baz then returns item.foo.bar.baz
    """
    # resolve one level, then recurse on whatever follows the first dot
    first, sep, rest = name.partition(".")
    value = getattr(item, first)
    return getDottedAttr(value, rest) if sep else value
1001 
1002 
def dataExists(butler, datasetType, dataRef):
    """!Return True if data exists at the current level or any data exists at a deeper level, False otherwise

    @param[in] butler data butler (a \ref lsst.daf.persistence.butler.Butler
        "lsst.daf.persistence.Butler")
    @param[in] datasetType dataset type (a str)
    @param[in] dataRef butler data reference (a \ref lsst.daf.persistence.butlerSubset.ButlerDataRef
        "lsst.daf.persistence.ButlerDataRef")
    """
    subRefList = dataRef.subItems()
    if not subRefList:
        # leaf reference: ask the butler directly
        return butler.datasetExists(datasetType=datasetType, dataId=dataRef.dataId)
    # otherwise recurse; True as soon as any deeper reference has data
    return any(dataExists(butler, datasetType, subRef) for subRef in subRefList)
def setDatasetType
Set actual dataset type, once it is known.
def castDataIds
Validate data IDs and cast them to the correct type (modify idList in place).
argparse action to set log level
An argument parser for pipeline tasks that is based on argparse.ArgumentParser.
def dataExists
Return True if data exists at the current level or any data exists at a deeper level, False otherwise.
A dataset type specified by a command-line argument.
def showTaskHierarchy
Print task hierarchy to stdout.
Glorified struct for data about id arguments, used by ArgumentParser.add_id_argument.
def setDottedAttr
Like setattr, but accepts hierarchical names, e.g.
def __call__
Parse --id data and append results to namespace.
argparse action callback to override config parameters using name=value pairs from the command line ...
def getTaskDict
Get a dictionary of task info for all subtasks in a config.
def _applyInitialOverrides
Apply obs-package-specific and camera-specific config override files, if found.
def makeDataRefList
Compute refList based on idList.
def __init__
Construct a DatasetArgument.
def convert_arg_line_to_args
Allow files of arguments referenced by @<path> to contain multiple values on each line...
def getDottedAttr
Like getattr, but accepts hierarchical names, e.g.
def addArgument
Add a command-line argument to specify dataset type name.
def __call__
Load one or more files of config overrides.
def isDynamicDatasetType
Is the dataset type dynamic (specified on the command line)?
def __call__
Override one or more config name value pairs.
argparse action callback to process a data ID into a dict
def __init__
Construct a ConfigDatasetType.
def handleCamera
Perform camera-specific operations before parsing the command line.
def parse_args
Parse arguments for a pipeline task.
def _processDataIds
Process the parsed data for each data ID argument.
A dataset type specified by a config parameter.
def __init__
Construct an ArgumentParser.
def obeyShowArgument
Process arguments specified with --show (but ignores "data")
Abstract base class for a dataset type determined from parsed command-line arguments.
def addArgument
Add a command-line argument to specify dataset type name, if wanted.
argparse action to load config overrides from one or more files
def getDatasetType
Return the dataset type as a string.
def __init__
Construct a DataIdContainer.
A container for data IDs and associated data references.