lsst.pipe.base  13.0-9-g1c7d9c5+4
 All Classes Namespaces Files Functions Variables Pages
argumentParser.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2015 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division, print_function
23 import abc
24 import argparse
25 import collections
26 import fnmatch
27 import itertools
28 import os
29 import re
30 import shlex
31 import sys
32 import shutil
33 import textwrap
34 
35 from builtins import zip
36 from builtins import str
37 from builtins import range
38 from builtins import object
39 
40 import lsst.utils
41 import lsst.pex.config as pexConfig
42 import lsst.pex.config.history
43 import lsst.log as lsstLog
44 import lsst.daf.persistence as dafPersist
45 from future.utils import with_metaclass
46 
__all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
           "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]

# Names of environment variables that, when set, supply the default root
# directories for the input, calib and output repositories (see _fixPath).
DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
53 
54 
55 def _fixPath(defName, path):
56  """!Apply environment variable as default root, if present, and abspath
57 
58  @param[in] defName name of environment variable containing default root path;
59  if the environment variable does not exist then the path is relative
60  to the current working directory
61  @param[in] path path relative to default root path
62  @return abspath: path that has been expanded, or None if the environment variable does not exist
63  and path is None
64  """
65  defRoot = os.environ.get(defName)
66  if defRoot is None:
67  if path is None:
68  return None
69  return os.path.abspath(path)
70  return os.path.abspath(os.path.join(defRoot, path or ""))
71 
72 
class DataIdContainer(object):
    """!A container for data IDs and associated data references

    Override for data IDs that require special handling to be converted to data references,
    and specify the override class as ContainerClass for add_id_argument.
    (If you don't want the argument parser to compute data references, you may use this class
    and specify doMakeDataRefList=False in add_id_argument.)
    """

    def __init__(self, level=None):
        """!Construct a DataIdContainer

        @param[in] level  level of dataset, for butler (may be None)
        """
        self.datasetType = None  # the actual dataset type, as specified on the command line (if dynamic)
        self.level = level
        self.idList = []   # list of data ID dicts parsed from the command line
        self.refList = []  # list of butler data references; filled in by makeDataRefList

    def setDatasetType(self, datasetType):
        """!Set actual dataset type, once it is known

        @param[in] datasetType  dataset type, as a string
        """
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """!Validate data IDs and cast them to the correct type (modify idList in place).

        @param[in] butler  data butler (a \ref lsst.daf.persistence.butler.Butler
            "lsst.daf.persistence.Butler")
        @throw RuntimeError if setDatasetType was not called first
        @throw KeyError if the butler cannot provide keys for the dataset type
        @throw TypeError if a value cannot be cast to the type the butler expects
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            idKeyTypeDict = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError:
            raise KeyError("Cannot get keys for datasetType %s at level %s" % (self.datasetType, self.level))

        for dataDict in self.idList:
            for key, strVal in dataDict.items():
                try:
                    keyType = idKeyTypeDict[key]
                except KeyError:
                    # OK, assume that it's a valid key and guess that it's a string;
                    # warn once and remember the guess so later IDs don't re-warn
                    keyType = str

                    log = lsstLog.Log.getDefaultLogger()
                    log.warn("Unexpected ID %s; guessing type is \"%s\"" %
                             (key, 'str' if keyType == str else keyType))
                    idKeyTypeDict[key] = keyType

                if keyType != str:
                    try:
                        castVal = keyType(strVal)
                    except Exception:
                        raise TypeError("Cannot cast value %r to %s for ID key %r" % (strVal, keyType, key,))
                    dataDict[key] = castVal

    def makeDataRefList(self, namespace):
        """!Compute refList based on idList

        Not called if add_id_argument called with doMakeDataRefList=False

        @param[in] namespace  results of parsing command-line (with 'butler' and 'log' elements)
        @throw RuntimeError if setDatasetType was not called first
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            refList = list(butler.subset(datasetType=self.datasetType, level=self.level, dataId=dataId))
            # exclude nonexistent data
            # this is a recursive test, e.g. for the sake of "raw" data
            refList = [dr for dr in refList if dataExists(butler=butler, datasetType=self.datasetType,
                                                          dataRef=dr)]
            if not refList:
                namespace.log.warn("No data found for dataId=%s", dataId)
                continue
            self.refList += refList
146 
147 
class DataIdArgument(object):
    """!Glorified struct for data about id arguments, used by ArgumentParser.add_id_argument"""

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        """!Constructor

        @param[in] name  name of identifier (argument name without dashes)
        @param[in] datasetType  type of dataset; specify a string for a fixed dataset type
            or a DatasetArgument for a dynamic dataset type (e.g. one specified by a
            command-line argument)
        @param[in] level  level of dataset, for butler
        @param[in] doMakeDataRefList  construct data references?
        @param[in] ContainerClass  class to contain data IDs and data references;
            the default class will work for many kinds of data, but you may have to override
            to compute some kinds of data references.
        @throw RuntimeError if name starts with "-"
        """
        if name.startswith("-"):
            raise RuntimeError("Name %s must not start with -" % (name,))
        self.name = name
        self.datasetType = datasetType
        self.level = level
        self.doMakeDataRefList = bool(doMakeDataRefList)
        self.ContainerClass = ContainerClass
        self.argName = name.lstrip("-")

    @property
    def isDynamicDatasetType(self):
        """!Is the dataset type dynamic (specified on the command line)?"""
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """!Return the dataset type as a string

        @param[in] namespace  parsed command
        """
        if self.isDynamicDatasetType:
            return self.datasetType.getDatasetType(namespace)
        else:
            return self.datasetType
186 
187 
class DynamicDatasetType(with_metaclass(abc.ABCMeta, object)):
    """!Abstract base class for a dataset type determined from parsed command-line arguments
    """

    def addArgument(self, parser, idName):
        """!Add a command-line argument to specify dataset type name, if wanted

        @param[in] parser  argument parser to which to add argument
        @param[in] idName  name of data ID argument, without the leading "--", e.g. "id"

        The default implementation does nothing
        """
        pass

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Return the dataset type as a string, based on parsed command-line arguments

        @param[in] namespace  parsed command
        """
        raise NotImplementedError("Subclasses must override")
209 
210 
class DatasetArgument(DynamicDatasetType):
    """!A dataset type specified by a command-line argument.
    """

    def __init__(self,
                 name=None,
                 help="dataset type to process from input data repository",
                 default=None,
                 ):
        """!Construct a DatasetArgument

        @param[in] name  name of command-line argument (including leading "--", if appropriate)
            whose value is the dataset type; if None, uses --idName_dstype
            where idName is the name of the data ID argument (e.g. "id")
        @param[in] help  help string for the command-line argument
        @param[in] default  default value; if None, then the command-line option is required;
            ignored if the argument is positional (name does not start with "-")
            because positional arguments do not support default values
        """
        DynamicDatasetType.__init__(self)
        self.name = name
        self.help = help
        self.default = default

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate command-line argument

        @param[in] namespace  parsed command
        """
        argName = self.name.lstrip("-")
        return getattr(namespace, argName)

    def addArgument(self, parser, idName):
        """!Add a command-line argument to specify dataset type name

        Also set self.name if it is None

        @param[in] parser  argument parser to which to add the argument
        @param[in] idName  name of data ID argument, without the leading "--", e.g. "id"
        """
        help = self.help if self.help else "dataset type for %s" % (idName,)
        if self.name is None:
            self.name = "--%s_dstype" % (idName,)
        requiredDict = dict()
        if self.name.startswith("-"):
            requiredDict = dict(required=self.default is None)
        parser.add_argument(
            self.name,
            default=self.default,
            help=help,
            **requiredDict)  # cannot specify required=None for positional arguments
259 
260 
class ConfigDatasetType(DynamicDatasetType):
    """!A dataset type specified by a config parameter
    """

    def __init__(self, name):
        """!Construct a ConfigDatasetType

        @param[in] name  name of config option whose value is the dataset type
        """
        DynamicDatasetType.__init__(self)
        self.name = name

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate config field

        @param[in] namespace  parsed command
        @throw RuntimeError if the config parameter cannot be found
        """
        # getattr does not work reliably if the config field name is dotted,
        # so step through one level at a time
        # NOTE(review): catching KeyError (not AttributeError) matches what
        # pex.config fields raise for a missing name -- confirm against pex_config
        keyList = self.name.split(".")
        value = namespace.config
        for key in keyList:
            try:
                value = getattr(value, key)
            except KeyError:
                raise RuntimeError("Cannot find config parameter %r" % (self.name,))
        return value
288 
289 
290 class ArgumentParser(argparse.ArgumentParser):
291  """!An argument parser for pipeline tasks that is based on argparse.ArgumentParser
292 
293  Users may wish to add additional arguments before calling parse_args.
294 
295  @note
296  - I would prefer to check data ID keys and values as they are parsed,
297  but the required information comes from the butler, so I have to construct a butler
298  before I do this checking. Constructing a butler is slow, so I only want do it once,
299  after parsing the command line, so as to catch syntax errors quickly.
300  """
301  requireOutput = True # Require an output directory to be specified?
302 
303  def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
304  """!Construct an ArgumentParser
305 
306  @param[in] name name of top-level task; used to identify camera-specific override files
307  @param[in] usage usage string
308  @param[in] **kwargs additional keyword arguments for argparse.ArgumentParser
309  """
310  self._name = name
311  self._dataIdArgDict = {} # Dict of data identifier specifications, by argument name
312  argparse.ArgumentParser.__init__(self,
313  usage=usage,
314  fromfile_prefix_chars='@',
315  epilog=textwrap.dedent("""Notes:
316  * --config, --configfile, --id, --loglevel and @file may appear multiple times;
317  all values are used, in order left to right
318  * @file reads command-line options from the specified file:
319  * data may be distributed among multiple lines (e.g. one option per line)
320  * data after # is treated as a comment and ignored
321  * blank lines and lines starting with # are ignored
322  * To specify multiple values for an option, do not use = after the option name:
323  * right: --configfile foo bar
324  * wrong: --configfile=foo bar
325  """),
326  formatter_class=argparse.RawDescriptionHelpFormatter,
327  **kwargs)
328  self.add_argument(metavar='input', dest="rawInput",
329  help="path to input data repository, relative to $%s" % (DEFAULT_INPUT_NAME,))
330  self.add_argument("--calib", dest="rawCalib",
331  help="path to input calibration repository, relative to $%s" %
332  (DEFAULT_CALIB_NAME,))
333  self.add_argument("--output", dest="rawOutput",
334  help="path to output data repository (need not exist), relative to $%s" %
335  (DEFAULT_OUTPUT_NAME,))
336  self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
337  help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
338  "optionally sets ROOT to ROOT/rerun/INPUT")
339  self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
340  help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
341  self.add_argument("-C", "--configfile", dest="configfile", nargs="*", action=ConfigFileAction,
342  help="config override file(s)")
343  self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
344  help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
345  metavar="LEVEL|COMPONENT=LEVEL")
346  self.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
347  self.add_argument("--debug", action="store_true", help="enable debugging output?")
348  self.add_argument("--doraise", action="store_true",
349  help="raise an exception on error (else log a message and continue)?")
350  self.add_argument("--profile", help="Dump cProfile statistics to filename")
351  self.add_argument("--show", nargs="+", default=(),
352  help="display the specified information to stdout and quit "
353  "(unless run is specified).")
354  self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
355  self.add_argument("-t", "--timeout", type=float,
356  help="Timeout for multiprocessing; maximum wall time (sec)")
357  self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
358  help=("remove and re-create the output directory if it already exists "
359  "(safe with -j, but not all other forms of parallel execution)"))
360  self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
361  help=("backup and then overwrite existing config files instead of checking them "
362  "(safe with -j, but not all other forms of parallel execution)"))
363  self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
364  help="Don't copy config to file~N backup.")
365  self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
366  help=("backup and then overwrite existing package versions instead of checking"
367  "them (safe with -j, but not all other forms of parallel execution)"))
368  self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
369  help="don't check package versions; useful for development")
370  lsstLog.configure_prop("""
371 log4j.rootLogger=INFO, A1
372 log4j.appender.A1=ConsoleAppender
373 log4j.appender.A1.Target=System.err
374 log4j.appender.A1.layout=PatternLayout
375 log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
376 """)
377 
378  def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
379  ContainerClass=DataIdContainer):
380  """!Add a data ID argument
381 
382  Add an argument to specify data IDs. If datasetType is an instance of DatasetArgument,
383  then add a second argument to specify the dataset type.
384 
385  @param[in] name data ID argument (including leading dashes, if wanted)
386  @param[in] datasetType type of dataset; supply a string for a fixed dataset type,
387  or a DynamicDatasetType, such as DatasetArgument, for a dynamically determined dataset type
388  @param[in] help help string for the argument
389  @param[in] level level of dataset, for butler
390  @param[in] doMakeDataRefList construct data references?
391  @param[in] ContainerClass data ID container class to use to contain results;
392  override the default if you need a special means of computing data references from data IDs
393 
394  The associated data is put into namespace.<dataIdArgument.name> as an instance of ContainerClass;
395  the container includes fields:
396  - idList: a list of data ID dicts
397  - refList: a list of butler data references (empty if doMakeDataRefList false)
398  """
399  argName = name.lstrip("-")
400 
401  if argName in self._dataIdArgDict:
402  raise RuntimeError("Data ID argument %s already exists" % (name,))
403  if argName in set(("camera", "config", "butler", "log", "obsPkg")):
404  raise RuntimeError("Data ID argument %s is a reserved name" % (name,))
405 
406  self.add_argument(name, nargs="*", action=IdValueAction, help=help,
407  metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")
408 
409  dataIdArgument = DataIdArgument(
410  name=argName,
411  datasetType=datasetType,
412  level=level,
413  doMakeDataRefList=doMakeDataRefList,
414  ContainerClass=ContainerClass,
415  )
416 
417  if dataIdArgument.isDynamicDatasetType:
418  datasetType.addArgument(parser=self, idName=argName)
419 
420  self._dataIdArgDict[argName] = dataIdArgument
421 
422  def parse_args(self, config, args=None, log=None, override=None):
423  """!Parse arguments for a pipeline task
424 
425  @param[in,out] config config for the task being run
426  @param[in] args argument list; if None use sys.argv[1:]
427  @param[in] log log (instance lsst.log Log); if None use the default log
428  @param[in] override a config override function; it must take the root config object
429  as its only argument and must modify the config in place.
430  This function is called after camera-specific overrides files are applied, and before
431  command-line config overrides are applied (thus allowing the user the final word).
432 
433  @return namespace: an argparse.Namespace containing many useful fields including:
434  - camera: camera name
435  - config: the supplied config with all overrides applied, validated and frozen
436  - butler: a butler for the data
437  - an entry for each of the data ID arguments registered by add_id_argument(),
438  the value of which is a DataIdArgument that includes public elements 'idList' and 'refList'
439  - log: a lsst.log Log
440  - an entry for each command-line argument, with the following exceptions:
441  - config is the supplied config, suitably updated
442  - configfile, id and loglevel are all missing
443  - obsPkg: name of obs_ package for this camera
444  """
445  if args is None:
446  args = sys.argv[1:]
447 
448  if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
449  self.print_help()
450  if len(args) == 1 and args[0] in ("-h", "--help"):
451  self.exit()
452  else:
453  self.exit("%s: error: Must specify input as first argument" % self.prog)
454 
455  # Note that --rerun may change namespace.input, but if it does we verify that the
456  # new input has the same mapper class.
457  namespace = argparse.Namespace()
458  namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
459  if not os.path.isdir(namespace.input):
460  self.error("Error: input=%r not found" % (namespace.input,))
461 
462  namespace.config = config
463  namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
464  mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
465  namespace.camera = mapperClass.getCameraName()
466  namespace.obsPkg = mapperClass.getPackageName()
467 
468  self.handleCamera(namespace)
469 
470  self._applyInitialOverrides(namespace)
471  if override is not None:
472  override(namespace.config)
473 
474  # Add data ID containers to namespace
475  for dataIdArgument in self._dataIdArgDict.values():
476  setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))
477 
478  namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
479  del namespace.configfile
480 
481  self._parseDirectories(namespace)
482 
483  if namespace.clobberOutput:
484  if namespace.output is None:
485  self.error("--clobber-output is only valid with --output or --rerun")
486  elif namespace.output == namespace.input:
487  self.error("--clobber-output is not valid when the output and input repos are the same")
488  if os.path.exists(namespace.output):
489  namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
490  shutil.rmtree(namespace.output)
491 
492  namespace.log.debug("input=%s", namespace.input)
493  namespace.log.debug("calib=%s", namespace.calib)
494  namespace.log.debug("output=%s", namespace.output)
495 
496  obeyShowArgument(namespace.show, namespace.config, exit=False)
497 
498  # No environment variable or --output or --rerun specified.
499  if self.requireOutput and namespace.output is None and namespace.rerun is None:
500  self.error("no output directory specified.\n"
501  "An output directory must be specified with the --output or --rerun\n"
502  "command-line arguments.\n")
503 
504  butlerArgs = {} # common arguments for butler elements
505  if namespace.calib:
506  butlerArgs = {'mapperArgs': {'calibRoot': namespace.calib}}
507  if namespace.output:
508  outputs = {'root': namespace.output, 'mode': 'rw'}
509  inputs = {'root': namespace.input}
510  inputs.update(butlerArgs)
511  outputs.update(butlerArgs)
512  namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
513  else:
514  outputs = {'root': namespace.input, 'mode': 'rw'}
515  outputs.update(butlerArgs)
516  namespace.butler = dafPersist.Butler(outputs=outputs)
517 
518  # convert data in each of the identifier lists to proper types
519  # this is done after constructing the butler, hence after parsing the command line,
520  # because it takes a long time to construct a butler
521  self._processDataIds(namespace)
522  if "data" in namespace.show:
523  for dataIdName in self._dataIdArgDict.keys():
524  for dataRef in getattr(namespace, dataIdName).refList:
525  print("%s dataRef.dataId = %s" % (dataIdName, dataRef.dataId))
526 
527  if namespace.show and "run" not in namespace.show:
528  sys.exit(0)
529 
530  if namespace.debug:
531  try:
532  import debug
533  assert debug # silence pyflakes
534  except ImportError:
535  sys.stderr.write("Warning: no 'debug' module found\n")
536  namespace.debug = False
537 
538  del namespace.loglevel
539 
540  if namespace.longlog:
541  lsstLog.configure_prop("""
542 log4j.rootLogger=INFO, A1
543 log4j.appender.A1=ConsoleAppender
544 log4j.appender.A1.Target=System.err
545 log4j.appender.A1.layout=PatternLayout
546 log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddThh:mm:ss.sss} %c (%X{LABEL})(%F:%L)- %m%n
547 """)
548  del namespace.longlog
549 
550  namespace.config.validate()
551  namespace.config.freeze()
552 
553  return namespace
554 
555  def _parseDirectories(self, namespace):
556  """Parse input, output and calib directories
557 
558  This allows for hacking the directories, e.g., to include a "rerun".
559  Modifications are made to the 'namespace' object in-place.
560  """
561  mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
562  namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)
563 
564  # If an output directory is specified, process it and assign it to the namespace
565  if namespace.rawOutput:
566  namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
567  else:
568  namespace.output = None
569 
570  # This section processes the rerun argument, if rerun is specified as a colon separated
571  # value, it will be parsed as an input and output. The input value will be overridden if
572  # previously specified (but a check is made to make sure both inputs use the same mapper)
573  if namespace.rawRerun:
574  if namespace.output:
575  self.error("Error: cannot specify both --output and --rerun")
576  namespace.rerun = namespace.rawRerun.split(":")
577  rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
578  modifiedInput = False
579  if len(rerunDir) == 2:
580  namespace.input, namespace.output = rerunDir
581  modifiedInput = True
582  elif len(rerunDir) == 1:
583  namespace.output = rerunDir[0]
584  if os.path.exists(os.path.join(namespace.output, "_parent")):
585  namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
586  modifiedInput = True
587  else:
588  self.error("Error: invalid argument for --rerun: %s" % namespace.rerun)
589  if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
590  self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
591  else:
592  namespace.rerun = None
593  del namespace.rawInput
594  del namespace.rawCalib
595  del namespace.rawOutput
596  del namespace.rawRerun
597 
598  def _processDataIds(self, namespace):
599  """!Process the parsed data for each data ID argument
600 
601  Processing includes:
602  - Validate data ID keys
603  - Cast the data ID values to the correct type
604  - Compute data references from data IDs
605 
606  @param[in,out] namespace parsed namespace (an argparse.Namespace);
607  reads these attributes:
608  - butler
609  - log
610  - config, if any dynamic dataset types are set by a config parameter
611  - dataset type arguments (e.g. id_dstype), if any dynamic dataset types are specified by such
612  and modifies these attributes:
613  - <name> for each data ID argument registered using add_id_argument
614  """
615  for dataIdArgument in self._dataIdArgDict.values():
616  dataIdContainer = getattr(namespace, dataIdArgument.name)
617  dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
618  if dataIdArgument.doMakeDataRefList:
619  try:
620  dataIdContainer.castDataIds(butler=namespace.butler)
621  except (KeyError, TypeError) as e:
622  # failure of castDataIds indicates invalid command args
623  self.error(e)
624 
625  # failure of makeDataRefList indicates a bug that wants a traceback
626  dataIdContainer.makeDataRefList(namespace)
627 
628  def _applyInitialOverrides(self, namespace):
629  """!Apply obs-package-specific and camera-specific config override files, if found
630 
631  @param[in] namespace parsed namespace (an argparse.Namespace);
632  reads these attributes:
633  - obsPkg
634 
635  Look in the package namespace.obsPkg for files:
636  - config/<task_name>.py
637  - config/<camera_name>/<task_name>.py
638  and load if found
639  """
640  obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
641  fileName = self._name + ".py"
642  for filePath in (
643  os.path.join(obsPkgDir, "config", fileName),
644  os.path.join(obsPkgDir, "config", namespace.camera, fileName),
645  ):
646  if os.path.exists(filePath):
647  namespace.log.info("Loading config overrride file %r", filePath)
648  namespace.config.load(filePath)
649  else:
650  namespace.log.debug("Config override file does not exist: %r", filePath)
651 
652  def handleCamera(self, namespace):
653  """!Perform camera-specific operations before parsing the command line.
654 
655  The default implementation does nothing.
656 
657  @param[in,out] namespace namespace (an argparse.Namespace) with the following fields:
658  - camera: the camera name
659  - config: the config passed to parse_args, with no overrides applied
660  - obsPkg: the obs_ package for this camera
661  - log: a lsst.log Log
662  """
663  pass
664 
665  def convert_arg_line_to_args(self, arg_line):
666  """!Allow files of arguments referenced by `@<path>` to contain multiple values on each line
667 
668  @param[in] arg_line line of text read from an argument file
669  """
670  arg_line = arg_line.strip()
671  if not arg_line or arg_line.startswith("#"):
672  return
673  for arg in shlex.split(arg_line, comments=True, posix=True):
674  if not arg.strip():
675  continue
676  yield arg
677 
678 
class InputOnlyArgumentParser(ArgumentParser):
    """An ArgumentParser for pipeline tasks that don't write any output"""
    requireOutput = False  # We're not going to write anything
682 
683 
def getTaskDict(config, taskDict=None, baseName=""):
    """!Get a dictionary of task info for all subtasks in a config

    Designed to be called recursively; the user should call with only a config
    (leaving taskDict and baseName at their default values).

    @param[in] config  configuration to process, an instance of lsst.pex.config.Config
    @param[in,out] taskDict  users should not specify this argument;
        (supports recursion; if provided, taskDict is updated in place, else a new
        dict is started)
    @param[in] baseName  users should not specify this argument.
        (supports recursion: if a non-empty string then a period is appended and the
        result is used as a prefix for additional entries in taskDict; otherwise no
        prefix is used)
    @return taskDict: a dict of config field name: task name
    """
    taskDict = dict() if taskDict is None else taskDict
    for fieldName, field in config.items():
        # Only ConfigurableField-like entries carry both a value and a target
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue
        subBaseName = "%s.%s" % (baseName, fieldName) if baseName else fieldName
        target = field.target
        try:
            taskName = "%s.%s" % (target.__module__, target.__name__)
        except Exception:
            # targets without __module__/__name__ fall back to their repr
            taskName = repr(target)
        taskDict[subBaseName] = taskName
        getTaskDict(config=subConfig, taskDict=taskDict, baseName=subBaseName)
    return taskDict
712 
713 
def obeyShowArgument(showOpts, config=None, exit=False):
    """!Process arguments specified with --show (but ignores "data")

    @param showOpts  List of options passed to --show
    @param config  The provided config
    @param exit  Exit (with status 0) if "run" isn't included in showOpts

    Supports the following options in showOpts:
    - config[=PAT]  Dump all the config entries, or just the ones that match the glob pattern
    - history=PAT  Show where the config entries that match the glob pattern were set
    - tasks  Show task hierarchy
    - data  Ignored; to be processed by caller
    - run  Keep going (the default behaviour is to exit if --show is specified)

    Calls sys.exit(1) if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        # Split "name=value" into command and argument; argument defaults to ""
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            # Strip an optional leading "config." from the pattern
            matConfig = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matConfig.group(1)
            if pattern:
                class FilteredStream(object):
                    """A file object that only prints lines that match the glob "pattern"

                    N.b. Newlines are silently discarded and reinserted; crude but effective.
                    """

                    def __init__(self, pattern):
                        # obey case if pattern isn't lowercase or requests NOIGNORECASE
                        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

                        if mat:
                            pattern = mat.group(1)
                            self._pattern = re.compile(fnmatch.translate(pattern))
                        else:
                            if pattern != pattern.lower():
                                print(u"Matching \"%s\" without regard to case "
                                      "(append :NOIGNORECASE to prevent this)" % (pattern,),
                                      file=sys.stdout)
                            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

                    def write(self, showStr):
                        showStr = showStr.rstrip()
                        # Strip off doc string line(s) and cut off at "=" for string matching
                        matchStr = showStr.split("\n")[-1].split("=")[0]
                        if self._pattern.search(matchStr):
                            print(u"\n" + showStr)

                fd = FilteredStream(pattern)
            else:
                fd = sys.stdout

            config.saveToStream(fd, "config")
        elif showCommand == "history":
            # Strip an optional leading "config." from the pattern
            matHistory = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matHistory.group(1)
            if not pattern:
                print("Please provide a value with --show history (e.g. history=XXX)", file=sys.stderr)
                sys.exit(1)

            # Walk down the dotted path to the sub-config that owns the field
            pattern = pattern.split(".")
            cpath, cname = pattern[:-1], pattern[-1]
            hconfig = config  # the config that we're interested in
            for i, cpt in enumerate(cpath):
                try:
                    hconfig = getattr(hconfig, cpt)
                except AttributeError:
                    print("Error: configuration %s has no subconfig %s" %
                          (".".join(["config"] + cpath[:i]), cpt), file=sys.stderr)

                    sys.exit(1)

            try:
                print(pexConfig.history.format(hconfig, cname))
            except KeyError:
                print("Error: %s has no field %s" % (".".join(["config"] + cpath), cname),
                      file=sys.stderr)
                sys.exit(1)

        elif showCommand == "data":
            pass
        elif showCommand == "run":
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            print(u"Unknown value for show: %s (choose from '%s')" %
                  (what, "', '".join("config[=XXX] data history=XXX tasks run".split())),
                  file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
809 
810 
def showTaskHierarchy(config):
    """!Print task hierarchy to stdout

    Writes one "fieldName: taskName" line per subtask found in the config,
    sorted by field name.

    @param[in] config: configuration to process (an lsst.pex.config.Config)
    """
    print(u"Subtasks:")
    subtaskInfo = getTaskDict(config=config)
    for fieldName in sorted(subtaskInfo):
        print(u"%s: %s" % (fieldName, subtaskInfo[fieldName]))
823 
824 
class ConfigValueAction(argparse.Action):
    """!argparse action callback to override config parameters using name=value pairs from the command line
    """

    def __call__(self, parser, namespace, values, option_string):
        """!Override one or more config name value pairs

        @param[in] parser argument parser (instance of ArgumentParser)
        @param[in,out] namespace parsed command (an instance of argparse.Namespace);
            updated values:
            - namespace.config
        @param[in] values a list of configItemName=value pairs
        @param[in] option_string option value specified by the user (a str)
        """
        if namespace.config is None:
            return
        for pair in values:
            name, _, valueStr = pair.partition("=")
            if not valueStr:
                parser.error("%s value %s must be in form name=value" % (option_string, pair))
            self._applyOverride(parser, namespace.config, name, valueStr)

    @staticmethod
    def _applyOverride(parser, config, name, valueStr):
        """!Set config.<name> from valueStr, trying the raw string first and eval as a fallback"""
        try:
            setDottedAttr(config, name, valueStr)
            return
        except AttributeError:
            parser.error("no config field: %s" % (name,))
            return
        except Exception:
            # the field rejected the raw string; fall through and try eval
            pass
        try:
            # NOTE: eval of a command-line-supplied value; the input comes from
            # the user's own command line, not an external source
            value = eval(valueStr, {})
        except Exception:
            parser.error("cannot parse %r as a value for %s" % (valueStr, name))
        try:
            setDottedAttr(config, name, value)
        except Exception as e:
            parser.error("cannot set config.%s=%r: %s" % (name, value, e))
860 
861 
class ConfigFileAction(argparse.Action):
    """!argparse action to load config overrides from one or more files
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """!Load one or more files of config overrides

        @param[in] parser argument parser (instance of ArgumentParser)
        @param[in,out] namespace parsed command (an instance of argparse.Namespace);
            updated values:
            - namespace.config
        @param[in] values a list of data config file paths
        @param[in] option_string option value specified by the user (a str)
        """
        config = namespace.config
        if config is None:
            return
        for configfile in values:
            try:
                config.load(configfile)
            except Exception as e:
                parser.error("cannot load config file %r: %s" % (configfile, e))
883 
884 
class IdValueAction(argparse.Action):
    """!argparse action callback to process a data ID into a dict
    """

    def __call__(self, parser, namespace, values, option_string):
        """!Parse --id data and append results to namespace.<argument>.idList

        @param[in] parser argument parser (instance of ArgumentParser)
        @param[in,out] namespace parsed command (an instance of argparse.Namespace);
            updated values:
            - <idName>.idList, where <idName> is the name of the ID argument,
                for instance "id" for ID argument --id
        @param[in] values a list of data IDs; see data format below
        @param[in] option_string option value specified by the user (a str)

        The data format is:
        key1=value1_1[^value1_2[^value1_3...] key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. value1_1) may either be a string, or of the form "int..int" (e.g. "1..3")
        which is interpreted as "1^2^3" (inclusive, unlike a python range). So "0^2..4^7..9" is
        equivalent to "0^2^3^4^7^8^9". You may also specify a stride: "1..5:2" is "1^3^5"

        The cross product is computed for keys with multiple values. For example:
            --id visit 1^2 ccd 1,1^2,2
        results in the following data ID dicts being appended to namespace.<argument>.idList:
            {"visit":1, "ccd":"1,1"}
            {"visit":2, "ccd":"1,1"}
            {"visit":1, "ccd":"2,2"}
            {"visit":2, "ccd":"2,2"}
        """
        if namespace.config is None:
            return
        idDict = collections.OrderedDict()
        for nameValue in values:
            name, sep, valueStr = nameValue.partition("=")
            if name in idDict:
                parser.error("%s appears multiple times in one ID argument: %s" % (name, option_string))
            idDict[name] = []
            for v in valueStr.split("^"):
                # "v1..v2" or "v1..v2:stride": expand to an inclusive integer range
                mat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", v)
                if mat:
                    v1 = int(mat.group(1))
                    v2 = int(mat.group(2))
                    v3 = mat.group(3)
                    v3 = int(v3) if v3 else 1
                    # use a distinct loop variable; the original rebound `v`,
                    # shadowing the outer loop variable
                    for rangeValue in range(v1, v2 + 1, v3):
                        idDict[name].append(str(rangeValue))
                else:
                    idDict[name].append(v)

        # cross product of all per-key value lists, preserving key order
        iterList = [idDict[key] for key in idDict.keys()]
        idDictList = [collections.OrderedDict(zip(idDict.keys(), valList))
                      for valList in itertools.product(*iterList)]

        argName = option_string.lstrip("-")
        ident = getattr(namespace, argName)
        ident.idList += idDictList
942 
943 
class LogLevelAction(argparse.Action):
    """!argparse action to set log level
    """

    def __call__(self, parser, namespace, values, option_string):
        """!Set trace level

        @param[in] parser argument parser (instance of ArgumentParser)
        @param[in] namespace parsed command (an instance of argparse.Namespace); ignored
        @param[in] values a list of trace levels;
            each item must be of the form 'component_name=level' or 'level',
            where level is a keyword (not case sensitive) or an integer
        @param[in] option_string option value specified by the user (a str)
        """
        validLevels = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        for item in values:
            component, _, levelStr = item.partition("=")
            if not levelStr:
                # bare "LEVEL" with no component: apply to the task's own log
                component, levelStr = None, component
            levelName = levelStr.upper()
            if levelName in validLevels:
                logLevel = getattr(lsstLog.Log, levelName)
            else:
                parser.error("loglevel=%r not one of %s" % (levelStr, validLevels))
            target = namespace.log if component is None else lsstLog.Log.getLogger(component)
            target.setLevel(logLevel)
973 
974 
def setDottedAttr(item, name, value):
    """!Like setattr, but accepts hierarchical names, e.g. foo.bar.baz

    @param[in,out] item object whose attribute is to be set
    @param[in] name name of item to set
    @param[in] value new value for the item

    For example if name is foo.bar.baz then item.foo.bar.baz is set to the specified value.
    """
    components = name.split(".")
    # walk down to the object that owns the final attribute, then set it
    owner = item
    for intermediateName in components[:-1]:
        owner = getattr(owner, intermediateName)
    setattr(owner, components[-1], value)
989 
990 
def getDottedAttr(item, name):
    """!Like getattr, but accepts hierarchical names, e.g. foo.bar.baz

    @param[in] item object whose attribute is to be returned
    @param[in] name name of item to get

    For example if name is foo.bar.baz then returns item.foo.bar.baz
    """
    result = item
    # follow each dotted component in turn
    for component in name.split("."):
        result = getattr(result, component)
    return result
1003 
1004 
def dataExists(butler, datasetType, dataRef):
    """!Return True if data exists at the current level or any data exists at a deeper level, False otherwise

    @param[in] butler data butler (a \ref lsst.daf.persistence.butler.Butler
        "lsst.daf.persistence.Butler")
    @param[in] datasetType dataset type (a str)
    @param[in] dataRef butler data reference (a \ref lsst.daf.persistence.butlerSubset.ButlerDataRef
        "lsst.daf.persistence.ButlerDataRef")
    """
    subRefs = dataRef.subItems()
    if not subRefs:
        # leaf reference: ask the butler directly
        return butler.datasetExists(datasetType=datasetType, dataId=dataRef.dataId)
    # non-leaf: recurse, true if any descendant has data
    return any(dataExists(butler, datasetType, subRef) for subRef in subRefs)
def setDatasetType
Set actual dataset type, once it is known.
def castDataIds
Validate data IDs and cast them to the correct type (modify idList in place).
argparse action to set log level
An argument parser for pipeline tasks that is based on argparse.ArgumentParser.
def dataExists
Return True if data exists at the current level or any data exists at a deeper level, False otherwise.
A dataset type specified by a command-line argument.
def showTaskHierarchy
Print task hierarchy to stdout.
Glorified struct for data about id arguments, used by ArgumentParser.add_id_argument.
def setDottedAttr
Like setattr, but accepts hierarchical names, e.g.
def __call__
Parse --id data and append results to namespace.<argument>.idList.
argparse action callback to override config parameters using name=value pairs from the command line ...
def getTaskDict
Get a dictionary of task info for all subtasks in a config.
def _applyInitialOverrides
Apply obs-package-specific and camera-specific config override files, if found.
def makeDataRefList
Compute refList based on idList.
def __init__
Construct a DatasetArgument.
def convert_arg_line_to_args
Allow files of arguments referenced by @<path> to contain multiple values on each line...
def getDottedAttr
Like getattr, but accepts hierarchical names, e.g.
def addArgument
Add a command-line argument to specify dataset type name.
def __call__
Load one or more files of config overrides.
def isDynamicDatasetType
Is the dataset type dynamic (specified on the command line)?
def __call__
Override one or more config name value pairs.
argparse action callback to process a data ID into a dict
def __init__
Construct a ConfigDatasetType.
def handleCamera
Perform camera-specific operations before parsing the command line.
def parse_args
Parse arguments for a pipeline task.
def _processDataIds
Process the parsed data for each data ID argument.
A dataset type specified by a config parameter.
def __init__
Construct an ArgumentParser.
def obeyShowArgument
Process arguments specified with --show (but ignores "data")
Abstract base class for a dataset type determined from parsed command-line arguments.
def addArgument
Add a command-line argument to specify dataset type name, if wanted.
argparse action to load config overrides from one or more files
def getDatasetType
Return the dataset type as a string.
def __init__
Construct a DataIdContainer.
A container for data IDs and associated data references.