lsst.pipe.base  13.0-11-gdf6a56c+13
 All Classes Namespaces Files Functions Variables Pages
argumentParser.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2015 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division, print_function
23 import abc
24 import argparse
25 import collections
26 import fnmatch
27 import itertools
28 import os
29 import re
30 import shlex
31 import sys
32 import shutil
33 import textwrap
34 
35 from builtins import zip
36 from builtins import str
37 from builtins import range
38 from builtins import object
39 
40 import lsst.utils
41 import lsst.pex.config as pexConfig
42 import lsst.pex.config.history
43 import lsst.log as lsstLog
44 import lsst.daf.persistence as dafPersist
45 from future.utils import with_metaclass
46 
47 __all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
48  "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]
49 
50 DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
51 DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
52 DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
53 
54 
def _fixPath(defName, path):
    """!Apply environment variable as default root, if present, and abspath

    @param[in] defName  name of environment variable containing default root path;
        if the environment variable does not exist then the path is relative
        to the current working directory
    @param[in] path  path relative to default root path
    @return abspath: path that has been expanded, or None if the environment variable
        does not exist and path is None
    """
    root = os.environ.get(defName)
    if root is not None:
        # os.path.join tolerates path=None by substituting the empty string
        return os.path.abspath(os.path.join(root, path or ""))
    # no default root: resolve the path on its own (or propagate None)
    return None if path is None else os.path.abspath(path)
71 
72 
class DataIdContainer(object):
    """!A container for data IDs and associated data references

    Override for data IDs that require special handling to be converted to data references,
    and specify the override class as ContainerClass for add_id_argument.
    (If you don't want the argument parser to compute data references, you may use this class
    and specify doMakeDataRefList=False in add_id_argument.)
    """

    def __init__(self, level=None):
        """!Construct a DataIdContainer"""
        self.datasetType = None  # actual dataset type, as specified on the command line (if dynamic)
        self.level = level       # butler level at which the data IDs are expressed
        self.idList = []         # data ID dicts parsed from the command line
        self.refList = []        # butler data references computed from idList

    def setDatasetType(self, datasetType):
        """!Set actual dataset type, once it is known"""
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """!Validate data IDs and cast them to the correct type (modify idList in place).

        @param[in] butler  data butler (a \ref lsst.daf.persistence.butler.Butler
            "lsst.daf.persistence.Butler")
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            idKeyTypeDict = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError:
            raise KeyError("Cannot get keys for datasetType %s at level %s" % (self.datasetType, self.level))

        for dataDict in self.idList:
            for key, strVal in dataDict.items():
                if key in idKeyTypeDict:
                    keyType = idKeyTypeDict[key]
                else:
                    # unknown key: assume it is valid and guess that its value is a string
                    keyType = str

                    log = lsstLog.Log.getDefaultLogger()
                    log.warn("Unexpected ID %s; guessing type is \"%s\"" %
                             (key, 'str' if keyType == str else keyType))
                    idKeyTypeDict[key] = keyType

                if keyType != str:
                    try:
                        castVal = keyType(strVal)
                    except Exception:
                        raise TypeError("Cannot cast value %r to %s for ID key %r" % (strVal, keyType, key,))
                    dataDict[key] = castVal

    def makeDataRefList(self, namespace):
        """!Compute refList based on idList

        Not called if add_id_argument called with doMakeDataRefList=False

        @param[in] namespace  results of parsing command-line (with 'butler' and 'log' elements)
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            subset = butler.subset(datasetType=self.datasetType, level=self.level, dataId=dataId)
            # keep only data that actually exist; this is a recursive test,
            # e.g. for the sake of "raw" data
            found = [ref for ref in subset
                     if dataExists(butler=butler, datasetType=self.datasetType, dataRef=ref)]
            if not found:
                namespace.log.warn("No data found for dataId=%s", dataId)
                continue
            self.refList += found
146 
147 
class DataIdArgument(object):
    """!Glorified struct for data about id arguments, used by ArgumentParser.add_id_argument"""

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        """!Constructor

        @param[in] name  name of identifier (argument name without dashes)
        @param[in] datasetType  type of dataset; specify a string for a fixed dataset type
            or a DatasetArgument for a dynamic dataset type (e.g. one specified by a
            command-line argument)
        @param[in] level  level of dataset, for butler
        @param[in] doMakeDataRefList  construct data references?
        @param[in] ContainerClass  class to contain data IDs and data references;
            the default class will work for many kinds of data, but you may have to override
            to compute some kinds of data references.
        """
        if name.startswith("-"):
            raise RuntimeError("Name %s must not start with -" % (name,))
        self.name = name
        self.datasetType = datasetType
        self.level = level
        self.doMakeDataRefList = bool(doMakeDataRefList)
        self.ContainerClass = ContainerClass
        self.argName = name.lstrip("-")

    @property
    def isDynamicDatasetType(self):
        """!Is the dataset type dynamic (specified on the command line)?"""
        # NOTE: the "def" line was missing from the extracted source; restored so that
        # @property has a function to decorate (the property is used by getDatasetType
        # below and by ArgumentParser.add_id_argument).
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """!Return the dataset type as a string

        @param[in] namespace  parsed command
        """
        if self.isDynamicDatasetType:
            return self.datasetType.getDatasetType(namespace)
        else:
            return self.datasetType
186 
187 
class DynamicDatasetType(with_metaclass(abc.ABCMeta, object)):
    """!Abstract base class for a dataset type determined from parsed command-line arguments

    Subclasses must override getDatasetType; they may also override addArgument
    to register an extra command-line option carrying the dataset type name.
    """

    def addArgument(self, parser, idName):
        """!Add a command-line argument to specify dataset type name, if wanted

        @param[in] parser  argument parser to which to add argument
        @param[in] idName  name of data ID argument, without the leading "--", e.g. "id"

        The default implementation does nothing
        """
        pass

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Return the dataset type as a string, based on parsed command-line arguments

        @param[in] namespace  parsed command
        """
        # abstractmethod already prevents instantiating a subclass without an
        # override; raising here also guards against direct base-class calls
        raise NotImplementedError("Subclasses must override")
209 
210 
class DatasetArgument(DynamicDatasetType):
    """!A dataset type specified by a command-line argument.

    NOTE: the "class" header line was missing from the extracted source; restored
    from __all__ and from the DynamicDatasetType contract implemented below.
    """

    def __init__(self,
                 name=None,
                 help="dataset type to process from input data repository",
                 default=None,
                 ):
        """!Construct a DatasetArgument

        @param[in] name  name of command-line argument (including leading "--", if appropriate)
            whose value is the dataset type; if None, uses --idName_dstype
            where idName is the name of the data ID argument (e.g. "id")
        @param[in] help  help string for the command-line argument
        @param[in] default  default value; if None, then the command-line option is required;
            ignored if the argument is positional (name does not start with "-")
            because positional arguments do not support default values
        """
        DynamicDatasetType.__init__(self)
        self.name = name
        self.help = help
        self.default = default

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate command-line argument

        @param[in] namespace  parsed command
        """
        argName = self.name.lstrip("-")
        return getattr(namespace, argName)

    def addArgument(self, parser, idName):
        """!Add a command-line argument to specify dataset type name

        Also set self.name if it is None
        """
        help = self.help if self.help else "dataset type for %s" % (idName,)
        if self.name is None:
            self.name = "--%s_dstype" % (idName,)
        requiredDict = dict()
        if self.name.startswith("-"):
            # an optional argument is required only when no default was supplied
            requiredDict = dict(required=self.default is None)
        parser.add_argument(
            self.name,
            default=self.default,
            help=help,
            **requiredDict)  # cannot specify required=None for positional arguments
259 
260 
class ConfigDatasetType(DynamicDatasetType):
    """!A dataset type specified by a config parameter

    NOTE: the "class" header line was missing from the extracted source; restored
    from __all__ and from the DynamicDatasetType contract implemented below.
    """

    def __init__(self, name):
        """!Construct a ConfigDatasetType

        @param[in] name  name of config option whose value is the dataset type
        """
        DynamicDatasetType.__init__(self)
        self.name = name

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate config field

        @param[in] namespace  parsed command
        @throw RuntimeError if the config parameter named by self.name does not exist
        """
        # getattr does not work reliably if the config field name is dotted,
        # so step through one level at a time
        keyList = self.name.split(".")
        value = namespace.config
        for key in keyList:
            try:
                value = getattr(value, key)
            except (AttributeError, KeyError):
                # getattr on a missing attribute raises AttributeError, which the
                # original KeyError-only clause let escape; KeyError is kept for
                # dict-like configs
                raise RuntimeError("Cannot find config parameter %r" % (self.name,))
        return value
288 
289 
class ArgumentParser(argparse.ArgumentParser):
    """!An argument parser for pipeline tasks that is based on argparse.ArgumentParser

    Users may wish to add additional arguments before calling parse_args.

    @note
    - I would prefer to check data ID keys and values as they are parsed,
      but the required information comes from the butler, so I have to construct a butler
      before I do this checking. Constructing a butler is slow, so I only want do it once,
      after parsing the command line, so as to catch syntax errors quickly.
    """
    requireOutput = True  # Require an output directory to be specified?

    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        """!Construct an ArgumentParser

        @param[in] name  name of top-level task; used to identify camera-specific override files
        @param[in] usage  usage string
        @param[in] **kwargs  additional keyword arguments for argparse.ArgumentParser
        """
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        argparse.ArgumentParser.__init__(self,
                                         usage=usage,
                                         fromfile_prefix_chars='@',
                                         epilog=textwrap.dedent("""Notes:
            * --config, --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --configfile foo bar
                * wrong: --configfile=foo bar
            """),
                                         formatter_class=argparse.RawDescriptionHelpFormatter,
                                         **kwargs)
        # positional input repository argument; --rerun may later modify it
        self.add_argument(metavar='input', dest="rawInput",
                          help="path to input data repository, relative to $%s" % (DEFAULT_INPUT_NAME,))
        self.add_argument("--calib", dest="rawCalib",
                          help="path to input calibration repository, relative to $%s" %
                          (DEFAULT_CALIB_NAME,))
        self.add_argument("--output", dest="rawOutput",
                          help="path to output data repository (need not exist), relative to $%s" %
                          (DEFAULT_OUTPUT_NAME,))
        self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
                          help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
                               "optionally sets ROOT to ROOT/rerun/INPUT")
        self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
                          help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
        self.add_argument("-C", "--configfile", dest="configfile", nargs="*", action=ConfigFileAction,
                          help="config override file(s)")
        self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
                          help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
                          metavar="LEVEL|COMPONENT=LEVEL")
        self.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument("--doraise", action="store_true",
                          help="raise an exception on error (else log a message and continue)?")
        self.add_argument("--noExit", action="store_true",
                          help="Do not exit even upon failure (i.e. return a struct to the calling script)")
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument("--show", nargs="+", default=(),
                          help="display the specified information to stdout and quit "
                               "(unless run is specified).")
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument("-t", "--timeout", type=float,
                          help="Timeout for multiprocessing; maximum wall time (sec)")
        self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
                          help=("remove and re-create the output directory if it already exists "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
                          help=("backup and then overwrite existing config files instead of checking them "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
                          help="Don't copy config to file~N backup.")
        self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
                          help=("backup and then overwrite existing package versions instead of checking"
                                "them (safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
                          help="don't check package versions; useful for development")
        # default log configuration; parse_args re-configures when --longlog is given
        lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
""")

    def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
                        ContainerClass=DataIdContainer):
        """!Add a data ID argument

        Add an argument to specify data IDs. If datasetType is an instance of DatasetArgument,
        then add a second argument to specify the dataset type.

        @param[in] name  data ID argument (including leading dashes, if wanted)
        @param[in] datasetType  type of dataset; supply a string for a fixed dataset type,
            or a DynamicDatasetType, such as DatasetArgument, for a dynamically determined
            dataset type
        @param[in] help  help string for the argument
        @param[in] level  level of dataset, for butler
        @param[in] doMakeDataRefList  construct data references?
        @param[in] ContainerClass  data ID container class to use to contain results;
            override the default if you need a special means of computing data references
            from data IDs

        The associated data is put into namespace.<dataIdArgument.name> as an instance of
        ContainerClass; the container includes fields:
        - idList: a list of data ID dicts
        - refList: a list of butler data references (empty if doMakeDataRefList false)
        """
        argName = name.lstrip("-")

        if argName in self._dataIdArgDict:
            raise RuntimeError("Data ID argument %s already exists" % (name,))
        if argName in set(("camera", "config", "butler", "log", "obsPkg")):
            raise RuntimeError("Data ID argument %s is a reserved name" % (name,))

        self.add_argument(name, nargs="*", action=IdValueAction, help=help,
                          metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")

        dataIdArgument = DataIdArgument(
            name=argName,
            datasetType=datasetType,
            level=level,
            doMakeDataRefList=doMakeDataRefList,
            ContainerClass=ContainerClass,
        )

        if dataIdArgument.isDynamicDatasetType:
            datasetType.addArgument(parser=self, idName=argName)

        self._dataIdArgDict[argName] = dataIdArgument

    def parse_args(self, config, args=None, log=None, override=None):
        """!Parse arguments for a pipeline task

        @param[in,out] config  config for the task being run
        @param[in] args  argument list; if None use sys.argv[1:]
        @param[in] log  log (instance lsst.log Log); if None use the default log
        @param[in] override  a config override function; it must take the root config object
            as its only argument and must modify the config in place.
            This function is called after camera-specific overrides files are applied, and before
            command-line config overrides are applied (thus allowing the user the final word).

        @return namespace: an argparse.Namespace containing many useful fields including:
        - camera: camera name
        - config: the supplied config with all overrides applied, validated and frozen
        - butler: a butler for the data
        - an entry for each of the data ID arguments registered by add_id_argument(),
          the value of which is a DataIdArgument that includes public elements
          'idList' and 'refList'
        - log: a lsst.log Log
        - an entry for each command-line argument, with the following exceptions:
          - config is the supplied config, suitably updated
          - configfile, id and loglevel are all missing
        - obsPkg: name of obs_ package for this camera
        """
        if args is None:
            args = sys.argv[1:]

        # the input repository must be the first positional argument; catch
        # the common error of leading with an option before doing real work
        if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                self.exit("%s: error: Must specify input as first argument" % self.prog)

        # Note that --rerun may change namespace.input, but if it does we verify that the
        # new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error("Error: input=%r not found" % (namespace.input,))

        namespace.config = config
        namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        self.handleCamera(namespace)

        # apply obs-package overrides before the caller's override function,
        # so the caller (and then the command line) gets the final word
        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)

        # "data" is deliberately skipped here (needs the butler); handled below
        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        butlerArgs = {}  # common arguments for butler elements
        if namespace.calib:
            butlerArgs = {'mapperArgs': {'calibRoot': namespace.calib}}
        if namespace.output:
            outputs = {'root': namespace.output, 'mode': 'rw'}
            inputs = {'root': namespace.input}
            inputs.update(butlerArgs)
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            outputs = {'root': namespace.input, 'mode': 'rw'}
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(outputs=outputs)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler, hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict.keys():
                for dataRef in getattr(namespace, dataIdName).refList:
                    print("%s dataRef.dataId = %s" % (dataIdName, dataRef.dataId))

        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                sys.stderr.write("Warning: no 'debug' module found\n")
                namespace.debug = False

        del namespace.loglevel

        if namespace.longlog:
            lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddThh:mm:ss.sss} %c (%X{LABEL})(%F:%L)- %m%n
""")
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace

    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories

        This allows for hacking the directories, e.g., to include a "rerun".
        Modifications are made to the 'namespace' object in-place.
        """
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument, if rerun is specified as a colon separated
        # value, it will be parsed as an input and output. The input value will be overridden if
        # previously specified (but a check is made to make sure both inputs use the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                namespace.output = rerunDir[0]
                # a _parent link means the rerun already chains back to its input
                if os.path.exists(os.path.join(namespace.output, "_parent")):
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error("Error: invalid argument for --rerun: %s" % namespace.rerun)
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        # the raw* attributes have been consumed; remove them from the namespace
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun

    def _processDataIds(self, namespace):
        """!Process the parsed data for each data ID argument

        Processing includes:
        - Validate data ID keys
        - Cast the data ID values to the correct type
        - Compute data references from data IDs

        @param[in,out] namespace  parsed namespace (an argparse.Namespace);
            reads these attributes:
            - butler
            - log
            - config, if any dynamic dataset types are set by a config parameter
            - dataset type arguments (e.g. id_dstype), if any dynamic dataset types
              are specified by such
            and modifies these attributes:
            - <name> for each data ID argument registered using add_id_argument
        """
        for dataIdArgument in self._dataIdArgDict.values():
            dataIdContainer = getattr(namespace, dataIdArgument.name)
            dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
            if dataIdArgument.doMakeDataRefList:
                try:
                    dataIdContainer.castDataIds(butler=namespace.butler)
                except (KeyError, TypeError) as e:
                    # failure of castDataIds indicates invalid command args
                    self.error(e)

                # failure of makeDataRefList indicates a bug that wants a traceback
                dataIdContainer.makeDataRefList(namespace)

    def _applyInitialOverrides(self, namespace):
        """!Apply obs-package-specific and camera-specific config override files, if found

        @param[in] namespace  parsed namespace (an argparse.Namespace);
            reads these attributes:
            - obsPkg

        Look in the package namespace.obsPkg for files:
        - config/<task_name>.py
        - config/<camera_name>/<task_name>.py
        and load if found
        """
        obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
        fileName = self._name + ".py"
        for filePath in (
            os.path.join(obsPkgDir, "config", fileName),
            os.path.join(obsPkgDir, "config", namespace.camera, fileName),
        ):
            if os.path.exists(filePath):
                # fixed typo in this log message: "overrride" -> "override"
                namespace.log.info("Loading config override file %r", filePath)
                namespace.config.load(filePath)
            else:
                namespace.log.debug("Config override file does not exist: %r", filePath)

    def handleCamera(self, namespace):
        """!Perform camera-specific operations before parsing the command line.

        The default implementation does nothing.

        @param[in,out] namespace  namespace (an argparse.Namespace) with the following fields:
            - camera: the camera name
            - config: the config passed to parse_args, with no overrides applied
            - obsPkg: the obs_ package for this camera
            - log: a lsst.log Log
        """
        pass

    def convert_arg_line_to_args(self, arg_line):
        """!Allow files of arguments referenced by `@<path>` to contain multiple values on each line

        @param[in] arg_line  line of text read from an argument file
        """
        arg_line = arg_line.strip()
        # skip blank lines and full-line comments
        if not arg_line or arg_line.startswith("#"):
            return
        for arg in shlex.split(arg_line, comments=True, posix=True):
            if not arg.strip():
                continue
            yield arg
679 
680 
class InputOnlyArgumentParser(ArgumentParser):
    """An ArgumentParser for pipeline tasks that don't write any output

    NOTE: the "class" header line was missing from the extracted source; restored
    from __all__ (the class overrides ArgumentParser.requireOutput).
    """
    requireOutput = False  # We're not going to write anything
684 
685 
def getTaskDict(config, taskDict=None, baseName=""):
    """!Get a dictionary of task info for all subtasks in a config

    Designed to be called recursively; the user should call with only a config
    (leaving taskDict and baseName at their default values).

    @param[in] config  configuration to process, an instance of lsst.pex.config.Config
    @param[in,out] taskDict  users should not specify this argument;
        (supports recursion; if provided, taskDict is updated in place, else a new
        dict is started)
    @param[in] baseName  users should not specify this argument.
        (supports recursion: if a non-empty string then a period is appended and the
        result is used as a prefix for additional entries in taskDict; otherwise no
        prefix is used)
    @return taskDict: a dict of config field name: task name
    """
    if taskDict is None:
        taskDict = dict()
    for fieldName, field in config.items():
        # a subtask field exposes both a "value" (its config) and a "target" (its class)
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue
        subBaseName = "%s.%s" % (baseName, fieldName) if baseName else fieldName
        try:
            taskName = "%s.%s" % (field.target.__module__, field.target.__name__)
        except Exception:
            # target may be a partial or other callable lacking __name__
            taskName = repr(field.target)
        taskDict[subBaseName] = taskName
        getTaskDict(config=subConfig, taskDict=taskDict, baseName=subBaseName)
    return taskDict
714 
715 
def obeyShowArgument(showOpts, config=None, exit=False):
    """!Process arguments specified with --show (but ignores "data")

    @param showOpts  List of options passed to --show
    @param config  The provided config
    @param exit  Exit if "run" isn't included in showOpts

    Supports the following options in showOpts:
    - config[=PAT]  Dump all the config entries, or just the ones that match the glob pattern
    - history=PAT  Show where the config entries that match the glob pattern were set
    - tasks  Show task hierarchy
    - data  Ignored; to be processed by caller
    - run  Keep going (the default behaviour is to exit if --show is specified)

    Calls sys.exit(1) if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        # each entry is either NAME or NAME=ARGS
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            # accept both "config=PAT" and "config=config.PAT"
            matConfig = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matConfig.group(1)
            if pattern:
                class FilteredStream(object):
                    """A file object that only prints lines that match the glob "pattern"

                    N.b. Newlines are silently discarded and reinserted; crude but effective.
                    """

                    def __init__(self, pattern):
                        # obey case if pattern isn't lowercase or requests NOIGNORECASE
                        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

                        if mat:
                            pattern = mat.group(1)
                            self._pattern = re.compile(fnmatch.translate(pattern))
                        else:
                            if pattern != pattern.lower():
                                print(u"Matching \"%s\" without regard to case "
                                      "(append :NOIGNORECASE to prevent this)" % (pattern,), file=sys.stdout)
                            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

                    def write(self, showStr):
                        showStr = showStr.rstrip()
                        # Strip off doc string line(s) and cut off at "=" for string matching
                        matchStr = showStr.split("\n")[-1].split("=")[0]
                        if self._pattern.search(matchStr):
                            print(u"\n" + showStr)

                fd = FilteredStream(pattern)
            else:
                fd = sys.stdout

            config.saveToStream(fd, "config")
        elif showCommand == "history":
            matHistory = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matHistory.group(1)
            if not pattern:
                print("Please provide a value with --show history (e.g. history=XXX)", file=sys.stderr)
                sys.exit(1)

            # walk down the dotted config path to the sub-config holding the field
            pattern = pattern.split(".")
            cpath, cname = pattern[:-1], pattern[-1]
            hconfig = config  # the config that we're interested in
            for i, cpt in enumerate(cpath):
                try:
                    hconfig = getattr(hconfig, cpt)
                except AttributeError:
                    print("Error: configuration %s has no subconfig %s" %
                          (".".join(["config"] + cpath[:i]), cpt), file=sys.stderr)

                    sys.exit(1)

            try:
                print(pexConfig.history.format(hconfig, cname))
            except KeyError:
                print("Error: %s has no field %s" % (".".join(["config"] + cpath), cname), file=sys.stderr)
                sys.exit(1)

        elif showCommand == "data":
            # handled by the caller, which has access to the butler
            pass
        elif showCommand == "run":
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            print(u"Unknown value for show: %s (choose from '%s')" %
                  (what, "', '".join("config[=XXX] data history=XXX tasks run".split())), file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
811 
812 
def showTaskHierarchy(config):
    """!Print task hierarchy to stdout

    @param[in] config: configuration to process (an lsst.pex.config.Config)
    """
    print(u"Subtasks:")
    taskDict = getTaskDict(config=config)

    # one "fieldName: taskName" line per subtask, in sorted field-name order
    for fieldName in sorted(taskDict):
        print(u"%s: %s" % (fieldName, taskDict[fieldName]))
825 
826 
class ConfigValueAction(argparse.Action):
    """!argparse action callback to override config parameters using name=value pairs from the command line
    """

    def __call__(self, parser, namespace, values, option_string):
        """!Override one or more config name value pairs

        @param[in] parser argument parser (instance of ArgumentParser)
        @param[in,out] namespace parsed command (an instance of argparse.Namespace);
            updated values:
            - namespace.config
        @param[in] values a list of configItemName=value pairs
        @param[in] option_string option value specified by the user (a str)
        """
        if namespace.config is None:
            return
        for pair in values:
            name, sep, rawValue = pair.partition("=")
            if not rawValue:
                parser.error("%s value %s must be in form name=value" % (option_string, pair))

            # First try assigning the raw string; if the config field rejects it,
            # fall back to evaluating the string as a Python expression
            try:
                setDottedAttr(namespace.config, name, rawValue)
            except AttributeError:
                parser.error("no config field: %s" % (name,))
            except Exception:
                try:
                    # NOTE: eval of a command-line value; acceptable here because the
                    # operator controls the command line, but do not feed untrusted input
                    parsedValue = eval(rawValue, {})
                except Exception:
                    parser.error("cannot parse %r as a value for %s" % (rawValue, name))
                try:
                    setDottedAttr(namespace.config, name, parsedValue)
                except Exception as e:
                    parser.error("cannot set config.%s=%r: %s" % (name, parsedValue, e))
862 
863 
class ConfigFileAction(argparse.Action):
    """!argparse action to load config overrides from one or more files
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """!Load one or more files of config overrides

        @param[in] parser argument parser (instance of ArgumentParser)
        @param[in,out] namespace parsed command (an instance of argparse.Namespace);
            updated values:
            - namespace.config
        @param[in] values a list of data config file paths
        @param[in] option_string option value specified by the user (a str)
        """
        # nothing to override if no config was attached to the namespace
        if namespace.config is None:
            return
        for path in values:
            try:
                namespace.config.load(path)
            except Exception as e:
                parser.error("cannot load config file %r: %s" % (path, e))
885 
886 
class IdValueAction(argparse.Action):
    """!argparse action callback to process a data ID into a dict
    """

    def __call__(self, parser, namespace, values, option_string):
        """!Parse --id data and append results to namespace.<argument>.idList

        @param[in] parser argument parser (instance of ArgumentParser)
        @param[in,out] namespace parsed command (an instance of argparse.Namespace);
            updated values:
            - <idName>.idList, where <idName> is the name of the ID argument,
                for instance "id" for ID argument --id
        @param[in] values a list of data IDs; see data format below
        @param[in] option_string option value specified by the user (a str)

        The data format is:
        key1=value1_1[^value1_2[^value1_3...] key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. value1_1) may either be a string, or of the form "int..int" (e.g. "1..3")
        which is interpreted as "1^2^3" (inclusive, unlike a python range). So "0^2..4^7..9" is
        equivalent to "0^2^3^4^7^8^9". You may also specify a stride: "1..5:2" is "1^3^5"

        The cross product is computed for keys with multiple values. For example:
            --id visit 1^2 ccd 1,1^2,2
        results in the following data ID dicts being appended to namespace.<argument>.idList:
            {"visit":1, "ccd":"1,1"}
            {"visit":2, "ccd":"1,1"}
            {"visit":1, "ccd":"2,2"}
            {"visit":2, "ccd":"2,2"}
        """
        if namespace.config is None:
            return
        keyValues = collections.OrderedDict()
        for pair in values:
            key, sep, valueStr = pair.partition("=")
            if key in keyValues:
                parser.error("%s appears multiple times in one ID argument: %s" % (key, option_string))
            expanded = []
            for token in valueStr.split("^"):
                # a token of the form "start..stop" or "start..stop:stride" expands
                # to an inclusive integer range; anything else is kept verbatim
                rangeMat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", token)
                if rangeMat is None:
                    expanded.append(token)
                else:
                    start = int(rangeMat.group(1))
                    stop = int(rangeMat.group(2))
                    strideStr = rangeMat.group(3)
                    stride = int(strideStr) if strideStr else 1
                    # values are kept as strings for consistency with verbatim tokens
                    for intValue in range(start, stop + 1, stride):
                        expanded.append(str(intValue))
            keyValues[key] = expanded

        # cross product of all per-key value lists, preserving key order
        keyList = list(keyValues.keys())
        idDictList = [collections.OrderedDict(zip(keyList, combo))
                      for combo in itertools.product(*keyValues.values())]

        argName = option_string.lstrip("-")
        getattr(namespace, argName).idList += idDictList
944 
945 
class LogLevelAction(argparse.Action):
    """!argparse action to set log level
    """

    def __call__(self, parser, namespace, values, option_string):
        """!Set trace level

        @param[in] parser argument parser (instance of ArgumentParser)
        @param[in] namespace parsed command (an instance of argparse.Namespace); ignored
        @param[in] values a list of trace levels;
            each item must be of the form 'component_name=level' or 'level',
            where level is a keyword (not case sensitive) or an integer
        @param[in] option_string option value specified by the user (a str)
        """
        permittedLevelList = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        for spec in values:
            component, sep, levelStr = spec.partition("=")
            if not levelStr:
                # a bare level (no "=") applies to the top-level/default log
                component, levelStr = None, component
            levelUpr = levelStr.upper()
            if levelUpr not in permittedLevelList:
                # parser.error prints a usage message and exits
                parser.error("loglevel=%r not one of %s" % (levelStr, permittedLevelList))
            logLevel = getattr(lsstLog.Log, levelUpr)
            if component is None:
                namespace.log.setLevel(logLevel)
            else:
                lsstLog.Log.getLogger(component).setLevel(logLevel)
975 
976 
def setDottedAttr(item, name, value):
    """!Like setattr, but accepts hierarchical names, e.g. foo.bar.baz

    @param[in,out] item object whose attribute is to be set
    @param[in] name name of item to set
    @param[in] value new value for the item

    For example if name is foo.bar.baz then item.foo.bar.baz is set to the specified value.
    """
    # walk down to the object that owns the final attribute, then set it
    parts = name.split(".")
    target = item
    while len(parts) > 1:
        target = getattr(target, parts.pop(0))
    setattr(target, parts[0], value)
991 
992 
def getDottedAttr(item, name):
    """!Like getattr, but accepts hierarchical names, e.g. foo.bar.baz

    @param[in] item object whose attribute is to be returned
    @param[in] name name of item to get

    For example if name is foo.bar.baz then returns item.foo.bar.baz
    """
    # peel off the first component and recurse on the remainder, if any
    head, sep, rest = name.partition(".")
    value = getattr(item, head)
    return getDottedAttr(value, rest) if rest else value
1005 
1006 
def dataExists(butler, datasetType, dataRef):
    """!Return True if data exists at the current level or any data exists at a deeper level, False otherwise

    @param[in] butler data butler (a \ref lsst.daf.persistence.butler.Butler
        "lsst.daf.persistence.Butler")
    @param[in] datasetType dataset type (a str)
    @param[in] dataRef butler data reference (a \ref lsst.daf.persistence.butlerSubset.ButlerDataRef
        "lsst.daf.persistence.ButlerDataRef")
    """
    children = dataRef.subItems()
    if not children:
        # leaf reference: ask the butler directly
        return butler.datasetExists(datasetType=datasetType, dataId=dataRef.dataId)
    # non-leaf: data "exists" if it exists under any child reference
    return any(dataExists(butler, datasetType, child) for child in children)
def setDatasetType
Set actual dataset type, once it is known.
def castDataIds
Validate data IDs and cast them to the correct type (modify idList in place).
argparse action to set log level
An argument parser for pipeline tasks that is based on argparse.ArgumentParser.
def dataExists
Return True if data exists at the current level or any data exists at a deeper level, False otherwise.
A dataset type specified by a command-line argument.
def showTaskHierarchy
Print task hierarchy to stdout.
Glorified struct for data about id arguments, used by ArgumentParser.add_id_argument.
def setDottedAttr
Like setattr, but accepts hierarchical names, e.g.
def __call__
Parse --id data and append results to namespace.
argparse action callback to override config parameters using name=value pairs from the command line ...
def getTaskDict
Get a dictionary of task info for all subtasks in a config.
def _applyInitialOverrides
Apply obs-package-specific and camera-specific config override files, if found.
def makeDataRefList
Compute refList based on idList.
def __init__
Construct a DatasetArgument.
def convert_arg_line_to_args
Allow files of arguments referenced by @<path> to contain multiple values on each line...
def getDottedAttr
Like getattr, but accepts hierarchical names, e.g.
def addArgument
Add a command-line argument to specify dataset type name.
def __call__
Load one or more files of config overrides.
def isDynamicDatasetType
Is the dataset type dynamic (specified on the command line)?
def __call__
Override one or more config name value pairs.
argparse action callback to process a data ID into a dict
def __init__
Construct a ConfigDatasetType.
def handleCamera
Perform camera-specific operations before parsing the command line.
def parse_args
Parse arguments for a pipeline task.
def _processDataIds
Process the parsed data for each data ID argument.
A dataset type specified by a config parameter.
def __init__
Construct an ArgumentParser.
def obeyShowArgument
Process arguments specified with --show (but ignores "data")
Abstract base class for a dataset type determined from parsed command-line arguments.
def addArgument
Add a command-line argument to specify dataset type name, if wanted.
argparse action to load config overrides from one or more files
def getDatasetType
Return the dataset type as a string.
def __init__
Construct a DataIdContainer.
A container for data IDs and associated data references.