Coverage for python/lsst/pipe/base/argumentParser.py : 11%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# LSST Data Management System
3# Copyright 2008-2015 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
# Public names exported by ``from <module> import *``.
__all__ = [
    "ArgumentParser",
    "ConfigFileAction",
    "ConfigValueAction",
    "DataIdContainer",
    "DatasetArgument",
    "ConfigDatasetType",
    "InputOnlyArgumentParser",
]
25import abc
26import argparse
27import collections
28import fnmatch
29import itertools
30import logging
31import os
32import re
33import shlex
34import sys
35import shutil
36import textwrap
38import lsst.utils
39import lsst.pex.config as pexConfig
40import lsst.pex.config.history
41import lsst.log as lsstLog
42import lsst.daf.persistence as dafPersist
# Names of the environment variables that, when set, supply the default
# root directory for each kind of repository path given on the command line.
DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"  # root for the input data repository
DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"  # root for the calibration repository
DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"  # root for the output data repository
def _fixPath(defName, path):
    """Resolve a path against an optional environment-supplied root.

    Parameters
    ----------
    defName : `str`
        Name of the environment variable that holds the default root
        directory. When the variable is not set, ``path`` is resolved
        relative to the current working directory instead.
    path : `str` or `None`
        Path relative to the default root.

    Returns
    -------
    abspath : `str` or `None`
        Absolute form of the resolved path. `None` is returned only when
        the environment variable is not set and ``path`` is `None`.
    """
    root = os.environ.get(defName)
    if root is not None:
        # A missing path means "the root directory itself".
        return os.path.abspath(os.path.join(root, path or ""))
    return None if path is None else os.path.abspath(path)
class DataIdContainer:
    """Container for data IDs and associated data references.

    Parameters
    ----------
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        This class does not support `None`, but if it did, `None`
        would mean the level should not be restricted.

    Notes
    -----
    Override this class for data IDs that require special handling to be
    converted to ``data references``, and specify the override class
    as ``ContainerClass`` for ``add_id_argument``.

    If you don't want the argument parser to compute data references,
    specify ``doMakeDataRefList=False`` in ``add_id_argument``.
    """

    def __init__(self, level=None):
        self.datasetType = None
        """Dataset type of the data references (`str`).
        """
        self.level = level
        """See parameter ``level`` (`str`).
        """
        self.idList = []
        """List of data IDs specified on the command line for the
        appropriate data ID argument (`list` of `dict`).
        """
        self.refList = []
        """List of data references for the data IDs in ``idList``
        (`list` of `lsst.daf.persistence.ButlerDataRef`).
        Elements will be omitted if the corresponding data is not found.
        The list will be empty when returned by ``parse_args`` if
        ``doMakeDataRefList=False`` was specified in ``add_id_argument``.
        """

    def setDatasetType(self, datasetType):
        """Set actual dataset type, once it is known.

        Parameters
        ----------
        datasetType : `str`
            Dataset type.

        Notes
        -----
        The reason ``datasetType`` is not a constructor argument is that
        some subclasses do not know the dataset type until the command
        is parsed. Thus, to reduce special cases in the code,
        ``datasetType`` is always set after the command is parsed.
        """
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """Validate data IDs and cast them to the correct type
        (modify ``idList`` in place).

        This code casts the values in the data ID dicts in ``idList``
        to the type required by the butler. Data IDs are read from the
        command line as `str`, but the butler requires some values to be
        other types. For example "visit" values should be `int`.

        Parameters
        ----------
        butler : `lsst.daf.persistence.Butler`
            Data butler.

        Raises
        ------
        RuntimeError
            Raised if `setDatasetType` has not been called yet.
        KeyError
            Raised if the butler cannot provide the keys for the dataset
            type at the requested level.
        TypeError
            Raised if a value cannot be converted to the type the butler
            reports for its key; the original conversion error is
            attached as ``__cause__``.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            idKeyTypeDict = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError as e:
            msg = f"Cannot get keys for datasetType {self.datasetType} at level {self.level}"
            raise KeyError(msg) from e

        for dataDict in self.idList:
            for key, strVal in dataDict.items():
                try:
                    keyType = idKeyTypeDict[key]
                except KeyError:
                    # OK, assume that it's a valid key and guess that it's a
                    # string
                    keyType = str

                    log = lsstLog.Log.getDefaultLogger()
                    log.warn("Unexpected ID %s; guessing type is \"%s\"", key, "str")
                    # Remember the guess so the warning is issued only once
                    # per key.
                    idKeyTypeDict[key] = keyType

                if keyType != str:
                    try:
                        castVal = keyType(strVal)
                    except Exception as e:
                        raise TypeError(
                            f"Cannot cast value {strVal!r} to {keyType} for ID key {key}"
                        ) from e
                    # Replacing the value of an existing key does not resize
                    # the dict, so this is safe while iterating over items().
                    dataDict[key] = castVal

    def makeDataRefList(self, namespace):
        """Compute refList based on idList.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Results of parsing command-line. The ``butler`` and ``log``
            elements must be set.

        Raises
        ------
        RuntimeError
            Raised if `setDatasetType` has not been called yet.

        Notes
        -----
        Not called if ``add_id_argument`` was called with
        ``doMakeDataRefList=False``.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            refList = dafPersist.searchDataRefs(butler, datasetType=self.datasetType,
                                                level=self.level, dataId=dataId)
            if not refList:
                # Missing data is reported but does not abort the run.
                namespace.log.warn("No data found for dataId=%s", dataId)
                continue
            self.refList += refList
class DataIdArgument:
    """Description of one data ID argument, as registered through
    `ArgumentParser.add_id_argument`.

    Parameters
    ----------
    name : `str`
        Name of identifier (argument name without dashes).
    datasetType : `str` or `DynamicDatasetType`
        Type of dataset; a string means a fixed dataset type, while a
        `DynamicDatasetType` (for example a `DatasetArgument`) means the
        type is only known after the command line has been parsed.
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        Some container classes may also support `None`, which means
        the level should not be restricted; however the default class,
        `DataIdContainer`, does not support `None`.
    doMakeDataRefList : `bool`, optional
        If `True` (default), construct data references.
    ContainerClass : `class`, optional
        Class to contain data IDs and data references; the default class
        `DataIdContainer` will work for many, but not all, cases.

    Raises
    ------
    RuntimeError
        Raised if ``name`` starts with a dash.
    """

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        if name.startswith("-"):
            raise RuntimeError(f"Name {name} must not start with -")
        self.name = name
        self.argName = name.lstrip("-")
        self.datasetType = datasetType
        self.level = level
        self.ContainerClass = ContainerClass
        self.doMakeDataRefList = bool(doMakeDataRefList)

    @property
    def isDynamicDatasetType(self):
        """`True` if the dataset type is dynamic (that is, an instance
        of `DynamicDatasetType` rather than a plain string).
        """
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """Get the dataset type as a string.

        Parameters
        ----------
        namespace
            Parsed command; only consulted for dynamic dataset types.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        if not self.isDynamicDatasetType:
            return self.datasetType
        return self.datasetType.getDatasetType(namespace)
class DynamicDatasetType(metaclass=abc.ABCMeta):
    """Abstract base class for a dataset type that is only known once
    the command line has been parsed.
    """

    def addArgument(self, parser, idName):
        """Hook for adding a command-line argument that specifies the
        dataset type name, if one is wanted.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser to add the argument to.
        idName : `str`
            Name of data ID argument, without the leading ``"--"``,
            e.g. ``"id"``.

        Notes
        -----
        This base implementation adds nothing; subclasses override it
        when they need an extra argument.
        """
        return None

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Get the dataset type as a string, based on parsed command-line
        arguments.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        raise NotImplementedError("Subclasses must override")
class DatasetArgument(DynamicDatasetType):
    """Dataset type specified by a command-line argument.

    Parameters
    ----------
    name : `str`, optional
        Name of command-line argument (including leading "--",
        if appropriate) whose value is the dataset type.
        If `None`, uses ``--idName_dstype`` where idName
        is the name of the data ID argument (e.g. "id").
    help : `str`, optional
        Help string for the command-line argument.
    default : `object`, optional
        Default value. If `None`, then the command-line option is required.
        This argument is ignored if the command-line argument is positional
        (name does not start with "-") because positional arguments do
        not support default values.
    """

    def __init__(self,
                 name=None,
                 help="dataset type to process from input data repository",
                 default=None,
                 ):
        super().__init__()
        self.name = name
        self.help = help
        self.default = default

    def getDatasetType(self, namespace):
        """Get the dataset type as a string, from the appropriate
        command-line argument.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.

        Notes
        -----
        ``name`` must be set, either at construction or by `addArgument`,
        before this method is called.
        """
        if self.name.startswith("-"):
            # For options argparse derives the namespace attribute by
            # stripping the leading dashes and converting any internal
            # "-" to "_", so mirror that here.
            argName = self.name.lstrip("-").replace("-", "_")
        else:
            # Positional arguments are stored under their name verbatim.
            argName = self.name
        return getattr(namespace, argName)

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify the dataset type name.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        idName : `str`
            Name of the data ID argument, without leading dashes.

        Notes
        -----
        Also sets the `name` attribute if it is currently `None`.
        """
        helpText = self.help if self.help else f"dataset type for {idName}"
        if self.name is None:
            self.name = f"--{idName}_dstype"
        # argparse rejects ``required`` for positional arguments, so only
        # pass it when the argument is an option.
        requiredDict = {}
        if self.name.startswith("-"):
            requiredDict = {"required": self.default is None}
        parser.add_argument(
            self.name,
            default=self.default,
            help=helpText,
            **requiredDict)
class ConfigDatasetType(DynamicDatasetType):
    """Dataset type specified by a config parameter.

    Parameters
    ----------
    name : `str`
        Name of config option whose value is the dataset type;
        may be dotted to reach into nested configs.
    """

    def __init__(self, name):
        super().__init__()
        self.name = name

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate
        config field.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command; its ``config`` attribute is read.

        Returns
        -------
        datasetType
            Value of the config field named by ``name``.

        Raises
        ------
        RuntimeError
            Raised if any component of the dotted name cannot be found.
        """
        # getattr does not work reliably if the config field name is
        # dotted, so step through one level at a time
        value = namespace.config
        for key in self.name.split("."):
            try:
                value = getattr(value, key)
            except (AttributeError, KeyError) as e:
                # A missing attribute surfaces as AttributeError; KeyError is
                # kept for any mapping-like config that reports it that way.
                raise RuntimeError(f"Cannot find config parameter {self.name!r}") from e
        return value
class ArgumentParser(argparse.ArgumentParser):
    """Argument parser for command-line tasks that is based on
    `argparse.ArgumentParser`.

    Parameters
    ----------
    name : `str`
        Name of top-level task; used to identify camera-specific override
        files.
    usage : `str`, optional
        Command-line usage signature.
    **kwargs
        Additional keyword arguments for `argparse.ArgumentParser`.

    Notes
    -----
    Users may wish to add additional arguments before calling `parse_args`.
    """
    # I would prefer to check data ID keys and values as they are parsed,
    # but the required information comes from the butler, so I have to
    # construct a butler before I do this checking. Constructing a butler
    # is slow, so I only want do it once, after parsing the command line,
    # so as to catch syntax errors quickly.

    requireOutput = True
    """Require an output directory to be specified (`bool`)."""

    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        argparse.ArgumentParser.__init__(self,
                                         usage=usage,
                                         fromfile_prefix_chars='@',
                                         epilog=textwrap.dedent("""Notes:
            * --config, --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --configfile foo bar
                * wrong: --configfile=foo bar
            """),
                                         formatter_class=argparse.RawDescriptionHelpFormatter,
                                         **kwargs)
        self.add_argument(metavar='input', dest="rawInput",
                          help=f"path to input data repository, relative to ${DEFAULT_INPUT_NAME}")
        self.add_argument("--calib", dest="rawCalib",
                          help=f"path to input calibration repository, relative to ${DEFAULT_CALIB_NAME}")
        self.add_argument("--output", dest="rawOutput",
                          help="path to output data repository (need not exist), "
                               f"relative to ${DEFAULT_OUTPUT_NAME}")
        self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
                          help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
                               "optionally sets ROOT to ROOT/rerun/INPUT")
        self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
                          help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
        self.add_argument("-C", "--configfile", dest="configfile", nargs="*", action=ConfigFileAction,
                          help="config override file(s)")
        self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
                          help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
                          metavar="LEVEL|COMPONENT=LEVEL")
        self.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument("--doraise", action="store_true",
                          help="raise an exception on error (else log a message and continue)?")
        self.add_argument("--noExit", action="store_true",
                          help="Do not exit even upon failure (i.e. return a struct to the calling script)")
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument("--show", nargs="+", default=(),
                          help="display the specified information to stdout and quit "
                               "(unless run is specified); information is "
                               "(config[=PATTERN]|history=PATTERN|tasks|data|run)")
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument("-t", "--timeout", type=float,
                          help="Timeout for multiprocessing; maximum wall time (sec)")
        self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
                          help=("remove and re-create the output directory if it already exists "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
                          help=("backup and then overwrite existing config files instead of checking them "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
                          help="Don't copy config to file~N backup.")
        self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
                          help=("backup and then overwrite existing package versions instead of checking "
                                "them (safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
                          help="don't check package versions; useful for development")
        lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
""")

        # Forward all Python logging to lsst.log
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        # Only install the forwarding handler once per process; adding one for
        # every parser instance would make each record be forwarded repeatedly.
        if not any(isinstance(handler, lsstLog.LogHandler) for handler in lgr.handlers):
            lgr.addHandler(lsstLog.LogHandler())

    def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
                        ContainerClass=DataIdContainer):
        """Add a data ID argument.

        Parameters
        ----------
        name : `str`
            Data ID argument (including leading dashes, if wanted).
        datasetType : `str` or `DynamicDatasetType`-type
            Type of dataset. Supply a string for a fixed dataset type.
            For a dynamically determined dataset type, supply
            a `DynamicDatasetType`, such as `DatasetArgument`.
        help : `str`
            Help string for the argument.
        level : `str`
            The lowest hierarchy level to descend to for this dataset type,
            for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
            Use `""` to use the mapper's default for the dataset type.
            Some container classes may also support `None`, which means
            the level should not be restricted; however the default class,
            `DataIdContainer`, does not support `None`.
        doMakeDataRefList : bool, optional
            If `True` (default), construct data references.
        ContainerClass : `class`, optional
            Class to contain data IDs and data references; the default class
            `DataIdContainer` will work for many, but not all, cases.
            It is instantiated as ``ContainerClass(level=level)``.

        Raises
        ------
        RuntimeError
            Raised if an argument with the same name was already added, or
            if the name collides with one of the reserved namespace fields.

        Notes
        -----
        If ``datasetType`` is an instance of `DynamicDatasetType`, its
        ``addArgument`` method is called, which (for `DatasetArgument`)
        adds a second argument to specify the dataset type.

        The associated data is put into ``namespace.<dataIdArgument.name>``
        as an instance of `ContainerClass`; the container includes fields:

        - ``idList``: a list of data ID dicts.
        - ``refList``: a list of `~lsst.daf.persistence.Butler`
            data references (empty if ``doMakeDataRefList`` is `False`).
        """
        argName = name.lstrip("-")

        if argName in self._dataIdArgDict:
            raise RuntimeError(f"Data ID argument {name} already exists")
        # These names are set on the namespace by parse_args itself.
        if argName in {"camera", "config", "butler", "log", "obsPkg"}:
            raise RuntimeError(f"Data ID argument {name} is a reserved name")

        self.add_argument(name, nargs="*", action=IdValueAction, help=help,
                          metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")

        dataIdArgument = DataIdArgument(
            name=argName,
            datasetType=datasetType,
            level=level,
            doMakeDataRefList=doMakeDataRefList,
            ContainerClass=ContainerClass,
        )

        if dataIdArgument.isDynamicDatasetType:
            datasetType.addArgument(parser=self, idName=argName)

        self._dataIdArgDict[argName] = dataIdArgument

    def parse_args(self, config, args=None, log=None, override=None):
        """Parse arguments for a command-line task.

        Parameters
        ----------
        config : `lsst.pex.config.Config`
            Config for the task being run.
        args : `list`, optional
            Argument list; if `None` then ``sys.argv[1:]`` is used.
        log : `lsst.log.Log`, optional
            `~lsst.log.Log` instance; if `None` use the default log.
        override : callable, optional
            A config override function. It must take the root config object
            as its only argument and must modify the config in place.
            This function is called after camera-specific overrides files
            are applied, and before command-line config overrides
            are applied (thus allowing the user the final word).

        Returns
        -------
        namespace : `argparse.Namespace`
            A `~argparse.Namespace` instance containing fields:

            - ``camera``: camera name.
            - ``config``: the supplied config with all overrides applied,
                validated and frozen.
            - ``butler``: a `lsst.daf.persistence.Butler` for the data.
            - An entry for each of the data ID arguments registered by
                `add_id_argument`, of the type passed to its ``ContainerClass``
                keyword (`~lsst.pipe.base.DataIdContainer` by default). It
                includes public elements ``idList`` and ``refList``.
            - ``log``: a `lsst.log` Log.
            - An entry for each command-line argument,
                with the following exceptions:

              - config is the supplied config, suitably updated.
              - configfile, id and loglevel are all missing.
            - ``obsPkg``: name of the ``obs_`` package for this camera.

        Notes
        -----
        This method exits the process (rather than returning) when the
        input repository is missing from the command line, when help is
        requested, when argparse reports an error, and when ``--show`` is
        given without ``run``.
        """
        if args is None:
            args = sys.argv[1:]

        # The input repository must come first because it is needed to find
        # the mapper (and hence the camera) before the real parsing starts.
        if len(args) < 1 or args[0].startswith(("-", "@")):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                self.exit(status=1,
                          message=f"{self.prog}: error: Must specify input as first argument\n")

        # Note that --rerun may change namespace.input, but if it does
        # we verify that the new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error(f"Error: input={namespace.input!r} not found")

        namespace.config = config
        namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        if mapperClass is None:
            self.error(f"Error: no mapper specified for input repo {namespace.input!r}")

        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        self.handleCamera(namespace)

        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)

        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        butlerArgs = {}  # common arguments for butler elements
        if namespace.calib:
            butlerArgs = {'mapperArgs': {'calibRoot': namespace.calib}}
        if namespace.output:
            outputs = {'root': namespace.output, 'mode': 'rw'}
            inputs = {'root': namespace.input}
            inputs.update(butlerArgs)
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            # Without a separate output the input repository is opened
            # read/write.
            outputs = {'root': namespace.input, 'mode': 'rw'}
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(outputs=outputs)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler,
        # hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict.keys():
                for dataRef in getattr(namespace, dataIdName).refList:
                    print(f"{dataIdName} dataRef.dataId = {dataRef.dataId}")

        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                print("Warning: no 'debug' module found", file=sys.stderr)
                namespace.debug = False

        del namespace.loglevel

        if namespace.longlog:
            lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n
""")
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace

    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories

        This allows for hacking the directories, e.g., to include a
        "rerun".
        Modifications are made to the 'namespace' object in-place:
        ``calib``, ``output`` and ``rerun`` are set, ``input`` may be
        replaced when ``--rerun`` is used, and the ``raw*`` attributes
        are removed.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace; ``input`` must already be set.
        """
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the
        # namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument.
        # If rerun is specified as a colon separated value,
        # it will be parsed as an input and output.
        # The input value will be overridden if previously specified
        # (but a check is made to make sure both inputs use
        # the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                namespace.output = rerunDir[0]
                # An existing rerun records its parent repository via the
                # "_parent" entry; use that as the input when present.
                if os.path.exists(os.path.join(namespace.output, "_parent")):
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error(f"Error: invalid argument for --rerun: {namespace.rerun}")
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun

    def _processDataIds(self, namespace):
        """Process the parsed data for each data ID argument in an
        `~argparse.Namespace`.

        Processing includes:

        - Validate data ID keys.
        - Cast the data ID values to the correct type.
        - Compute data references from data IDs.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``butler``
            - ``log``
            - ``config``, if any dynamic dataset types are set by
              a config parameter.
            - Dataset type arguments (e.g. ``id_dstype``), if any dynamic
              dataset types are specified by such

            These attributes are modified:

            - ``<name>`` for each data ID argument registered using
              `add_id_argument` with name ``<name>``.
        """
        for dataIdArgument in self._dataIdArgDict.values():
            dataIdContainer = getattr(namespace, dataIdArgument.name)
            dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
            if dataIdArgument.doMakeDataRefList:
                try:
                    dataIdContainer.castDataIds(butler=namespace.butler)
                except (KeyError, TypeError) as e:
                    # failure of castDataIds indicates invalid command args
                    self.error(e)

                # failure of makeDataRefList indicates a bug
                # that wants a traceback
                dataIdContainer.makeDataRefList(namespace)

    def _applyInitialOverrides(self, namespace):
        """Apply obs-package-specific and camera-specific config
        override files, if found

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``obsPkg``
            - ``camera``
            - ``config``
            - ``log``

        Notes
        -----
        Look in the package ``namespace.obsPkg`` for files:

        - ``config/<task_name>.py``
        - ``config/<camera_name>/<task_name>.py``

        and load each one that exists, in that order.
        """
        obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
        fileName = self._name + ".py"
        for filePath in (
            os.path.join(obsPkgDir, "config", fileName),
            os.path.join(obsPkgDir, "config", namespace.camera, fileName),
        ):
            if os.path.exists(filePath):
                namespace.log.info("Loading config overrride file %r", filePath)
                namespace.config.load(filePath)
            else:
                namespace.log.debug("Config override file does not exist: %r", filePath)

    def handleCamera(self, namespace):
        """Perform camera-specific operations before parsing the command-line.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace (an `argparse.Namespace`) with the following fields:

            - ``camera``: the camera name.
            - ``config``: the config passed to parse_args, with no overrides
              applied.
            - ``obsPkg``: the ``obs_`` package for this camera.
            - ``log``: a `lsst.log` Log.

        Notes
        -----
        The default implementation does nothing.
        """
        pass

    def convert_arg_line_to_args(self, arg_line):
        """Allow files of arguments referenced by ``@<path>`` to contain
        multiple values on each line.

        Parameters
        ----------
        arg_line : `str`
            Line of text read from an argument file.

        Yields
        ------
        arg : `str`
            Each non-blank argument found on the line; nothing is yielded
            for blank lines or lines starting with ``#``.
        """
        arg_line = arg_line.strip()
        if not arg_line or arg_line.startswith("#"):
            return
        # shlex handles quoting and strips trailing "#" comments.
        for arg in shlex.split(arg_line, comments=True, posix=True):
            if not arg.strip():
                continue
            yield arg

    def addReuseOption(self, choices):
        """Add a "--reuse-outputs-from SUBTASK" option to the argument
        parser.

        CmdLineTasks that can be restarted at an intermediate step using
        outputs from earlier (but still internal) steps should use this
        method to allow the user to control whether that happens when
        outputs from earlier steps are present.

        Parameters
        ----------
        choices : sequence
            A sequence of string names (by convention, top-level subtasks)
            that identify the steps that could be skipped when their
            outputs are already present. The list is ordered, so when the
            user specifies one step on the command line, all previous steps
            may be skipped as well. In addition to the choices provided,
            users may pass "all" to indicate that all steps may be thus
            skipped.

        Notes
        -----
        When this method is called, the ``namespace`` object returned by
        ``parse_args`` will contain a ``reuse`` attribute containing
        a list of all steps that should be skipped if their outputs
        are already present.
        If no steps should be skipped, the ``reuse`` will be an empty list.
        """
        # Copy so the caller's sequence is not modified by the append below.
        choices = list(choices)
        choices.append("all")
        self.add_argument("--reuse-outputs-from", dest="reuse", choices=choices,
                          default=[], action=ReuseAction,
                          help=("Skip the given subtask and its predecessors and reuse their outputs "
                                "if those outputs already exist. Use 'all' to specify all subtasks."))
class InputOnlyArgumentParser(ArgumentParser):
    """`ArgumentParser` variant for command-line tasks that write no output.
    """

    # Nothing is written by these tasks, so the output requirement is off.
    requireOutput = False
def getTaskDict(config, taskDict=None, baseName=""):
    """Collect the task name of every subtask reachable from a config.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    taskDict : `dict`, optional
        Accumulator used by the recursion; users should leave it unset.
        When given it is updated in place, otherwise a new `dict` is
        created.
    baseName : `str`, optional
        Recursion helper; users should leave it unset. A non-empty value
        plus a period is used as the prefix for keys added to
        ``taskDict``; an empty value means no prefix.

    Returns
    -------
    taskDict : `dict`
        Keys are (dotted) config field names, values are task names.

    Notes
    -----
    The function calls itself for every nested config it finds. Call it
    with only ``config``, leaving the other arguments at their defaults.
    """
    collected = {} if taskDict is None else taskDict
    for fieldName, field in config.items():
        # Only fields exposing both ``value`` and ``target`` are treated
        # as subtask fields.
        if not hasattr(field, "value") or not hasattr(field, "target"):
            continue
        nested = field.value
        if not isinstance(nested, pexConfig.Config):
            continue
        if baseName:
            qualifiedName = f"{baseName}.{fieldName}"
        else:
            qualifiedName = fieldName
        try:
            taskName = f"{field.target.__module__}.{field.target.__name__}"
        except Exception:
            # Fall back to repr for targets lacking __module__/__name__.
            taskName = repr(field.target)
        collected[qualifiedName] = taskName
        getTaskDict(config=nested, taskDict=collected, baseName=qualifiedName)
    return collected
968def obeyShowArgument(showOpts, config=None, exit=False):
969 """Process arguments specified with ``--show`` (but ignores
970 ``"data"``).
972 Parameters
973 ----------
974 showOpts : `list` of `str`
975 List of options passed to ``--show``.
976 config : optional
977 The provided config.
978 exit : bool, optional
979 Exit if ``"run"`` isn't included in ``showOpts``.
981 Parameters
982 ----------
983 Supports the following options in showOpts:
985 - ``config[=PAT]``. Dump all the config entries, or just the ones that
986 match the glob pattern.
987 - ``history=PAT``. Show where the config entries that match the glob
988 pattern were set.
989 - ``tasks``. Show task hierarchy.
990 - ``data``. Ignored; to be processed by caller.
991 - ``run``. Keep going (the default behaviour is to exit if
992 ``--show`` is specified).
994 Calls ``sys.exit(1)`` if any other option found.
995 """
996 if not showOpts:
997 return
999 for what in showOpts:
1000 showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")
1002 if showCommand == "config":
1003 matConfig = re.search(r"^(?:config.)?(.+)?", showArgs)
1004 pattern = matConfig.group(1)
1005 if pattern:
1006 class FilteredStream:
1007 """A file object that only prints lines
1008 that match the glob "pattern".
1010 N.b. Newlines are silently discarded and reinserted;
1011 crude but effective.
1012 """
1014 def __init__(self, pattern):
1015 # obey case if pattern isn't lowecase or requests
1016 # NOIGNORECASE
1017 mat = re.search(r"(.*):NOIGNORECASE$", pattern)
1019 if mat:
1020 pattern = mat.group(1)
1021 self._pattern = re.compile(fnmatch.translate(pattern))
1022 else:
1023 if pattern != pattern.lower():
1024 print(f"Matching {pattern!r} without regard to case "
1025 "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
1026 self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)
1028 def write(self, showStr):
1029 showStr = showStr.rstrip()
1030 # Strip off doc string line(s) and cut off
1031 # at "=" for string matching
1032 matchStr = showStr.split("\n")[-1].split("=")[0]
1033 if self._pattern.search(matchStr):
1034 print("\n" + showStr)
1036 fd = FilteredStream(pattern)
1037 else:
1038 fd = sys.stdout
1040 config.saveToStream(fd, "config")
1041 elif showCommand == "history":
1042 matHistory = re.search(r"^(?:config.)?(.+)?", showArgs)
1043 globPattern = matHistory.group(1)
1044 if not globPattern:
1045 print("Please provide a value with --show history (e.g. history=*.doXXX)", file=sys.stderr)
1046 sys.exit(1)
1048 error = False
1049 for i, pattern in enumerate(fnmatch.filter(config.names(), globPattern)):
1050 if i > 0:
1051 print("")
1053 pattern = pattern.split(".")
1054 cpath, cname = pattern[:-1], pattern[-1]
1055 hconfig = config # the config that we're interested in
1056 for i, cpt in enumerate(cpath):
1057 try:
1058 hconfig = getattr(hconfig, cpt)
1059 except AttributeError:
1060 config_path = ".".join(["config"] + cpath[:i])
1061 print(f"Error: configuration {config_path} has no subconfig {cpt}", file=sys.stderr)
1062 error = True
1064 try:
1065 print(pexConfig.history.format(hconfig, cname))
1066 except KeyError:
1067 config_path = ".".join(["config"] + cpath)
1068 print(f"Error: {config_path} has no field {cname}", file=sys.stderr)
1069 error = True
1071 if error:
1072 sys.exit(1)
1074 elif showCommand == "data":
1075 pass
1076 elif showCommand == "run":
1077 pass
1078 elif showCommand == "tasks":
1079 showTaskHierarchy(config)
1080 else:
1081 choices = "', '".join("config[=XXX] data history=XXX tasks run".split())
1082 print(f"Unknown value for show: {what} (choose from {choices!r})", file=sys.stderr)
1083 sys.exit(1)
1085 if exit and "run" not in showOpts:
1086 sys.exit(0)
def showTaskHierarchy(config):
    """Write the subtask hierarchy of a config to stdout.

    A ``Subtasks:`` header is printed first, followed by one
    ``<field name>: <task name>`` line per subtask, sorted by field name.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    """
    print("Subtasks:")
    subtasks = getTaskDict(config=config)
    for fieldName in sorted(subtasks):
        print(f"{fieldName}: {subtasks[fieldName]}")
class ConfigValueAction(argparse.Action):
    """argparse action that applies ``name=value`` config overrides given
    on the command line.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Apply one or more ``name=value`` overrides to the config.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser; its ``error`` method reports bad overrides.
        namespace : `argparse.Namespace`
            Parsed command. The ``namespace.config`` attribute is updated;
            nothing is done when it is `None`.
        values : `list`
            A list of ``configItemName=value`` pairs.
        option_string : `str`
            Option value specified by the user.
        """
        config = namespace.config
        if config is None:
            return
        for assignment in values:
            name, _, valueStr = assignment.partition("=")
            if not valueStr:
                parser.error(f"{option_string} value {assignment} must be in form name=value")

            # First try the raw string; only if the field rejects it is
            # the text evaluated as a Python expression.
            try:
                setDottedAttr(config, name, valueStr)
            except AttributeError:
                parser.error(f"no config field: {name}")
            except Exception:
                # NOTE(review): eval runs arbitrary expressions taken
                # from the command line (with empty globals).
                try:
                    value = eval(valueStr, {})
                except Exception:
                    parser.error(f"cannot parse {valueStr!r} as a value for {name}")
                try:
                    setDottedAttr(config, name, value)
                except Exception as e:
                    parser.error(f"cannot set config.{name}={value!r}: {e}")
class ConfigFileAction(argparse.Action):
    """argparse action that loads config overrides from one or more files.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Load each listed override file into the config, in order.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser; its ``error`` method reports a failed load.
        namespace : `argparse.Namespace`
            Parsed command. ``namespace.config`` is updated by this
            method; nothing is done when it is `None`.
        values : `list`
            A list of data config file paths.
        option_string : `str`, optional
            Option value specified by the user.
        """
        config = namespace.config
        if config is None:
            return
        for path in values:
            try:
                config.load(path)
            except Exception as exc:
                parser.error(f"cannot load config file {path!r}: {exc}")
class IdValueAction(argparse.Action):
    """argparse action callback to process a data ID into a dict.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Parse ``--id`` data and append results to
        ``namespace.<argument>.idList``.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser; its ``error`` method reports malformed IDs.
        namespace : `argparse.Namespace`
            Parsed command (an instance of argparse.Namespace).
            Nothing is done when ``namespace.config`` is `None`.
            The following attributes are updated:

            - ``<idName>.idList``, where ``<idName>`` is the name of the
              ID argument, for instance ``"id"`` for ID argument ``--id``.
        values : `list`
            A list of data IDs; see Notes below.
        option_string : `str`
            Option value specified by the user.

        Notes
        -----
        The data format is::

            key1=value1_1[^value1_2[^value1_3...]
            key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. ``value1_1``) may either be a string,
        or of the form ``"int..int"`` (e.g. ``"1..3"``) which is
        interpreted as ``"1^2^3"`` (inclusive, unlike a python range).
        So ``"0^2..4^7..9"`` is equivalent to ``"0^2^3^4^7^8^9"``.
        You may also specify a stride: ``"1..5:2"`` is ``"1^3^5"``.
        The stride must be non-zero; a zero stride is reported through
        ``parser.error``.

        All values are stored as strings, including those expanded from
        a range.

        The cross product is computed for keys with multiple values,
        with the last key varying fastest. For example::

            --id visit=1^2 ccd=1,1^2,2

        results in the following data ID dicts being appended, in this
        order, to ``namespace.<argument>.idList``:

            {"visit": "1", "ccd": "1,1"}
            {"visit": "1", "ccd": "2,2"}
            {"visit": "2", "ccd": "1,1"}
            {"visit": "2", "ccd": "2,2"}
        """
        if namespace.config is None:
            return
        idDict = collections.OrderedDict()
        for nameValue in values:
            name, _, valueStr = nameValue.partition("=")
            if name in idDict:
                parser.error(f"{name} appears multiple times in one ID argument: {option_string}")
            expanded = idDict[name] = []
            for item in valueStr.split("^"):
                mat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", item)
                if mat is None:
                    expanded.append(item)
                    continue
                first = int(mat.group(1))
                last = int(mat.group(2))
                stride = int(mat.group(3)) if mat.group(3) else 1
                if stride == 0:
                    # range() would raise ValueError; report it as a
                    # normal command-line error instead of a traceback.
                    parser.error(f"stride must be non-zero in {item!r} for {name}: {option_string}")
                # The upper bound is inclusive, unlike a Python range.
                expanded.extend(str(number) for number in range(first, last + 1, stride))

        keys = list(idDict)
        idDictList = [collections.OrderedDict(zip(keys, valList))
                      for valList in itertools.product(*idDict.values())]

        # The container for this argument lives on the namespace under
        # the option name without its leading dashes.
        argName = option_string.lstrip("-")
        ident = getattr(namespace, argName)
        ident.idList += idDictList
class LogLevelAction(argparse.Action):
    """argparse action that sets logging levels.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Set the level of the default or named loggers.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser; its ``error`` method reports unknown levels.
        namespace : `argparse.Namespace`
            Parsed command. Its ``log`` attribute is used when a level is
            given without a component name.
        values : `list`
            List of levels; each item must be of the form
            ``component_name=level`` or ``level``, where ``level`` is one
            of TRACE, DEBUG, INFO, WARN, ERROR or FATAL (not case
            sensitive).
        option_string : `str`
            Option value specified by the user.
        """
        permittedLevelList = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        for spec in values:
            component, _, levelStr = spec.partition("=")
            if not levelStr:
                # A bare "level" applies to the default logger.
                levelStr, component = component, None
            levelName = levelStr.upper()
            if levelName not in permittedLevelList:
                parser.error(f"loglevel={levelStr!r} not one of {permittedLevelList}")
            else:
                logLevel = getattr(lsstLog.Log, levelName)
            if component is None:
                namespace.log.setLevel(logLevel)
            else:
                lsstLog.Log.getLogger(component).setLevel(logLevel)
            # Mirror the level onto the matching Python logger as well.
            pyLevel = lsstLog.LevelTranslator.lsstLog2logging(logLevel)
            logging.getLogger(component).setLevel(pyLevel)
class ReuseAction(argparse.Action):
    """argparse action associated with ArgumentParser.addReuseOption."""

    def __call__(self, parser, namespace, value, option_string):
        """Store in ``namespace.reuse`` the steps whose outputs may be reused.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser. Not used.
        namespace : `argparse.Namespace`
            Parsed command; its ``reuse`` attribute is set to every choice
            up to and including ``value``.
        value : `str`
            One of ``self.choices``. ``"all"`` selects every choice that
            precedes the final ``"all"`` entry.
        option_string : `str`
            Option value specified by the user. Not used.

        Notes
        -----
        ``self.choices`` is expected to be ordered and to end with
        ``"all"``.
        """
        choices = self.choices
        if value == "all":
            if len(choices) < 2:
                # "all" is the only choice: there are no steps to reuse
                # (indexing choices[-2] would raise IndexError).
                namespace.reuse = choices[:0]
                return
            # "all" is an alias for the last real step.
            value = choices[-2]
        index = choices.index(value)
        namespace.reuse = choices[:index + 1]
def setDottedAttr(item, name, value):
    """Assign to a possibly nested attribute given by a dotted name
    (like `setattr`, but accepting names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be set.
    name : `str`
        Name of attribute to set.
    value : obj
        New value for the attribute.

    Notes
    -----
    For example if name is ``foo.bar.baz`` then ``item.foo.bar.baz``
    is set to the specified value.
    """
    *parentNames, leafName = name.split(".")
    owner = item
    # Walk down to the object that owns the final attribute.
    for parentName in parentNames:
        owner = getattr(owner, parentName)
    setattr(owner, leafName, value)
def getDottedAttr(item, name):
    """Look up a possibly nested attribute given by a dotted name
    (like `getattr`, but accepting names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be returned.
    name : `str`
        Name of the attribute to get.

    Returns
    -------
    itemAttr : obj
        If name is ``foo.bar.baz`` then the return value is
        ``item.foo.bar.baz``.
    """
    current = item
    remainder = name
    moreParts = True
    while moreParts:
        # Peel off one component at a time, left to right.
        head, sep, remainder = remainder.partition(".")
        current = getattr(current, head)
        moreParts = bool(sep)
    return current