Coverage for python/lsst/pipe/base/argumentParser.py : 11%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# LSST Data Management System
3# Copyright 2008-2015 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
# Names exported by ``from <module> import *``.
__all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
           "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]
25import abc
26import argparse
27import collections
28import fnmatch
29import itertools
30import logging
31import os
32import re
33import shlex
34import sys
35import shutil
36import textwrap
38import lsst.utils
39import lsst.utils.logging
40import lsst.pex.config as pexConfig
41import lsst.pex.config.history
42import lsst.log as lsstLog
43import lsst.daf.persistence as dafPersist
# Names of the environment variables that, when set, supply the default root
# directory against which the input, calib and output repository paths given
# on the command line are resolved (see `_fixPath`).
DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
50def _fixPath(defName, path):
51 """Apply environment variable as default root, if present, and abspath.
53 Parameters
54 ----------
55 defName : `str`
56 Name of environment variable containing default root path;
57 if the environment variable does not exist
58 then the path is relative to the current working directory
59 path : `str`
60 Path relative to default root path.
62 Returns
63 -------
64 abspath : `str`
65 Path that has been expanded, or `None` if the environment variable
66 does not exist and path is `None`.
67 """
68 defRoot = os.environ.get(defName)
69 if defRoot is None:
70 if path is None:
71 return None
72 return os.path.abspath(path)
73 return os.path.abspath(os.path.join(defRoot, path or ""))
class DataIdContainer:
    """Hold the data IDs given for one data ID argument, together with the
    data references computed from them.

    Parameters
    ----------
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        This class does not support `None`, but if it did, `None`
        would mean the level should not be restricted.

    Notes
    -----
    Subclass this container when data IDs need special handling to be
    turned into data references, and pass the subclass as
    ``ContainerClass`` to ``add_id_argument``.

    To stop the argument parser from computing data references at all,
    pass ``doMakeDataRefList=False`` to ``add_id_argument``.
    """

    def __init__(self, level=None):
        #: Dataset type of the data references (`str`); `None` until
        #: `setDatasetType` has been called.
        self.datasetType = None
        #: See parameter ``level`` (`str`).
        self.level = level
        #: Data IDs specified on the command line for the appropriate
        #: data ID argument (`list` of `dict`).
        self.idList = []
        #: Data references for the data IDs in ``idList``
        #: (`list` of `lsst.daf.persistence.ButlerDataRef`).
        #: Entries are omitted when the corresponding data is not found;
        #: the list stays empty if ``doMakeDataRefList=False`` was given
        #: to ``add_id_argument``.
        self.refList = []

    def setDatasetType(self, datasetType):
        """Record the dataset type once it is known.

        Parameters
        ----------
        datasetType : `str`
            Dataset type.

        Notes
        -----
        ``datasetType`` is deliberately not a constructor argument: some
        subclasses only learn the dataset type after the command line has
        been parsed, so it is always set afterwards to avoid special cases.
        """
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """Validate the data IDs and convert their values in place.

        Values arrive from the command line as `str`; each one is converted
        to the type that ``butler.getKeys`` reports for its key (for
        example ``"visit"`` values may need to be `int`). Keys the butler
        does not report are assumed to be `str`, with a warning.

        Parameters
        ----------
        butler : `lsst.daf.persistence.Butler`
            Data butler.

        Raises
        ------
        RuntimeError
            Raised if `setDatasetType` has not been called.
        KeyError
            Raised if the butler cannot supply keys for the dataset type
            and level.
        TypeError
            Raised if a value cannot be converted to the required type.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            keyTypes = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError as e:
            msg = f"Cannot get keys for datasetType {self.datasetType} at level {self.level}"
            raise KeyError(msg) from e

        for dataId in self.idList:
            for key, rawValue in dataId.items():
                try:
                    targetType = keyTypes[key]
                except KeyError:
                    # Unknown key: accept it, treat it as a string, and
                    # remember the guess so the warning is issued only once.
                    targetType = str

                    logger = lsst.utils.logging.getLogger()
                    logger.warning("Unexpected ID %s; guessing type is \"%s\"",
                                   key, 'str' if targetType == str else targetType)
                    keyTypes[key] = targetType

                if targetType == str:
                    # Already a string; nothing to convert.
                    continue
                try:
                    converted = targetType(rawValue)
                except Exception:
                    raise TypeError(f"Cannot cast value {rawValue!r} to {targetType} for ID key {key}")
                dataId[key] = converted

    def makeDataRefList(self, namespace):
        """Fill ``refList`` from the data IDs in ``idList``.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Results of parsing the command line. The ``butler`` and ``log``
            elements must be set.

        Raises
        ------
        RuntimeError
            Raised if `setDatasetType` has not been called.

        Notes
        -----
        Not called if ``add_id_argument`` was called with
        ``doMakeDataRefList=False``.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            found = dafPersist.searchDataRefs(butler, datasetType=self.datasetType,
                                              level=self.level, dataId=dataId)
            if found:
                self.refList += found
            else:
                namespace.log.warning("No data found for dataId=%s", dataId)
class DataIdArgument:
    """Description of one data ID argument, as registered by
    `ArgumentParser.add_id_argument`.

    Parameters
    ----------
    name : `str`
        Name of identifier (argument name without dashes).
    datasetType : `str` or `DynamicDatasetType`
        Type of dataset; a string for a fixed dataset type, or a
        `DynamicDatasetType` (such as `DatasetArgument`) when the dataset
        type is only known after the command line has been parsed.
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        Some container classes may also support `None`, which means
        the level should not be restricted; however the default class,
        `DataIdContainer`, does not support `None`.
    doMakeDataRefList : `bool`, optional
        If `True` (default), construct data references.
    ContainerClass : `class`, optional
        Class to contain data IDs and data references; the default class
        `DataIdContainer` will work for many, but not all, cases.

    Raises
    ------
    RuntimeError
        Raised if ``name`` starts with a dash.
    """

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        if name[:1] == "-":
            raise RuntimeError(f"Name {name} must not start with -")
        self.name = name
        # The guard above ensures there are no leading dashes to strip.
        self.argName = name.lstrip("-")
        self.datasetType = datasetType
        self.level = level
        self.ContainerClass = ContainerClass
        self.doMakeDataRefList = True if doMakeDataRefList else False

    @property
    def isDynamicDatasetType(self):
        """`True` if the dataset type is dynamic, that is, an instance of
        `DynamicDatasetType` rather than a fixed string.
        """
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """Return the dataset type as a string.

        Parameters
        ----------
        namespace
            Parsed command; only consulted for dynamic dataset types.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        if not self.isDynamicDatasetType:
            return self.datasetType
        return self.datasetType.getDatasetType(namespace)
class DynamicDatasetType(metaclass=abc.ABCMeta):
    """Abstract base class for a dataset type that is only known once the
    command-line arguments have been parsed.
    """

    def addArgument(self, parser, idName):
        """Optionally add a command-line argument that names the dataset
        type.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser to add the argument to.
        idName : `str`
            Name of data ID argument, without the leading ``"--"``,
            e.g. ``"id"``.

        Notes
        -----
        This base implementation is a no-op; subclasses override it when
        they need an argument of their own.
        """

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Return the dataset type as a string, derived from the parsed
        command-line arguments.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        raise NotImplementedError("Subclasses must override")
class DatasetArgument(DynamicDatasetType):
    """Dataset type specified by a command-line argument.

    Parameters
    ----------
    name : `str`, optional
        Name of command-line argument (including leading "--",
        if appropriate) whose value is the dataset type.
        If `None`, uses ``--idName_dstype`` where idName
        is the name of the data ID argument (e.g. "id").
    help : `str`, optional
        Help string for the command-line argument.
    default : `object`, optional
        Default value. If `None`, then the command-line option is required.
        This argument is ignored if the command-line argument is positional
        (name does not start with "-") because positional arguments do
        not support default values.
    """

    def __init__(self,
                 name=None,
                 help="dataset type to process from input data repository",
                 default=None,
                 ):
        DynamicDatasetType.__init__(self)
        self.name = name
        self.help = help
        self.default = default

    def getDatasetType(self, namespace):
        """Get the dataset type as a string, from the appropriate
        command-line argument.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.

        Raises
        ------
        RuntimeError
            Raised if no argument name is known yet, i.e. ``name`` was not
            given at construction and `addArgument` has not been called.
        """
        if self.name is None:
            raise RuntimeError("Must call addArgument first")
        if self.name.startswith("-"):
            # argparse derives the destination of an optional argument by
            # stripping the prefix and turning interior dashes into
            # underscores, so mirror that to find the parsed value.
            argName = self.name.lstrip("-").replace("-", "_")
        else:
            # Positional arguments are stored under their name unchanged.
            argName = self.name
        return getattr(namespace, argName)

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify the dataset type name.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        idName : `str`
            Name of the data ID argument, used to build the default
            argument name and help text.

        Notes
        -----
        Also sets the `name` attribute if it is currently `None`.
        """
        helpText = self.help if self.help else f"dataset type for {idName}"
        if self.name is None:
            self.name = f"--{idName}_dstype"
        argKwargs = dict(default=self.default, help=helpText)
        if self.name.startswith("-"):
            # ``required`` is only accepted for optional (dashed) arguments;
            # the option is mandatory exactly when no default was supplied.
            argKwargs["required"] = self.default is None
        parser.add_argument(self.name, **argKwargs)
class ConfigDatasetType(DynamicDatasetType):
    """Dataset type specified by a config parameter.

    Parameters
    ----------
    name : `str`
        Name of config option whose value is the dataset type; may be
        dotted to reach into nested configs.
    """

    def __init__(self, name):
        DynamicDatasetType.__init__(self)
        self.name = name

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate
        config field.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command; its ``config`` attribute is read.

        Returns
        -------
        datasetType : `str`
            Value of the config field named by ``name``.

        Raises
        ------
        RuntimeError
            Raised if any component of the dotted name cannot be found.
        """
        # getattr does not work reliably if the config field name is
        # dotted, so step through one level at a time
        value = namespace.config
        for key in self.name.split("."):
            try:
                value = getattr(value, key)
            except (AttributeError, KeyError) as e:
                # A missing attribute surfaces as AttributeError from
                # getattr; KeyError is kept for objects whose attribute
                # lookup is backed by a mapping.
                raise RuntimeError(f"Cannot find config parameter {self.name!r}") from e
        return value
class ArgumentParser(argparse.ArgumentParser):
    """Argument parser for command-line tasks that is based on
    `argparse.ArgumentParser`.

    Parameters
    ----------
    name : `str`
        Name of top-level task; used to identify camera-specific override
        files.
    usage : `str`, optional
        Command-line usage signature.
    **kwargs
        Additional keyword arguments for `argparse.ArgumentParser`.

    Notes
    -----
    Users may wish to add additional arguments before calling `parse_args`.
    """
    # I would prefer to check data ID keys and values as they are parsed,
    # but the required information comes from the butler, so I have to
    # construct a butler before I do this checking. Constructing a butler
    # is slow, so I only want to do it once, after parsing the command line,
    # so as to catch syntax errors quickly.

    requireOutput = True
    """Require an output directory to be specified (`bool`)."""

    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        # NOTE(review): the first epilog line ("Notes:") carries no
        # indentation, so textwrap.dedent leaves the indentation of the
        # remaining lines in place and the bullets appear indented in --help.
        argparse.ArgumentParser.__init__(self,
                                         usage=usage,
                                         fromfile_prefix_chars='@',
                                         epilog=textwrap.dedent("""Notes:
            * --config, --config-file or --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --config-file foo bar
                * wrong: --config-file=foo bar
            """),
                                         formatter_class=argparse.RawDescriptionHelpFormatter,
                                         **kwargs)
        self.add_argument(metavar='input', dest="rawInput",
                          help=f"path to input data repository, relative to ${DEFAULT_INPUT_NAME}")
        self.add_argument("--calib", dest="rawCalib",
                          help=f"path to input calibration repository, relative to ${DEFAULT_CALIB_NAME}")
        self.add_argument("--output", dest="rawOutput",
                          help="path to output data repository (need not exist), "
                               f"relative to ${DEFAULT_OUTPUT_NAME}")
        self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
                          help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
                               "optionally sets ROOT to ROOT/rerun/INPUT")
        self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
                          help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
        self.add_argument("-C", "--config-file", "--configfile",
                          dest="configfile", nargs="*", action=ConfigFileAction,
                          help="config override file(s)")
        self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
                          help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
                          metavar="LEVEL|COMPONENT=LEVEL")
        self.add_argument("--longlog", action=LongLogAction, help="use a more verbose format for the logging")
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument("--doraise", action="store_true",
                          help="raise an exception on error (else log a message and continue)?")
        self.add_argument("--noExit", action="store_true",
                          help="Do not exit even upon failure (i.e. return a struct to the calling script)")
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument("--show", nargs="+", default=(),
                          help="display the specified information to stdout and quit "
                               "(unless run is specified); information is "
                               "(config[=PATTERN]|history=PATTERN|tasks|data|run)")
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument("-t", "--timeout", type=float,
                          help="Timeout for multiprocessing; maximum wall time (sec)")
        self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
                          help=("remove and re-create the output directory if it already exists "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
                          help=("backup and then overwrite existing config files instead of checking them "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
                          help="Don't copy config to file~N backup.")
        # The trailing space after "checking" keeps the two adjacent string
        # literals from running together as "checkingthem".
        self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
                          help=("backup and then overwrite existing package versions instead of checking "
                                "them (safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
                          help="don't check package versions; useful for development")
        lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
""")

        # Forward all Python logging to lsst.log
        # NOTE(review): a new handler is attached to the root logger on every
        # construction; confirm that only one parser is built per process.
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        lgr.addHandler(lsstLog.LogHandler())

    def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
                        ContainerClass=DataIdContainer):
        """Add a data ID argument.

        Parameters
        ----------
        name : `str`
            Data ID argument (including leading dashes, if wanted).
        datasetType : `str` or `DynamicDatasetType`-type
            Type of dataset. Supply a string for a fixed dataset type.
            For a dynamically determined dataset type, supply
            a `DynamicDatasetType`, such as `DatasetArgument`.
        help : `str`
            Help string for the argument.
        level : `str`
            The lowest hierarchy level to descend to for this dataset type,
            for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
            Use `""` to use the mapper's default for the dataset type.
            Some container classes may also support `None`, which means
            the level should not be restricted; however the default class,
            `DataIdContainer`, does not support `None`.
        doMakeDataRefList : bool, optional
            If `True` (default), construct data references.
        ContainerClass : `class`, optional
            Class to contain data IDs and data references; the default class
            `DataIdContainer` will work for many, but not all, cases.

        Raises
        ------
        RuntimeError
            Raised if an argument of the same name was already added or the
            name collides with a reserved namespace attribute.

        Notes
        -----
        If ``datasetType`` is a `DynamicDatasetType`, its ``addArgument``
        method is called so that it can register an argument of its own
        (as `DatasetArgument` does to specify the dataset type).

        The associated data is put into ``namespace.<dataIdArgument.name>``
        as an instance of `ContainerClass`; the container includes fields:

        - ``idList``: a list of data ID dicts.
        - ``refList``: a list of `~lsst.daf.persistence.Butler`
          data references (empty if ``doMakeDataRefList`` is `False`).
        """
        argName = name.lstrip("-")

        if argName in self._dataIdArgDict:
            raise RuntimeError(f"Data ID argument {name} already exists")
        # These names are set on the namespace by parse_args itself.
        if argName in {"camera", "config", "butler", "log", "obsPkg"}:
            raise RuntimeError(f"Data ID argument {name} is a reserved name")

        self.add_argument(name, nargs="*", action=IdValueAction, help=help,
                          metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")

        dataIdArgument = DataIdArgument(
            name=argName,
            datasetType=datasetType,
            level=level,
            doMakeDataRefList=doMakeDataRefList,
            ContainerClass=ContainerClass,
        )

        if dataIdArgument.isDynamicDatasetType:
            datasetType.addArgument(parser=self, idName=argName)

        self._dataIdArgDict[argName] = dataIdArgument

    def parse_args(self, config, args=None, log=None, override=None):
        """Parse arguments for a command-line task.

        Parameters
        ----------
        config : `lsst.pex.config.Config`
            Config for the task being run.
        args : `list`, optional
            Argument list; if `None` then ``sys.argv[1:]`` is used.
        log : `lsst.log.Log` or `logging.Logger`, optional
            Logger instance; if `None` use the default log.
        override : callable, optional
            A config override function. It must take the root config object
            as its only argument and must modify the config in place.
            This function is called after camera-specific overrides files
            are applied, and before command-line config overrides
            are applied (thus allowing the user the final word).

        Returns
        -------
        namespace : `argparse.Namespace`
            A `~argparse.Namespace` instance containing fields:

            - ``camera``: camera name.
            - ``config``: the supplied config with all overrides applied,
              validated and frozen.
            - ``butler``: a `lsst.daf.persistence.Butler` for the data.
            - An entry for each of the data ID arguments registered by
              `add_id_argument`, of the type passed to its ``ContainerClass``
              keyword (`~lsst.pipe.base.DataIdContainer` by default). It
              includes public elements ``idList`` and ``refList``.
            - ``log``: a `lsst.pipe.base.TaskLogAdapter` log.
            - An entry for each command-line argument,
              with the following exceptions:

              - config is the supplied config, suitably updated.
              - configfile, id and loglevel are all missing.
            - ``obsPkg``: name of the ``obs_`` package for this camera.
        """
        if args is None:
            args = sys.argv[1:]

        # The input repository must come first because it is needed to find
        # the mapper (and hence camera overrides) before the full parse.
        if not args or args[0].startswith(("-", "@")):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                # exit() takes the status first and the message second;
                # status 1 matches what passing the text to sys.exit gave.
                self.exit(status=1,
                          message=f"{self.prog}: error: Must specify input as first argument\n")

        # Note that --rerun may change namespace.input, but if it does
        # we verify that the new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error(f"Error: input={namespace.input!r} not found")

        namespace.config = config
        # Ensure that the external logger is converted to the expected
        # logger class.
        namespace.log = lsst.utils.logging.getLogger(log.name) \
            if log is not None else lsst.utils.logging.getLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        if mapperClass is None:
            self.error(f"Error: no mapper specified for input repo {namespace.input!r}")

        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        self.handleCamera(namespace)

        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)

        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        butlerArgs = {}  # common arguments for butler elements
        if namespace.calib:
            butlerArgs = {'mapperArgs': {'calibRoot': namespace.calib}}
        if namespace.output:
            outputs = {'root': namespace.output, 'mode': 'rw'}
            inputs = {'root': namespace.input}
            inputs.update(butlerArgs)
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            # Without a separate output the input repo is opened read-write.
            outputs = {'root': namespace.input, 'mode': 'rw'}
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(outputs=outputs)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler,
        # hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict:
                for dataRef in getattr(namespace, dataIdName).refList:
                    print(f"{dataIdName} dataRef.dataId = {dataRef.dataId}")

        # --show quits after displaying unless "run" was also requested.
        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                print("Warning: no 'debug' module found", file=sys.stderr)
                namespace.debug = False

        del namespace.loglevel
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace

    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories

        This allows for hacking the directories, e.g., to include a
        "rerun".
        Modifications are made to the 'namespace' object in-place: the
        ``calib``, ``output`` and ``rerun`` attributes are set, ``input``
        may be replaced, and the ``raw*`` attributes are removed.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace holding ``input`` and the ``rawInput``,
            ``rawCalib``, ``rawOutput`` and ``rawRerun`` attributes.
        """
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the
        # namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument.
        # If rerun is specified as a colon separated value,
        # it will be parsed as an input and output.
        # The input value will be overridden if previously specified
        # (but a check is made to make sure both inputs use
        # the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                namespace.output = rerunDir[0]
                # An existing rerun records its parent repo as "_parent";
                # use that as the input so chained reruns resolve correctly.
                if os.path.exists(os.path.join(namespace.output, "_parent")):
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error(f"Error: invalid argument for --rerun: {namespace.rerun}")
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun

    def _processDataIds(self, namespace):
        """Process the parsed data for each data ID argument in an
        `~argparse.Namespace`.

        Processing includes:

        - Validate data ID keys.
        - Cast the data ID values to the correct type.
        - Compute data references from data IDs.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``butler``
            - ``log``
            - ``config``, if any dynamic dataset types are set by
              a config parameter.
            - Dataset type arguments (e.g. ``id_dstype``), if any dynamic
              dataset types are specified by such

            These attributes are modified:

            - ``<name>`` for each data ID argument registered using
              `add_id_argument` with name ``<name>``.
        """
        for dataIdArgument in self._dataIdArgDict.values():
            dataIdContainer = getattr(namespace, dataIdArgument.name)
            dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
            if dataIdArgument.doMakeDataRefList:
                try:
                    dataIdContainer.castDataIds(butler=namespace.butler)
                except (KeyError, TypeError) as e:
                    # failure of castDataIds indicates invalid command args
                    self.error(str(e))

                # failure of makeDataRefList indicates a bug
                # that wants a traceback
                dataIdContainer.makeDataRefList(namespace)

    def _applyInitialOverrides(self, namespace):
        """Apply obs-package-specific and camera-specific config
        override files, if found

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``obsPkg``
            - ``camera``
            - ``config``
            - ``log``

        Look in the package namespace.obsPkg for files:

        - ``config/<task_name>.py``
        - ``config/<camera_name>/<task_name>.py`` and load if found.
        """
        obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
        fileName = self._name + ".py"
        # The package-wide file is loaded first, then the camera-specific one.
        for filePath in (
            os.path.join(obsPkgDir, "config", fileName),
            os.path.join(obsPkgDir, "config", namespace.camera, fileName),
        ):
            if os.path.exists(filePath):
                namespace.log.info("Loading config overrride file %r", filePath)
                namespace.config.load(filePath)
            else:
                namespace.log.debug("Config override file does not exist: %r", filePath)

    def handleCamera(self, namespace):
        """Perform camera-specific operations before parsing the command-line.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace with the following fields:

            - ``camera``: the camera name.
            - ``config``: the config passed to parse_args, with no overrides
              applied.
            - ``obsPkg``: the ``obs_`` package for this camera.
            - ``log``: a `lsst.pipe.base.TaskLogAdapter` Log.

        Notes
        -----
        The default implementation does nothing.
        """
        pass

    def convert_arg_line_to_args(self, arg_line):
        """Allow files of arguments referenced by ``@<path>`` to contain
        multiple values on each line.

        Parameters
        ----------
        arg_line : `str`
            Line of text read from an argument file.

        Yields
        ------
        arg : `str`
            Each non-blank argument found on the line; nothing is yielded
            for blank lines and lines starting with ``#``.
        """
        arg_line = arg_line.strip()
        if not arg_line or arg_line.startswith("#"):
            return
        for arg in shlex.split(arg_line, comments=True, posix=True):
            if not arg.strip():
                continue
            yield arg

    def addReuseOption(self, choices):
        """Add a "--reuse-outputs-from SUBTASK" option to the argument
        parser.

        CmdLineTasks that can be restarted at an intermediate step using
        outputs from earlier (but still internal) steps should use this
        method to allow the user to control whether that happens when
        outputs from earlier steps are present.

        Parameters
        ----------
        choices : sequence
            A sequence of string names (by convention, top-level subtasks)
            that identify the steps that could be skipped when their
            outputs are already present. The list is ordered, so when the
            user specifies one step on the command line, all previous steps
            may be skipped as well. In addition to the choices provided,
            users may pass "all" to indicate that all steps may be thus
            skipped.

        When this method is called, the ``namespace`` object returned by
        ``parse_args`` will contain a ``reuse`` attribute containing
        a list of all steps that should be skipped if their outputs
        are already present.
        If no steps should be skipped, the ``reuse`` will be an empty list.
        """
        # Copy so the caller's sequence is not modified by adding "all".
        choices = [*choices, "all"]
        self.add_argument("--reuse-outputs-from", dest="reuse", choices=choices,
                          default=[], action=ReuseAction,
                          help=("Skip the given subtask and its predecessors and reuse their outputs "
                                "if those outputs already exist. Use 'all' to specify all subtasks."))
class InputOnlyArgumentParser(ArgumentParser):
    """`ArgumentParser` variant for command-line tasks that read inputs
    but never write any output.
    """

    # Nothing is written by such tasks, so no output is required.
    requireOutput = False
def getTaskDict(config, taskDict=None, baseName=""):
    """Collect the task name of every subtask found in a config.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    taskDict : `dict`, optional
        Users should not specify this argument. Supports recursion.
        If provided, taskDict is updated in place, else a new `dict`
        is started.
    baseName : `str`, optional
        Users should not specify this argument. It is only used for
        recursion: if a non-empty string then a period is appended
        and the result is used as a prefix for additional entries
        in taskDict; otherwise no prefix is used.

    Returns
    -------
    taskDict : `dict`
        Keys are config field names, values are task names.

    Notes
    -----
    This function calls itself recursively for each sub-config.
    The user should call with only a config (leaving taskDict and baseName
    at their default values).
    """
    result = {} if taskDict is None else taskDict
    prefix = f"{baseName}." if baseName else ""
    for fieldName, field in config.items():
        # Only fields exposing both ``value`` and ``target`` are subtasks.
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue
        qualifiedName = f"{prefix}{fieldName}"
        try:
            taskName = f"{field.target.__module__}.{field.target.__name__}"
        except Exception:
            # Targets without module/name attributes fall back to repr.
            taskName = repr(field.target)
        result[qualifiedName] = taskName
        getTaskDict(config=subConfig, taskDict=result, baseName=qualifiedName)
    return result
def obeyShowArgument(showOpts, config=None, exit=False):
    """Process arguments specified with ``--show`` (but ignore
    ``"data"``).

    Parameters
    ----------
    showOpts : `list` of `str`
        List of options passed to ``--show``.
    config : optional
        The provided config.
    exit : `bool`, optional
        Exit if ``"run"`` isn't included in ``showOpts``.

    Notes
    -----
    Supports the following options in showOpts:

    - ``config[=PAT]``. Dump all the config entries, or just the ones that
      match the glob pattern.
    - ``history=PAT``. Show where the config entries that match the glob
      pattern were set.
    - ``tasks``. Show task hierarchy.
    - ``data``. Ignored; to be processed by caller.
    - ``run``. Keep going (the default behaviour is to exit if
      ``--show`` is specified).

    A leading literal ``config.`` is stripped from ``PAT``.

    Calls ``sys.exit(1)`` if any other option is found, if ``history``
    is given without a pattern, or if a history lookup fails.
    """
    if not showOpts:
        return

    # Strips an optional literal "config." prefix from a pattern.  The dot
    # must be escaped: unescaped, a pattern such as "configure*" would lose
    # its first seven characters and become "re*".
    prefixRe = re.compile(r"^(?:config\.)?(.+)?")

    class FilteredStream:
        """A file object that only prints lines
        that match the glob "pattern".

        N.b. Newlines are silently discarded and reinserted;
        crude but effective.
        """

        def __init__(self, pattern):
            # obey case if pattern isn't lowercase or requests
            # NOIGNORECASE
            mat = re.search(r"(.*):NOIGNORECASE$", pattern)

            if mat:
                pattern = mat.group(1)
                self._pattern = re.compile(fnmatch.translate(pattern))
            else:
                if pattern != pattern.lower():
                    print(f"Matching {pattern!r} without regard to case "
                          "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
                self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

        def write(self, showStr):
            showStr = showStr.rstrip()
            # Strip off doc string line(s) and cut off
            # at "=" for string matching
            matchStr = showStr.split("\n")[-1].split("=")[0]
            if self._pattern.search(matchStr):
                print("\n" + showStr)

    for what in showOpts:
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            pattern = prefixRe.search(showArgs).group(1)
            fd = FilteredStream(pattern) if pattern else sys.stdout
            config.saveToStream(fd, "config")
        elif showCommand == "history":
            globPattern = prefixRe.search(showArgs).group(1)
            if not globPattern:
                print("Please provide a value with --show history (e.g. history=*.doXXX)", file=sys.stderr)
                sys.exit(1)

            error = False
            for i, fullName in enumerate(fnmatch.filter(config.names(), globPattern)):
                if i > 0:
                    print("")

                *cpath, cname = fullName.split(".")
                hconfig = config  # the config that we're interested in
                for depth, cpt in enumerate(cpath):
                    try:
                        hconfig = getattr(hconfig, cpt)
                    except AttributeError:
                        config_path = ".".join(["config"] + cpath[:depth])
                        print(f"Error: configuration {config_path} has no subconfig {cpt}", file=sys.stderr)
                        error = True
                        # The path cannot be resolved; do not report the
                        # history of a field on the wrong config object.
                        break
                else:
                    try:
                        print(pexConfig.history.format(hconfig, cname))
                    except KeyError:
                        config_path = ".".join(["config"] + cpath)
                        print(f"Error: {config_path} has no field {cname}", file=sys.stderr)
                        error = True

            if error:
                sys.exit(1)

        elif showCommand == "data":
            pass
        elif showCommand == "run":
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            choices = "', '".join("config[=XXX] data history=XXX tasks run".split())
            print(f"Unknown value for show: {what} (choose from {choices!r})", file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
def showTaskHierarchy(config):
    """Write the subtask hierarchy of a config to stdout.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.

    Notes
    -----
    Prints a ``Subtasks:`` header followed by one
    ``<fieldName>: <taskName>`` line per subtask, sorted by field name.
    """
    print("Subtasks:")
    # Keys are unique, so sorting the items orders purely by field name.
    for fieldName, taskName in sorted(getTaskDict(config=config).items()):
        print(f"{fieldName}: {taskName}")
class ConfigValueAction(argparse.Action):
    """argparse action that applies ``name=value`` config overrides
    given on the command line.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Apply one or more ``name=value`` config overrides.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser; its ``error`` method reports bad input.
        namespace : `argparse.Namespace`
            Parsed command. The ``namespace.config`` attribute is updated;
            nothing is done if it is `None`.
        values : `list`
            A list of ``configItemName=value`` pairs.
        option_string : `str`
            Option value specified by the user.
        """
        config = namespace.config
        if config is None:
            return
        for assignment in values:
            name, _, valueStr = assignment.partition("=")
            if not valueStr:
                parser.error(f"{option_string} value {assignment} must be in form name=value")

            # Try the raw string first; fall back to an evaluated value
            # only when the field rejects the string.
            try:
                setDottedAttr(config, name, valueStr)
            except AttributeError:
                parser.error(f"no config field: {name}")
            except Exception:
                self._assignEvaluated(parser, config, name, valueStr)

    @staticmethod
    def _assignEvaluated(parser, config, name, valueStr):
        """Evaluate ``valueStr`` as a Python expression and assign it."""
        # NOTE(review): eval runs arbitrary expressions taken from the
        # command line (with empty globals) -- confirm that this input is
        # always trusted.
        try:
            value = eval(valueStr, {})
        except Exception:
            parser.error(f"cannot parse {valueStr!r} as a value for {name}")
        try:
            setDottedAttr(config, name, value)
        except Exception as e:
            parser.error(f"cannot set config.{name}={value!r}: {e}")
class ConfigFileAction(argparse.Action):
    """argparse action that loads config overrides from one or more
    files.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Load each listed config override file, in order.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser; its ``error`` method reports a failed load.
        namespace : `argparse.Namespace`
            Parsed command. ``namespace.config`` is updated by this
            method; nothing is done if it is `None`.
        values : `list`
            A list of data config file paths.
        option_string : `str`, optional
            Option value specified by the user.
        """
        if namespace.config is not None:
            for path in values:
                try:
                    namespace.config.load(path)
                except Exception as exc:
                    parser.error(f"cannot load config file {path!r}: {exc}")
class IdValueAction(argparse.Action):
    """argparse action callback to process a data ID into a dict.
    """

    # Matches ``int..int`` with an optional ``:stride`` suffix.
    _RANGE_RE = re.compile(r"^(\d+)\.\.(\d+)(?::(\d+))?$")

    def __call__(self, parser, namespace, values, option_string):
        """Parse ``--id`` data and append results to
        ``namespace.<argument>.idList``.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser; its ``error`` method reports malformed input.
        namespace : `argparse.Namespace`
            Parsed command (an instance of argparse.Namespace).
            Nothing is done if ``namespace.config`` is `None`.
            The following attributes are updated:

            - ``<idName>.idList``, where ``<idName>`` is the name of the
              ID argument, for instance ``"id"`` for ID argument ``--id``.
        values : `list`
            A list of data IDs; see Notes below.
        option_string : `str`
            Option value specified by the user.

        Notes
        -----
        The data format is::

            key1=value1_1[^value1_2[^value1_3...]
            key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. ``value1_1``) may either be a string,
        or of the form ``"int..int"`` (e.g. ``"1..3"``) which is
        interpreted as ``"1^2^3"`` (inclusive, unlike a python range).
        So ``"0^2..4^7..9"`` is equivalent to ``"0^2^3^4^7^8^9"``.
        You may also specify a stride: ``"1..5:2"`` is ``"1^3^5"``.
        A stride of zero is rejected as a usage error.

        The cross product is computed for keys with multiple values,
        with the last key varying fastest. All values are stored as
        strings. For example::

            --id visit=1^2 ccd=1,1^2,2

        results in the following data ID dicts being appended to
        ``namespace.<argument>.idList``:

            {"visit":"1", "ccd":"1,1"}
            {"visit":"1", "ccd":"2,2"}
            {"visit":"2", "ccd":"1,1"}
            {"visit":"2", "ccd":"2,2"}
        """
        if namespace.config is None:
            return
        idDict = collections.OrderedDict()
        for nameValue in values:
            name, sep, valueStr = nameValue.partition("=")
            if not sep:
                # Without "=" the token would silently become a key whose
                # only value is the empty string.
                parser.error(f"{option_string} value {nameValue} must be in form key=value")
            if name in idDict:
                parser.error(f"{name} appears multiple times in one ID argument: {option_string}")
            expanded = idDict[name] = []
            for item in valueStr.split("^"):
                mat = self._RANGE_RE.search(item)
                if mat is None:
                    expanded.append(item)
                    continue
                start = int(mat.group(1))
                stop = int(mat.group(2))
                stride = int(mat.group(3)) if mat.group(3) else 1
                if stride == 0:
                    # range() raises ValueError on a zero step; report it
                    # as a usage error instead of a traceback.
                    parser.error(f"stride must be non-zero in {item!r} for {name}: {option_string}")
                expanded.extend(str(v) for v in range(start, stop + 1, stride))

        idDictList = [collections.OrderedDict(zip(idDict, valList))
                      for valList in itertools.product(*idDict.values())]

        # The destination attribute shares its name with the option,
        # minus the leading dashes.
        argName = option_string.lstrip("-")
        ident = getattr(namespace, argName)
        ident.idList += idDictList
class LongLogAction(argparse.Action):
    """argparse action that switches logging to a verbose format.

    Implemented as an action so that it can take effect before log level
    options.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Configure ``lsst.log`` with the long message layout.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser (unused).
        namespace : `argparse.Namespace`
            Parsed command. This argument is not used.
        values : `list`
            Unused.
        option_string : `str`
            Option value specified by the user (unused).
        """
        # log4j-style properties: a console appender whose pattern adds
        # timestamp, logger name, label and source location to messages.
        longLogProperties = """
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n
"""
        lsstLog.configure_prop(longLogProperties)
class LogLevelAction(argparse.Action):
    """argparse action that sets logger levels.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Set the level of the named loggers, or of the default one.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser; its ``error`` method reports unknown levels.
        namespace : `argparse.Namespace`
            Parsed command. ``namespace.log`` is the logger adjusted when
            an item names no component.
        values : `list`
            List of levels; each item must be of the form
            ``component_name=level`` or ``level``, where ``level`` is one
            of TRACE, DEBUG, INFO, WARN, ERROR or FATAL (not case
            sensitive).
        option_string : `str`
            Option value specified by the user.
        """
        permittedLevelList = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        for spec in values:
            head, _, tail = spec.partition("=")
            # With no "=" (or nothing after it) the text before it is the
            # level and there is no component.
            component, levelStr = (head, tail) if tail else (None, head)
            levelName = levelStr.upper()

            logger = namespace.log if component is None else lsst.utils.logging.getLogger(component)

            if levelName not in permittedLevelList:
                parser.error(f"loglevel={levelStr!r} not one of {permittedLevelList}")
            else:
                # The logger exposes its level constants as attributes.
                logLevel = getattr(logger, levelName)

            logger.setLevel(logLevel)

            # Mirror the level onto the other logging system as well.
            if isinstance(logger, lsstLog.Log):
                pyLevel = lsstLog.LevelTranslator.lsstLog2logging(logLevel)
                logging.getLogger(component or None).setLevel(pyLevel)
            else:
                lsstLogLevel = lsstLog.LevelTranslator.logging2lsstLog(logLevel)
                lsstLog.getLogger(component or "").setLevel(lsstLogLevel)
class ReuseAction(argparse.Action):
    """argparse action associated with ArgumentParser.addReuseOption."""

    def __call__(self, parser, namespace, value, option_string):
        """Store in ``namespace.reuse`` the steps whose outputs may be
        reused.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser (unused).
        namespace : `argparse.Namespace`
            Parsed command; its ``reuse`` attribute is set to a list.
        value : `str`
            The selected step, or ``"all"``.
        option_string : `str`
            Option value specified by the user (unused).

        Notes
        -----
        ``self.choices`` is expected to be the ordered step names followed
        by a final ``"all"`` entry. A named step selects itself and every
        step before it; ``"all"`` selects every step, which is an empty
        list when no steps were registered.
        """
        choices = list(self.choices)
        if value == "all":
            # Everything except the trailing "all" sentinel; unlike
            # indexing choices[-2] this also works with zero real steps.
            namespace.reuse = choices[:-1]
        else:
            namespace.reuse = choices[:choices.index(value) + 1]
def setDottedAttr(item, name, value):
    """Assign to a possibly nested attribute (`setattr` that understands
    hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be set.
    name : `str`
        Name of attribute to set; periods separate nesting levels.
    value : obj
        New value for the attribute.

    Notes
    -----
    For example if name is ``foo.bar.baz`` then ``item.foo.bar.baz``
    is set to the specified value.
    """
    *parentNames, leafName = name.split(".")
    owner = item
    # Walk down to the object that directly owns the final attribute.
    for parentName in parentNames:
        owner = getattr(owner, parentName)
    setattr(owner, leafName, value)
def getDottedAttr(item, name):
    """Fetch a possibly nested attribute (`getattr` that understands
    hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be returned.
    name : `str`
        Name of the attribute to get; periods separate nesting levels.

    Returns
    -------
    itemAttr : obj
        If name is ``foo.bar.baz`` then the return value is
        ``item.foo.bar.baz``.
    """
    head, sep, rest = name.partition(".")
    attr = getattr(item, head)
    # Recurse on the remainder only when a period was actually present.
    return getDottedAttr(attr, rest) if sep else attr