Coverage for python/lsst/pipe/base/argumentParser.py: 14%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# LSST Data Management System
3# Copyright 2008-2015 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
# Public names exported by ``from ... import *``; the Action classes listed
# here are defined later in this module.
__all__ = [
    "ArgumentParser",
    "ConfigFileAction",
    "ConfigValueAction",
    "DataIdContainer",
    "DatasetArgument",
    "ConfigDatasetType",
    "InputOnlyArgumentParser",
]
32import abc
33import argparse
34import collections
35import fnmatch
36import itertools
37import logging
38import os
39import re
40import shlex
41import shutil
42import sys
43import textwrap
45import lsst.daf.persistence as dafPersist
46import lsst.log as lsstLog
47import lsst.pex.config as pexConfig
48import lsst.pex.config.history
49import lsst.utils
50import lsst.utils.logging
# Names of environment variables that provide default repository root paths;
# see `_fixPath` for how they combine with command-line-supplied paths.
DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
57def _fixPath(defName, path):
58 """Apply environment variable as default root, if present, and abspath.
60 Parameters
61 ----------
62 defName : `str`
63 Name of environment variable containing default root path;
64 if the environment variable does not exist
65 then the path is relative to the current working directory
66 path : `str`
67 Path relative to default root path.
69 Returns
70 -------
71 abspath : `str`
72 Path that has been expanded, or `None` if the environment variable
73 does not exist and path is `None`.
74 """
75 defRoot = os.environ.get(defName)
76 if defRoot is None:
77 if path is None:
78 return None
79 return os.path.abspath(path)
80 return os.path.abspath(os.path.join(defRoot, path or ""))
class DataIdContainer:
    """Container for data IDs and associated data references.

    Parameters
    ----------
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        This class does not support `None`, but if it did, `None`
        would mean the level should not be restricted.

    Notes
    -----
    Override this class for data IDs that require special handling to be
    converted to ``data references``, and specify the override class
    as ``ContainerClass`` for ``add_id_argument``.

    If you don't want the argument parser to compute data references,
    specify ``doMakeDataRefList=False`` in ``add_id_argument``.
    """

    def __init__(self, level=None):
        self.datasetType = None
        """Dataset type of the data references (`str`).
        """
        self.level = level
        """See parameter ``level`` (`str`).
        """
        self.idList = []
        """List of data IDs specified on the command line for the
        appropriate data ID argument (`list` of `dict`).
        """
        self.refList = []
        """List of data references for the data IDs in ``idList``
        (`list` of `lsst.daf.persistence.ButlerDataRef`).
        Elements will be omitted if the corresponding data is not found.
        The list will be empty when returned by ``parse_args`` if
        ``doMakeDataRefList=False`` was specified in ``add_id_argument``.
        """

    def setDatasetType(self, datasetType):
        """Set actual dataset type, once it is known.

        Parameters
        ----------
        datasetType : `str`
            Dataset type.

        Notes
        -----
        The reason ``datasetType`` is not a constructor argument is that
        some subclasses do not know the dataset type until the command
        is parsed. Thus, to reduce special cases in the code,
        ``datasetType`` is always set after the command is parsed.
        """
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """Validate data IDs and cast them to the correct type
        (modify idList in place).

        This code casts the values in the data IDs dicts in `dataIdList`
        to the type required by the butler. Data IDs are read from the
        command line as `str`, but the butler requires some values to be
        other types. For example "visit" values should be `int`.

        Parameters
        ----------
        butler : `lsst.daf.persistence.Butler`
            Data butler.

        Raises
        ------
        RuntimeError
            Raised if `setDatasetType` was not called first.
        KeyError
            Raised if the butler cannot provide keys for the dataset type.
        TypeError
            Raised if a data ID value cannot be cast to the required type.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            idKeyTypeDict = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError as e:
            msg = f"Cannot get keys for datasetType {self.datasetType} at level {self.level}"
            raise KeyError(msg) from e

        for dataDict in self.idList:
            for key, strVal in dataDict.items():
                try:
                    keyType = idKeyTypeDict[key]
                except KeyError:
                    # OK, assume that it's a valid key and guess that it's a
                    # string
                    keyType = str

                    log = lsst.utils.logging.getLogger()
                    log.warning(
                        'Unexpected ID %s; guessing type is "%s"', key, "str" if keyType == str else keyType
                    )
                    idKeyTypeDict[key] = keyType

                if keyType != str:
                    try:
                        castVal = keyType(strVal)
                    except Exception as e:
                        # Chain the original exception so the root cause of
                        # the failed cast is preserved in the traceback.
                        raise TypeError(
                            f"Cannot cast value {strVal!r} to {keyType} for ID key {key}"
                        ) from e
                    dataDict[key] = castVal

    def makeDataRefList(self, namespace):
        """Compute refList based on idList.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Results of parsing command-line. The ``butler`` and ``log``
            elements must be set.

        Notes
        -----
        Not called if ``add_id_argument`` was called with
        ``doMakeDataRefList=False``.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            refList = dafPersist.searchDataRefs(
                butler, datasetType=self.datasetType, level=self.level, dataId=dataId
            )
            if not refList:
                namespace.log.warning("No data found for dataId=%s", dataId)
                continue
            self.refList += refList
class DataIdArgument:
    """data ID argument, used by `ArgumentParser.add_id_argument`.

    Parameters
    ----------
    name : `str`
        Name of identifier (argument name without dashes).
    datasetType : `str`
        Type of dataset; specify a string for a fixed dataset type
        or a `DatasetArgument` for a dynamic dataset type (e.g.
        one specified by a command-line argument).
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        Some container classes may also support `None`, which means
        the level should not be restricted; however the default class,
        `DataIdContainer`, does not support `None`.
    doMakeDataRefList : `bool`, optional
        If `True` (default), construct data references.
    ContainerClass : `class`, optional
        Class to contain data IDs and data references; the default class
        `DataIdContainer` will work for many, but not all, cases.
        For example if the dataset type is specified on the command line
        then use `DynamicDatasetType`.
    """

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        if name.startswith("-"):
            raise RuntimeError(f"Name {name} must not start with -")
        # Keep both the raw name and a dash-stripped form; the stripped
        # form is the attribute name used on the parsed namespace.
        self.name = name
        self.argName = name.lstrip("-")
        self.datasetType = datasetType
        self.level = level
        self.doMakeDataRefList = bool(doMakeDataRefList)
        self.ContainerClass = ContainerClass

    @property
    def isDynamicDatasetType(self):
        """`True` if the dataset type is dynamic (that is, specified
        on the command line).
        """
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """Get the dataset type as a string.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        # Fixed dataset types are stored directly; dynamic ones are
        # resolved from the parsed namespace.
        if not self.isDynamicDatasetType:
            return self.datasetType
        return self.datasetType.getDatasetType(namespace)
class DynamicDatasetType(metaclass=abc.ABCMeta):
    """Abstract base class for a dataset type determined from parsed
    command-line arguments.
    """

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify dataset type name,
        if wanted.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser to add the argument to.
        idName : `str`
            Name of data ID argument, without the leading ``"--"``,
            e.g. ``"id"``.

        Notes
        -----
        The default implementation is a no-op; subclasses that need an
        extra command-line argument override this.
        """

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Get the dataset type as a string, based on parsed command-line
        arguments.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        raise NotImplementedError("Subclasses must override")
class DatasetArgument(DynamicDatasetType):
    """Dataset type specified by a command-line argument.

    Parameters
    ----------
    name : `str`, optional
        Name of command-line argument (including leading "--",
        if appropriate) whose value is the dataset type.
        If `None`, uses ``--idName_dstype`` where idName
        is the name of the data ID argument (e.g. "id").
    help : `str`, optional
        Help string for the command-line argument.
    default : `object`, optional
        Default value. If `None`, then the command-line option is required.
        This argument is ignored if the command-line argument is positional
        (name does not start with "-") because positional arguments do
        not support default values.
    """

    def __init__(
        self,
        name=None,
        help="dataset type to process from input data repository",
        default=None,
    ):
        DynamicDatasetType.__init__(self)
        self.name = name
        self.help = help
        self.default = default

    def getDatasetType(self, namespace):
        """Get the dataset type as a string, from the appropriate
        command-line argument.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        # self.name is expected to be set by this point: addArgument
        # fills it in before parsing when the caller did not supply one.
        argName = self.name.lstrip("-")
        return getattr(namespace, argName)

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify the dataset type name.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        idName : `str`
            Data ID.

        Notes
        -----
        Also sets the `name` attribute if it is currently `None`.
        """
        help = self.help if self.help else f"dataset type for {idName}"
        if self.name is None:
            self.name = f"--{idName}_dstype"
        requiredDict = dict()
        if self.name.startswith("-"):
            # The option is required only when no default was supplied;
            # positional arguments never receive required=.
            requiredDict = dict(required=self.default is None)
        parser.add_argument(self.name, default=self.default, help=help, **requiredDict)
class ConfigDatasetType(DynamicDatasetType):
    """Dataset type specified by a config parameter.

    Parameters
    ----------
    name : `str`
        Name of config option whose value is the dataset type.
    """

    def __init__(self, name):
        DynamicDatasetType.__init__(self)
        self.name = name

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate
        config field.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command.

        Raises
        ------
        RuntimeError
            Raised if the config parameter named by ``self.name``
            does not exist.
        """
        # getattr does not work reliably if the config field name is
        # dotted, so step through one level at a time
        value = namespace.config
        for key in self.name.split("."):
            try:
                value = getattr(value, key)
            except (AttributeError, KeyError) as e:
                # getattr on ordinary objects raises AttributeError, not
                # KeyError; catch both (some config containers raise
                # KeyError) and chain so the failed lookup is preserved.
                raise RuntimeError(f"Cannot find config parameter {self.name!r}") from e
        return value
class ArgumentParser(argparse.ArgumentParser):
    """Argument parser for command-line tasks that is based on
    `argparse.ArgumentParser`.

    Parameters
    ----------
    name : `str`
        Name of top-level task; used to identify camera-specific override
        files.
    usage : `str`, optional
        Command-line usage signature.
    **kwargs
        Additional keyword arguments for `argparse.ArgumentParser`.

    Notes
    -----
    Users may wish to add additional arguments before calling `parse_args`.
    """

    # I would prefer to check data ID keys and values as they are parsed,
    # but the required information comes from the butler, so I have to
    # construct a butler before I do this checking. Constructing a butler
    # is slow, so I only want do it once, after parsing the command line,
    # so as to catch syntax errors quickly.

    requireOutput = True
    """Require an output directory to be specified (`bool`)."""

    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        argparse.ArgumentParser.__init__(
            self,
            usage=usage,
            fromfile_prefix_chars="@",
            epilog=textwrap.dedent(
                """Notes:
            * --config, --config-file or --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --config-file foo bar
                * wrong: --config-file=foo bar
            """
            ),
            formatter_class=argparse.RawDescriptionHelpFormatter,
            **kwargs,
        )
        # Raw path arguments are post-processed (env-var expansion,
        # --rerun handling) by _parseDirectories, hence the "raw" dests.
        self.add_argument(
            metavar="input",
            dest="rawInput",
            help=f"path to input data repository, relative to ${DEFAULT_INPUT_NAME}",
        )
        self.add_argument(
            "--calib",
            dest="rawCalib",
            help=f"path to input calibration repository, relative to ${DEFAULT_CALIB_NAME}",
        )
        self.add_argument(
            "--output",
            dest="rawOutput",
            help=f"path to output data repository (need not exist), relative to ${DEFAULT_OUTPUT_NAME}",
        )
        self.add_argument(
            "--rerun",
            dest="rawRerun",
            metavar="[INPUT:]OUTPUT",
            help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; optionally sets ROOT to ROOT/rerun/INPUT",
        )
        # Config and logging overrides; the custom Action classes are
        # defined elsewhere in this module.
        self.add_argument(
            "-c",
            "--config",
            nargs="*",
            action=ConfigValueAction,
            help="config override(s), e.g. -c foo=newfoo bar.baz=3",
            metavar="NAME=VALUE",
        )
        self.add_argument(
            "-C",
            "--config-file",
            "--configfile",
            dest="configfile",
            nargs="*",
            action=ConfigFileAction,
            help="config override file(s)",
        )
        self.add_argument(
            "-L",
            "--loglevel",
            nargs="*",
            action=LogLevelAction,
            help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
            metavar="LEVEL|COMPONENT=LEVEL",
        )
        self.add_argument("--longlog", action=LongLogAction, help="use a more verbose format for the logging")
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument(
            "--doraise",
            action="store_true",
            help="raise an exception on error (else log a message and continue)?",
        )
        self.add_argument(
            "--noExit",
            action="store_true",
            help="Do not exit even upon failure (i.e. return a struct to the calling script)",
        )
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument(
            "--show",
            nargs="+",
            default=(),
            help="display the specified information to stdout and quit "
            "(unless run is specified); information is "
            "(config[=PATTERN]|history=PATTERN|tasks|data|run)",
        )
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument(
            "-t", "--timeout", type=float, help="Timeout for multiprocessing; maximum wall time (sec)"
        )
        self.add_argument(
            "--clobber-output",
            action="store_true",
            dest="clobberOutput",
            default=False,
            help=(
                "remove and re-create the output directory if it already exists "
                "(safe with -j, but not all other forms of parallel execution)"
            ),
        )
        self.add_argument(
            "--clobber-config",
            action="store_true",
            dest="clobberConfig",
            default=False,
            help=(
                "backup and then overwrite existing config files instead of checking them "
                "(safe with -j, but not all other forms of parallel execution)"
            ),
        )
        self.add_argument(
            "--no-backup-config",
            action="store_true",
            dest="noBackupConfig",
            default=False,
            help="Don't copy config to file~N backup.",
        )
        self.add_argument(
            "--clobber-versions",
            action="store_true",
            dest="clobberVersions",
            default=False,
            help=(
                "backup and then overwrite existing package versions instead of checking"
                "them (safe with -j, but not all other forms of parallel execution)"
            ),
        )
        self.add_argument(
            "--no-versions",
            action="store_true",
            dest="noVersions",
            default=False,
            help="don't check package versions; useful for development",
        )
        # Configure the lsst.log (log4cxx) root logger to write to stdout.
        lsstLog.configure_prop(
            """
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
"""
        )

        # Forward all Python logging to lsst.log
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        lgr.addHandler(lsstLog.LogHandler())

    def add_id_argument(
        self, name, datasetType, help, level=None, doMakeDataRefList=True, ContainerClass=DataIdContainer
    ):
        """Add a data ID argument.

        Parameters
        ----------
        name : `str`
            Data ID argument (including leading dashes, if wanted).
        datasetType : `str` or `DynamicDatasetType`-type
            Type of dataset. Supply a string for a fixed dataset type.
            For a dynamically determined dataset type, supply
            a `DynamicDatasetType`, such as `DatasetArgument`.
        help : `str`
            Help string for the argument.
        level : `str`
            The lowest hierarchy level to descend to for this dataset type,
            for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
            Use `""` to use the mapper's default for the dataset type.
            Some container classes may also support `None`, which means
            the level should not be restricted; however the default class,
            `DataIdContainer`, does not support `None`.
        doMakeDataRefList : bool, optional
            If `True` (default), construct data references.
        ContainerClass : `class`, optional
            Class to contain data IDs and data references; the default class
            `DataIdContainer` will work for many, but not all, cases.
            For example if the dataset type is specified on the command line
            then use `DynamicDatasetType`.

        Notes
        -----
        If ``datasetType`` is an instance of `DatasetArgument`,
        then add a second argument to specify the dataset type.

        The associated data is put into ``namespace.<dataIdArgument.name>``
        as an instance of `ContainerClass`; the container includes fields:

        - ``idList``: a list of data ID dicts.
        - ``refList``: a list of `~lsst.daf.persistence.Butler`
          data references (empty if ``doMakeDataRefList`` is `False`).
        """
        argName = name.lstrip("-")

        if argName in self._dataIdArgDict:
            raise RuntimeError(f"Data ID argument {name} already exists")
        # These names are set directly on the namespace by parse_args and
        # must not collide with a data ID container.
        if argName in set(("camera", "config", "butler", "log", "obsPkg")):
            raise RuntimeError(f"Data ID argument {name} is a reserved name")

        self.add_argument(
            name, nargs="*", action=IdValueAction, help=help, metavar="KEY=VALUE1[^VALUE2[^VALUE3...]"
        )

        dataIdArgument = DataIdArgument(
            name=argName,
            datasetType=datasetType,
            level=level,
            doMakeDataRefList=doMakeDataRefList,
            ContainerClass=ContainerClass,
        )

        if dataIdArgument.isDynamicDatasetType:
            # Give the dynamic dataset type a chance to add its own
            # command-line argument (e.g. --id_dstype).
            datasetType.addArgument(parser=self, idName=argName)

        self._dataIdArgDict[argName] = dataIdArgument

    def parse_args(self, config, args=None, log=None, override=None):
        """Parse arguments for a command-line task.

        Parameters
        ----------
        config : `lsst.pex.config.Config`
            Config for the task being run.
        args : `list`, optional
            Argument list; if `None` then ``sys.argv[1:]`` is used.
        log : `lsst.log.Log` or `logging.Logger`, optional
            Logger instance; if `None` use the default log.
        override : callable, optional
            A config override function. It must take the root config object
            as its only argument and must modify the config in place.
            This function is called after camera-specific overrides files
            are applied, and before command-line config overrides
            are applied (thus allowing the user the final word).

        Returns
        -------
        namespace : `argparse.Namespace`
            A `~argparse.Namespace` instance containing fields:

            - ``camera``: camera name.
            - ``config``: the supplied config with all overrides applied,
              validated and frozen.
            - ``butler``: a `lsst.daf.persistence.Butler` for the data.
            - An entry for each of the data ID arguments registered by
              `add_id_argument`, of the type passed to its ``ContainerClass``
              keyword (`~lsst.pipe.base.DataIdContainer` by default). It
              includes public elements ``idList`` and ``refList``.
            - ``log``: a `lsst.pipe.base.TaskLogAdapter` log.
            - An entry for each command-line argument,
              with the following exceptions:

              - config is the supplied config, suitably updated.
              - configfile, id and loglevel are all missing.
            - ``obsPkg``: name of the ``obs_`` package for this camera.
        """
        if args is None:
            args = sys.argv[1:]

        # The first positional argument must be the input repository;
        # anything starting with "-" or "@" means it was omitted.
        if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                self.exit(f"{self.prog}: error: Must specify input as first argument")

        # Note that --rerun may change namespace.input, but if it does
        # we verify that the new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error(f"Error: input={namespace.input!r} not found")

        namespace.config = config
        # Ensure that the external logger is converted to the expected
        # logger class.
        namespace.log = (
            lsst.utils.logging.getLogger(log.name) if log is not None else lsst.utils.logging.getLogger()
        )
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        if mapperClass is None:
            self.error(f"Error: no mapper specified for input repo {namespace.input!r}")

        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        # Camera-specific hook, then obs-package config override files,
        # then the caller-supplied override function; command-line config
        # overrides are applied later by argparse actions.
        self.handleCamera(namespace)

        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)

        # obeyShowArgument is defined elsewhere in this module.
        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error(
                "no output directory specified.\n"
                "An output directory must be specified with the --output or --rerun\n"
                "command-line arguments.\n"
            )

        butlerArgs = {}  # common arguments for butler elements
        if namespace.calib:
            butlerArgs = {"mapperArgs": {"calibRoot": namespace.calib}}
        if namespace.output:
            outputs = {"root": namespace.output, "mode": "rw"}
            inputs = {"root": namespace.input}
            inputs.update(butlerArgs)
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            outputs = {"root": namespace.input, "mode": "rw"}
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(outputs=outputs)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler,
        # hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict.keys():
                for dataRef in getattr(namespace, dataIdName).refList:
                    print(f"{dataIdName} dataRef.dataId = {dataRef.dataId}")

        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug  # type: ignore

                assert debug  # silence pyflakes (above silences mypy)
            except ImportError:
                print("Warning: no 'debug' module found", file=sys.stderr)
                namespace.debug = False

        del namespace.loglevel
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace

    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories

        This allows for hacking the directories, e.g., to include a
        "rerun".
        Modifications are made to the 'namespace' object in-place.
        """
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the
        # namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument.
        # If rerun is specified as a colon separated value,
        # it will be parsed as an input and output.
        # The input value will be overridden if previously specified
        # (but a check is made to make sure both inputs use
        # the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                namespace.output = rerunDir[0]
                # A "_parent" link means the rerun already chains back to
                # an input repo; use that as the new input.
                if os.path.exists(os.path.join(namespace.output, "_parent")):
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error(f"Error: invalid argument for --rerun: {namespace.rerun}")
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        # The raw* attributes are consumed here and must not leak out.
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun

    def _processDataIds(self, namespace):
        """Process the parsed data for each data ID argument in an
        `~argparse.Namespace`.

        Processing includes:

        - Validate data ID keys.
        - Cast the data ID values to the correct type.
        - Compute data references from data IDs.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``butler``
            - ``log``
            - ``config``, if any dynamic dataset types are set by
              a config parameter.
            - Dataset type arguments (e.g. ``id_dstype``), if any dynamic
              dataset types are specified by such

            These attributes are modified:

            - ``<name>`` for each data ID argument registered using
              `add_id_argument` with name ``<name>``.
        """
        for dataIdArgument in self._dataIdArgDict.values():
            dataIdContainer = getattr(namespace, dataIdArgument.name)
            dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
            if dataIdArgument.doMakeDataRefList:
                try:
                    dataIdContainer.castDataIds(butler=namespace.butler)
                except (KeyError, TypeError) as e:
                    # failure of castDataIds indicates invalid command args
                    self.error(e)

                # failure of makeDataRefList indicates a bug
                # that wants a traceback
                dataIdContainer.makeDataRefList(namespace)

    def _applyInitialOverrides(self, namespace):
        """Apply obs-package-specific and camera-specific config
        override files, if found

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``obsPkg``

        Look in the package namespace.obsPkg for files:

        - ``config/<task_name>.py``
        - ``config/<camera_name>/<task_name>.py`` and load if found.
        """
        obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
        fileName = self._name + ".py"
        # Package-wide override first, then the camera-specific one so the
        # more specific file wins.
        for filePath in (
            os.path.join(obsPkgDir, "config", fileName),
            os.path.join(obsPkgDir, "config", namespace.camera, fileName),
        ):
            if os.path.exists(filePath):
                namespace.log.info("Loading config overrride file %r", filePath)
                namespace.config.load(filePath)
            else:
                namespace.log.debug("Config override file does not exist: %r", filePath)

    def handleCamera(self, namespace):
        """Perform camera-specific operations before parsing the command-line.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace with the following fields:

            - ``camera``: the camera name.
            - ``config``: the config passed to parse_args, with no overrides
              applied.
            - ``obsPkg``: the ``obs_`` package for this camera.
            - ``log``: a `lsst.pipe.base.TaskLogAdapter` Log.

        Notes
        -----
        The default implementation does nothing.
        """
        pass

    def convert_arg_line_to_args(self, arg_line):
        """Allow files of arguments referenced by ``@<path>`` to contain
        multiple values on each line.

        Parameters
        ----------
        arg_line : `str`
            Line of text read from an argument file.
        """
        arg_line = arg_line.strip()
        # Skip blank lines and full-line comments.
        if not arg_line or arg_line.startswith("#"):
            return
        for arg in shlex.split(arg_line, comments=True, posix=True):
            if not arg.strip():
                continue
            yield arg

    def addReuseOption(self, choices):
        """Add a "--reuse-outputs-from SUBTASK" option to the argument
        parser.

        CmdLineTasks that can be restarted at an intermediate step using
        outputs from earlier (but still internal) steps should use this
        method to allow the user to control whether that happens when
        outputs from earlier steps are present.

        Parameters
        ----------
        choices : sequence
            A sequence of string names (by convention, top-level subtasks)
            that identify the steps that could be skipped when their
            outputs are already present. The list is ordered, so when the
            user specifies one step on the command line, all previous steps
            may be skipped as well. In addition to the choices provided,
            users may pass "all" to indicate that all steps may be thus
            skipped.

            When this method is called, the ``namespace`` object returned by
            ``parse_args`` will contain a ``reuse`` attribute containing
            a list of all steps that should be skipped if their outputs
            are already present.
            If no steps should be skipped, the ``reuse`` will be an empty list.
        """
        choices = list(choices)
        choices.append("all")
        self.add_argument(
            "--reuse-outputs-from",
            dest="reuse",
            choices=choices,
            default=[],
            action=ReuseAction,
            help=(
                "Skip the given subtask and its predecessors and reuse their outputs "
                "if those outputs already exist. Use 'all' to specify all subtasks."
            ),
        )
class InputOnlyArgumentParser(ArgumentParser):
    """`ArgumentParser` for command-line tasks that don't write any output."""

    # Input-only tasks never need an output data repository.
    requireOutput = False
def getTaskDict(config, taskDict=None, baseName=""):
    """Get a dictionary of task info for all subtasks in a config.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    taskDict : `dict`, optional
        Users should not specify this argument. Supports recursion.
        If provided, taskDict is updated in place, else a new `dict`
        is started.
    baseName : `str`, optional
        Users should not specify this argument. It is only used for
        recursion: if a non-empty string then a period is appended
        and the result is used as a prefix for additional entries
        in taskDict; otherwise no prefix is used.

    Returns
    -------
    taskDict : `dict`
        Keys are config field names, values are task names.

    Notes
    -----
    This function is designed to be called recursively.
    The user should call with only a config (leaving taskDict and baseName
    at their default values).
    """
    if taskDict is None:
        taskDict = {}
    for name, field in config.items():
        # ConfigurableField-like entries expose both a ``value`` (the
        # sub-config) and a ``target`` (the task class) attribute.
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        childConfig = field.value
        if not isinstance(childConfig, pexConfig.Config):
            continue
        fullName = f"{baseName}.{name}" if baseName else name
        try:
            taskLabel = f"{field.target.__module__}.{field.target.__name__}"
        except Exception:
            # Fall back to repr when the target lacks module/name metadata.
            taskLabel = repr(field.target)
        taskDict[fullName] = taskLabel
        getTaskDict(config=childConfig, taskDict=taskDict, baseName=fullName)
    return taskDict
def obeyShowArgument(showOpts, config=None, exit=False):
    """Process arguments specified with ``--show`` (but ignores
    ``"data"``).

    Parameters
    ----------
    showOpts : `list` of `str`
        List of options passed to ``--show``.
    config : optional
        The provided config.
    exit : `bool`, optional
        Exit if ``"run"`` isn't included in ``showOpts``.

    Notes
    -----
    Supports the following options in showOpts:

    - ``config[=PAT]``. Dump all the config entries, or just the ones that
      match the glob pattern.
    - ``history=PAT``. Show where the config entries that match the glob
      pattern were set.
    - ``tasks``. Show task hierarchy.
    - ``data``. Ignored; to be processed by caller.
    - ``run``. Keep going (the default behaviour is to exit if
      ``--show`` is specified).

    Calls ``sys.exit(1)`` if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        # Split "name=value" options; plain options get an empty argument.
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            # Accept an optional leading "config." on the pattern.
            matConfig = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matConfig.group(1)
            if pattern:

                class FilteredStream:
                    """A file object that only prints lines
                    that match the glob "pattern".

                    N.b. Newlines are silently discarded and reinserted;
                    crude but effective.
                    """

                    def __init__(self, pattern):
                        # obey case if pattern isn't lowercase or requests
                        # NOIGNORECASE
                        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

                        if mat:
                            pattern = mat.group(1)
                            self._pattern = re.compile(fnmatch.translate(pattern))
                        else:
                            if pattern != pattern.lower():
                                print(
                                    f"Matching {pattern!r} without regard to case "
                                    "(append :NOIGNORECASE to prevent this)",
                                    file=sys.stdout,
                                )
                            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

                    def write(self, showStr):
                        showStr = showStr.rstrip()
                        # Strip off doc string line(s) and cut off
                        # at "=" for string matching
                        matchStr = showStr.split("\n")[-1].split("=")[0]
                        if self._pattern.search(matchStr):
                            print("\n" + showStr)

                fd = FilteredStream(pattern)
            else:
                fd = sys.stdout

            config.saveToStream(fd, "config")
        elif showCommand == "history":
            matHistory = re.search(r"^(?:config.)?(.+)?", showArgs)
            globPattern = matHistory.group(1)
            if not globPattern:
                print("Please provide a value with --show history (e.g. history=*.doXXX)", file=sys.stderr)
                sys.exit(1)

            error = False
            for i, pattern in enumerate(fnmatch.filter(config.names(), globPattern)):
                if i > 0:
                    print("")

                pattern = pattern.split(".")
                cpath, cname = pattern[:-1], pattern[-1]
                hconfig = config  # the config that we're interested in
                # NOTE(review): this inner loop reuses ``i`` from the outer
                # enumerate; harmless here only because the outer ``i`` is
                # re-bound on the next outer iteration.
                for i, cpt in enumerate(cpath):
                    try:
                        hconfig = getattr(hconfig, cpt)
                    except AttributeError:
                        config_path = ".".join(["config"] + cpath[:i])
                        print(f"Error: configuration {config_path} has no subconfig {cpt}", file=sys.stderr)
                        error = True

                try:
                    print(pexConfig.history.format(hconfig, cname))
                except KeyError:
                    config_path = ".".join(["config"] + cpath)
                    print(f"Error: {config_path} has no field {cname}", file=sys.stderr)
                    error = True

            if error:
                sys.exit(1)

        elif showCommand == "data":
            # Data IDs are handled by the caller.
            pass
        elif showCommand == "run":
            # "run" only affects the final exit decision below.
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            choices = "', '".join("config[=XXX] data history=XXX tasks run".split())
            print(f"Unknown value for show: {what} (choose from {choices!r})", file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
def showTaskHierarchy(config):
    """Print task hierarchy to stdout.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    """
    print("Subtasks:")
    subtasks = getTaskDict(config=config)

    # Print in deterministic (sorted) order of config field name.
    for fieldName in sorted(subtasks):
        print(f"{fieldName}: {subtasks[fieldName]}")
class ConfigValueAction(argparse.Action):
    """argparse action callback to override config parameters using
    name=value pairs from the command-line.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Override one or more config name value pairs.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The ``namespace.config`` attribute is updated.
        values : `list`
            A list of ``configItemName=value`` pairs.
        option_string : `str`
            Option value specified by the user.
        """
        if namespace.config is None:
            return
        for pair in values:
            name, _, strValue = pair.partition("=")
            if not strValue:
                parser.error(f"{option_string} value {pair} must be in form name=value")

            # First try the raw string; if the field rejects it, fall back
            # to evaluating it as a Python expression.
            try:
                setDottedAttr(namespace.config, name, strValue)
                continue
            except AttributeError:
                parser.error(f"no config field: {name}")
                continue
            except Exception:
                pass

            try:
                # NOTE: eval of a user-supplied command-line value is
                # intentional; the user already controls this process.
                parsedValue = eval(strValue, {})
            except Exception:
                parser.error(f"cannot parse {strValue!r} as a value for {name}")
            try:
                setDottedAttr(namespace.config, name, parsedValue)
            except Exception as e:
                parser.error(f"cannot set config.{name}={parsedValue!r}: {e}")
class ConfigFileAction(argparse.Action):
    """argparse action to load config overrides from one or more files."""

    def __call__(self, parser, namespace, values, option_string=None):
        """Load one or more files of config overrides.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The following attributes are updated by this
            method: ``namespace.config``.
        values : `list`
            A list of data config file paths.
        option_string : `str`, optional
            Option value specified by the user.
        """
        config = namespace.config
        if config is None:
            # Nothing to override (e.g. during --help processing).
            return
        for path in values:
            try:
                config.load(path)
            except Exception as e:
                parser.error(f"cannot load config file {path!r}: {e}")
class IdValueAction(argparse.Action):
    """argparse action callback to process a data ID into a dict."""

    def __call__(self, parser, namespace, values, option_string):
        """Parse ``--id`` data and append results to
        ``namespace.<argument>.idList``.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command (an instance of argparse.Namespace).
            The following attributes are updated:

            - ``<idName>.idList``, where ``<idName>`` is the name of the
              ID argument, for instance ``"id"`` for ID argument ``--id``.
        values : `list`
            A list of data IDs; see Notes below.
        option_string : `str`
            Option value specified by the user.

        Notes
        -----
        The data format is::

            key1=value1_1[^value1_2[^value1_3...]
            key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. ``value1_1``) may either be a string,
        or of the form ``"int..int"`` (e.g. ``"1..3"``) which is
        interpreted as ``"1^2^3"`` (inclusive, unlike a python range).
        So ``"0^2..4^7..9"`` is equivalent to ``"0^2^3^4^7^8^9"``.
        You may also specify a stride: ``"1..5:2"`` is ``"1^3^5"``.

        The cross product is computed for keys with multiple values.
        For example::

            --id visit 1^2 ccd 1,1^2,2

        results in the following data ID dicts being appended to
        ``namespace.<argument>.idList``:

            {"visit":1, "ccd":"1,1"}
            {"visit":2, "ccd":"1,1"}
            {"visit":1, "ccd":"2,2"}
            {"visit":2, "ccd":"2,2"}
        """
        if namespace.config is None:
            return
        keyValues = collections.OrderedDict()
        for pair in values:
            key, _, rawValue = pair.partition("=")
            if key in keyValues:
                parser.error(f"{key} appears multiple times in one ID argument: {option_string}")
            expanded = []
            for piece in rawValue.split("^"):
                rangeMatch = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", piece)
                if rangeMatch:
                    start = int(rangeMatch.group(1))
                    stop = int(rangeMatch.group(2))
                    strideStr = rangeMatch.group(3)
                    stride = int(strideStr) if strideStr else 1
                    # "a..b" ranges are inclusive, unlike python's range;
                    # expanded values are kept as strings.
                    expanded.extend(str(num) for num in range(start, stop + 1, stride))
                else:
                    expanded.append(piece)
            keyValues[key] = expanded

        # Cross product over all keys, preserving key order in each dict.
        keys = list(keyValues.keys())
        idDictList = [
            collections.OrderedDict(zip(keys, combo)) for combo in itertools.product(*keyValues.values())
        ]

        argName = option_string.lstrip("-")
        getattr(namespace, argName).idList += idDictList
class LongLogAction(argparse.Action):
    """argparse action to make logs verbose.

    An action so that it can take effect before log level options.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Switch log output to the verbose (long) format.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. This argument is not used.
        values : `list`
            Unused.
        option_string : `str`
            Option value specified by the user (unused).
        """
        # log4j properties selecting the verbose console layout.
        properties = """
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n
"""
        lsstLog.configure_prop(properties)
class LogLevelAction(argparse.Action):
    """argparse action to set log level."""

    def __call__(self, parser, namespace, values, option_string):
        """Set log level(s) for one or more components.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. This argument is not used.
        values : `list`
            List of trace levels; each item must be of the form
            ``component_name=level`` or ``level``, where ``level``
            is a keyword (not case sensitive) or an integer.
        option_string : `str`
            Option value specified by the user.
        """
        permittedLevelList = ("TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL")
        permittedLevelSet = set(permittedLevelList)
        for entry in values:
            component, _, levelStr = entry.partition("=")
            if not levelStr:
                # A bare level (no "=") applies to the task's own logger.
                component, levelStr = None, component
            logLevelUpr = levelStr.upper()

            logger = namespace.log if component is None else lsst.utils.logging.getLogger(component)

            if logLevelUpr not in permittedLevelSet:
                parser.error(f"loglevel={levelStr!r} not one of {permittedLevelList}")
            logLevel = getattr(logger, logLevelUpr)

            logger.setLevel(logLevel)

            # Keep whichever logging framework this logger is NOT in sync.
            if isinstance(logger, lsstLog.Log):
                pyLevel = lsstLog.LevelTranslator.lsstLog2logging(logLevel)
                logging.getLogger(component or None).setLevel(pyLevel)
            else:
                # Need to set lsstLog level
                lsstLogLevel = lsstLog.LevelTranslator.logging2lsstLog(logLevel)
                lsstLog.getLogger(component or "").setLevel(lsstLogLevel)
class ReuseAction(argparse.Action):
    """argparse action associated with `ArgumentParser.addReuseOption`.

    Expands the selected step into the list of that step and all of its
    predecessors in ``self.choices``.
    """

    def __call__(self, parser, namespace, value, option_string):
        # "all" is the sentinel appended last by addReuseOption; it maps
        # to the final real step (the one just before "all").
        selected = self.choices[-2] if value == "all" else value
        cutoff = self.choices.index(selected) + 1
        namespace.reuse = self.choices[:cutoff]
def setDottedAttr(item, name, value):
    """Set an instance attribute (like `setattr` but accepting
    hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be set.
    name : `str`
        Name of attribute to set.
    value : obj
        New value for the attribute.

    Notes
    -----
    For example if name is ``foo.bar.baz`` then ``item.foo.bar.baz``
    is set to the specified value.
    """
    # Walk down to the parent of the final attribute, then set the leaf.
    parentPath, _, leafName = name.rpartition(".")
    target = item
    if parentPath:
        for part in parentPath.split("."):
            target = getattr(target, part)
    setattr(target, leafName, value)
def getDottedAttr(item, name):
    """Get an attribute (like `getattr` but accepts hierarchical names
    such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be returned.
    name : `str`
        Name of the attribute to get.

    Returns
    -------
    itemAttr : obj
        If name is ``foo.bar.baz`` then the return value is
        ``item.foo.bar.baz``.
    """
    # Descend one attribute at a time along the dotted path.
    current = item
    for part in name.split("."):
        current = getattr(current, part)
    return current