Coverage for python/lsst/pipe/base/argumentParser.py: 14%
487 statements
« prev ^ index » next — coverage.py v6.4.1, created at 2022-07-09 06:14 -0700
1#
2# LSST Data Management System
3# Copyright 2008-2015 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
# Public API of this module, as seen by ``from ... import *``.
__all__ = [
    "ArgumentParser",
    "ConfigFileAction",
    "ConfigValueAction",
    "DataIdContainer",
    "DatasetArgument",
    "ConfigDatasetType",
    "InputOnlyArgumentParser",
]
32import abc
33import argparse
34import collections
35import fnmatch
36import itertools
37import logging
38import os
39import re
40import shlex
41import shutil
42import sys
43import textwrap
45import lsst.daf.persistence as dafPersist
46import lsst.log as lsstLog
47import lsst.pex.config as pexConfig
48import lsst.pex.config.history
49import lsst.utils
50import lsst.utils.logging
# Names of environment variables that, when set, provide the default root
# directory for the input, calib, and output repository paths respectively
# (applied by ``_fixPath``).
DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
57def _fixPath(defName, path):
58 """Apply environment variable as default root, if present, and abspath.
60 Parameters
61 ----------
62 defName : `str`
63 Name of environment variable containing default root path;
64 if the environment variable does not exist
65 then the path is relative to the current working directory
66 path : `str`
67 Path relative to default root path.
69 Returns
70 -------
71 abspath : `str`
72 Path that has been expanded, or `None` if the environment variable
73 does not exist and path is `None`.
74 """
75 defRoot = os.environ.get(defName)
76 if defRoot is None:
77 if path is None:
78 return None
79 return os.path.abspath(path)
80 return os.path.abspath(os.path.join(defRoot, path or ""))
class DataIdContainer:
    """Container for data IDs and associated data references.

    Parameters
    ----------
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        This class does not support `None`, but if it did, `None`
        would mean the level should not be restricted.

    Notes
    -----
    Override this class for data IDs that require special handling to be
    converted to ``data references``, and specify the override class
    as ``ContainerClass`` for ``add_id_argument``.

    If you don't want the argument parser to compute data references,
    specify ``doMakeDataRefList=False`` in ``add_id_argument``.
    """

    def __init__(self, level=None):
        # Dataset type of the data references (`str`); set later via
        # setDatasetType, since it may not be known at construction time.
        self.datasetType = None
        # See the ``level`` constructor parameter (`str`).
        self.level = level
        # Data IDs from the command line for the appropriate data ID
        # argument (`list` of `dict`).
        self.idList = []
        # Data references for the IDs in ``idList``
        # (`list` of `lsst.daf.persistence.ButlerDataRef`). Entries whose
        # data is not found are omitted; left empty by ``parse_args`` when
        # ``doMakeDataRefList=False`` was given to ``add_id_argument``.
        self.refList = []

    def setDatasetType(self, datasetType):
        """Set actual dataset type, once it is known.

        Parameters
        ----------
        datasetType : `str`
            Dataset type.

        Notes
        -----
        ``datasetType`` is deliberately not a constructor argument:
        some subclasses only learn the dataset type when the command is
        parsed, so to keep the code uniform it is always set afterwards.
        """
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """Validate data IDs and cast them to the correct type
        (modify idList in place).

        Data ID values are read from the command line as `str`, but the
        butler needs some of them as other types (e.g. "visit" as `int`);
        this converts each value in each dict of ``idList`` accordingly.

        Parameters
        ----------
        butler : `lsst.daf.persistence.Butler`
            Data butler.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            keyTypes = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError as e:
            message = f"Cannot get keys for datasetType {self.datasetType} at level {self.level}"
            raise KeyError(message) from e

        for dataId in self.idList:
            for key, strVal in dataId.items():
                try:
                    keyType = keyTypes[key]
                except KeyError:
                    # Unknown key: assume it is valid, guess `str`, and
                    # remember the guess so we warn only once per key.
                    keyType = str

                    log = lsst.utils.logging.getLogger()
                    log.warning(
                        'Unexpected ID %s; guessing type is "%s"', key, "str" if keyType == str else keyType
                    )
                    keyTypes[key] = keyType

                if keyType != str:
                    try:
                        castVal = keyType(strVal)
                    except Exception:
                        raise TypeError(f"Cannot cast value {strVal!r} to {keyType} for ID key {key}")
                    dataId[key] = castVal

    def makeDataRefList(self, namespace):
        """Compute refList based on idList.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Results of parsing command-line. The ``butler`` and ``log``
            elements must be set.

        Notes
        -----
        Not called if ``add_id_argument`` was called with
        ``doMakeDataRefList=False``.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            found = dafPersist.searchDataRefs(
                butler, datasetType=self.datasetType, level=self.level, dataId=dataId
            )
            if found:
                self.refList += found
            else:
                namespace.log.warning("No data found for dataId=%s", dataId)
class DataIdArgument:
    """data ID argument, used by `ArgumentParser.add_id_argument`.

    Parameters
    ----------
    name : `str`
        Name of identifier (argument name without dashes).
    datasetType : `str`
        Type of dataset; specify a string for a fixed dataset type
        or a `DatasetArgument` for a dynamic dataset type (e.g.
        one specified by a command-line argument).
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        Some container classes may also support `None`, which means
        the level should not be restricted; however the default class,
        `DataIdContainer`, does not support `None`.
    doMakeDataRefList : `bool`, optional
        If `True` (default), construct data references.
    ContainerClass : `class`, optional
        Class to contain data IDs and data references; the default class
        `DataIdContainer` will work for many, but not all, cases.
        For example if the dataset type is specified on the command line
        then use `DynamicDatasetType`.
    """

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        # Leading dashes are disallowed here; callers pass the bare name.
        if name.startswith("-"):
            raise RuntimeError(f"Name {name} must not start with -")
        self.name = name
        self.argName = name.lstrip("-")
        self.datasetType = datasetType
        self.level = level
        self.ContainerClass = ContainerClass
        self.doMakeDataRefList = bool(doMakeDataRefList)

    @property
    def isDynamicDatasetType(self):
        """`True` if the dataset type is dynamic (that is, specified
        on the command line).
        """
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """Get the dataset type as a string.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        if not self.isDynamicDatasetType:
            return self.datasetType
        # Dynamic: delegate to the DynamicDatasetType instance.
        return self.datasetType.getDatasetType(namespace)
class DynamicDatasetType(metaclass=abc.ABCMeta):
    """Abstract base class for a dataset type determined from parsed
    command-line arguments.
    """

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify dataset type name,
        if wanted.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser to add the argument to.
        idName : `str`
            Name of data ID argument, without the leading ``"--"``,
            e.g. ``"id"``.

        Notes
        -----
        The default implementation is a no-op; subclasses that need a
        command-line argument override it.
        """
        pass

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Get the dataset type as a string, based on parsed command-line
        arguments.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        raise NotImplementedError("Subclasses must override")
class DatasetArgument(DynamicDatasetType):
    """Dataset type specified by a command-line argument.

    Parameters
    ----------
    name : `str`, optional
        Name of command-line argument (including leading "--",
        if appropriate) whose value is the dataset type.
        If `None`, uses ``--idName_dstype`` where idName
        is the name of the data ID argument (e.g. "id").
    help : `str`, optional
        Help string for the command-line argument.
    default : `object`, optional
        Default value. If `None`, then the command-line option is required.
        This argument is ignored if the command-line argument is positional
        (name does not start with "-") because positional arguments do
        not support default values.
    """

    def __init__(
        self,
        name=None,
        help="dataset type to process from input data repository",
        default=None,
    ):
        super().__init__()
        self.name = name
        self.help = help
        self.default = default

    def getDatasetType(self, namespace):
        """Get the dataset type as a string, from the appropriate
        command-line argument.

        Parameters
        ----------
        namespace :
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        # Strip leading dashes to recover the namespace attribute name.
        return getattr(namespace, self.name.lstrip("-"))

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify the dataset type name.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        idName : `str`
            Data ID.

        Notes
        -----
        Also sets the `name` attribute if it is currently `None`.
        """
        helpText = self.help if self.help else f"dataset type for {idName}"
        if self.name is None:
            self.name = f"--{idName}_dstype"
        extraArgs = {}
        if self.name.startswith("-"):
            # Optional arguments are required unless a default was supplied;
            # positional arguments never take `required`.
            extraArgs = {"required": self.default is None}
        parser.add_argument(self.name, default=self.default, help=helpText, **extraArgs)
class ConfigDatasetType(DynamicDatasetType):
    """Dataset type specified by a config parameter.

    Parameters
    ----------
    name : `str`
        Name of config option whose value is the dataset type.
        May be dotted to refer to a nested config field.
    """

    def __init__(self, name):
        DynamicDatasetType.__init__(self)
        self.name = name

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate
        config field.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command.

        Raises
        ------
        RuntimeError
            Raised if the config parameter named by ``self.name`` cannot
            be found on ``namespace.config``.
        """
        # getattr does not work reliably if the config field name is
        # dotted, so step through one level at a time
        keyList = self.name.split(".")
        value = namespace.config
        for key in keyList:
            try:
                value = getattr(value, key)
            except (AttributeError, KeyError) as e:
                # getattr conventionally raises AttributeError for missing
                # attributes; the original code caught only KeyError, which
                # let AttributeError escape without the explanatory message.
                raise RuntimeError(f"Cannot find config parameter {self.name!r}") from e
        return value
class ArgumentParser(argparse.ArgumentParser):
    """Argument parser for command-line tasks that is based on
    `argparse.ArgumentParser`.

    Parameters
    ----------
    name : `str`
        Name of top-level task; used to identify camera-specific override
        files.
    usage : `str`, optional
        Command-line usage signature.
    **kwargs
        Additional keyword arguments for `argparse.ArgumentParser`.

    Notes
    -----
    Users may wish to add additional arguments before calling `parse_args`.
    """

    # I would prefer to check data ID keys and values as they are parsed,
    # but the required information comes from the butler, so I have to
    # construct a butler before I do this checking. Constructing a butler
    # is slow, so I only want do it once, after parsing the command line,
    # so as to catch syntax errors quickly.

    requireOutput = True
    """Require an output directory to be specified (`bool`)."""

    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        argparse.ArgumentParser.__init__(
            self,
            usage=usage,
            fromfile_prefix_chars="@",
            epilog=textwrap.dedent(
                """Notes:
            * --config, --config-file or --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --config-file foo bar
                * wrong: --config-file=foo bar
            """
            ),
            formatter_class=argparse.RawDescriptionHelpFormatter,
            **kwargs,
        )
        # Positional input repository path; stored raw and resolved later
        # by _parseDirectories.
        self.add_argument(
            metavar="input",
            dest="rawInput",
            help=f"path to input data repository, relative to ${DEFAULT_INPUT_NAME}",
        )
        self.add_argument(
            "--calib",
            dest="rawCalib",
            help=f"path to input calibration repository, relative to ${DEFAULT_CALIB_NAME}",
        )
        self.add_argument(
            "--output",
            dest="rawOutput",
            help=f"path to output data repository (need not exist), relative to ${DEFAULT_OUTPUT_NAME}",
        )
        self.add_argument(
            "--rerun",
            dest="rawRerun",
            metavar="[INPUT:]OUTPUT",
            help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; optionally sets ROOT to ROOT/rerun/INPUT",
        )
        # NOTE: the custom actions referenced below (ConfigValueAction,
        # ConfigFileAction, LogLevelAction, LongLogAction, IdValueAction,
        # ReuseAction) are defined elsewhere in this module.
        self.add_argument(
            "-c",
            "--config",
            nargs="*",
            action=ConfigValueAction,
            help="config override(s), e.g. -c foo=newfoo bar.baz=3",
            metavar="NAME=VALUE",
        )
        self.add_argument(
            "-C",
            "--config-file",
            "--configfile",
            dest="configfile",
            nargs="*",
            action=ConfigFileAction,
            help="config override file(s)",
        )
        self.add_argument(
            "-L",
            "--loglevel",
            nargs="*",
            action=LogLevelAction,
            help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
            metavar="LEVEL|COMPONENT=LEVEL",
        )
        # nargs=0: the flag takes no values; the action fires on presence.
        self.add_argument(
            "--longlog",
            nargs=0,
            action=LongLogAction,
            help="use a more verbose format for the logging",
        )
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument(
            "--doraise",
            action="store_true",
            help="raise an exception on error (else log a message and continue)?",
        )
        self.add_argument(
            "--noExit",
            action="store_true",
            help="Do not exit even upon failure (i.e. return a struct to the calling script)",
        )
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument(
            "--show",
            nargs="+",
            default=(),
            help="display the specified information to stdout and quit "
            "(unless run is specified); information is "
            "(config[=PATTERN]|history=PATTERN|tasks|data|run)",
        )
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument(
            "-t", "--timeout", type=float, help="Timeout for multiprocessing; maximum wall time (sec)"
        )
        self.add_argument(
            "--clobber-output",
            action="store_true",
            dest="clobberOutput",
            default=False,
            help=(
                "remove and re-create the output directory if it already exists "
                "(safe with -j, but not all other forms of parallel execution)"
            ),
        )
        self.add_argument(
            "--clobber-config",
            action="store_true",
            dest="clobberConfig",
            default=False,
            help=(
                "backup and then overwrite existing config files instead of checking them "
                "(safe with -j, but not all other forms of parallel execution)"
            ),
        )
        self.add_argument(
            "--no-backup-config",
            action="store_true",
            dest="noBackupConfig",
            default=False,
            help="Don't copy config to file~N backup.",
        )
        self.add_argument(
            "--clobber-versions",
            action="store_true",
            dest="clobberVersions",
            default=False,
            help=(
                "backup and then overwrite existing package versions instead of checking"
                "them (safe with -j, but not all other forms of parallel execution)"
            ),
        )
        self.add_argument(
            "--no-versions",
            action="store_true",
            dest="noVersions",
            default=False,
            help="don't check package versions; useful for development",
        )
        # Configure the lsst.log (log4cxx) root logger to write INFO and
        # above to stdout.
        lsstLog.configure_prop(
            """
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
"""
        )

        # Forward all Python logging to lsst.log
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        lgr.addHandler(lsstLog.LogHandler())

    def add_id_argument(
        self, name, datasetType, help, level=None, doMakeDataRefList=True, ContainerClass=DataIdContainer
    ):
        """Add a data ID argument.

        Parameters
        ----------
        name : `str`
            Data ID argument (including leading dashes, if wanted).
        datasetType : `str` or `DynamicDatasetType`-type
            Type of dataset. Supply a string for a fixed dataset type.
            For a dynamically determined dataset type, supply
            a `DynamicDatasetType`, such a `DatasetArgument`.
        help : `str`
            Help string for the argument.
        level : `str`
            The lowest hierarchy level to descend to for this dataset type,
            for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
            Use `""` to use the mapper's default for the dataset type.
            Some container classes may also support `None`, which means
            the level should not be restricted; however the default class,
            `DataIdContainer`, does not support `None`.
        doMakeDataRefList : bool, optional
            If `True` (default), construct data references.
        ContainerClass : `class`, optional
            Class to contain data IDs and data references; the default class
            `DataIdContainer` will work for many, but not all, cases.
            For example if the dataset type is specified on the command line
            then use `DynamicDatasetType`.

        Notes
        -----
        If ``datasetType`` is an instance of `DatasetArgument`,
        then add a second argument to specify the dataset type.

        The associated data is put into ``namespace.<dataIdArgument.name>``
        as an instance of `ContainerClass`; the container includes fields:

        - ``idList``: a list of data ID dicts.
        - ``refList``: a list of `~lsst.daf.persistence.Butler`
            data references (empty if ``doMakeDataRefList`` is `False`).
        """
        argName = name.lstrip("-")

        if argName in self._dataIdArgDict:
            raise RuntimeError(f"Data ID argument {name} already exists")
        # These names collide with attributes parse_args sets on the
        # namespace itself.
        if argName in set(("camera", "config", "butler", "log", "obsPkg")):
            raise RuntimeError(f"Data ID argument {name} is a reserved name")

        self.add_argument(
            name, nargs="*", action=IdValueAction, help=help, metavar="KEY=VALUE1[^VALUE2[^VALUE3...]"
        )

        dataIdArgument = DataIdArgument(
            name=argName,
            datasetType=datasetType,
            level=level,
            doMakeDataRefList=doMakeDataRefList,
            ContainerClass=ContainerClass,
        )

        # Dynamic dataset types may add their own command-line argument
        # (e.g. --id_dstype).
        if dataIdArgument.isDynamicDatasetType:
            datasetType.addArgument(parser=self, idName=argName)

        self._dataIdArgDict[argName] = dataIdArgument

    def parse_args(self, config, args=None, log=None, override=None):
        """Parse arguments for a command-line task.

        Parameters
        ----------
        config : `lsst.pex.config.Config`
            Config for the task being run.
        args : `list`, optional
            Argument list; if `None` then ``sys.argv[1:]`` is used.
        log : `lsst.log.Log` or `logging.Logger`, optional
            Logger instance; if `None` use the default log.
        override : callable, optional
            A config override function. It must take the root config object
            as its only argument and must modify the config in place.
            This function is called after camera-specific overrides files
            are applied, and before command-line config overrides
            are applied (thus allowing the user the final word).

        Returns
        -------
        namespace : `argparse.Namespace`
            A `~argparse.Namespace` instance containing fields:

            - ``camera``: camera name.
            - ``config``: the supplied config with all overrides applied,
              validated and frozen.
            - ``butler``: a `lsst.daf.persistence.Butler` for the data.
            - An entry for each of the data ID arguments registered by
              `add_id_argument`, of the type passed to its ``ContainerClass``
              keyword (`~lsst.pipe.base.DataIdContainer` by default). It
              includes public elements ``idList`` and ``refList``.
            - ``log``: a `lsst.pipe.base.TaskLogAdapter` log.
            - An entry for each command-line argument,
              with the following exceptions:

              - config is the supplied config, suitably updated.
              - configfile, id and loglevel are all missing.
            - ``obsPkg``: name of the ``obs_`` package for this camera.
        """
        if args is None:
            args = sys.argv[1:]

        # The first argument must be the input repository; anything starting
        # with "-" or "@" means the user skipped it.
        if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                self.exit(f"{self.prog}: error: Must specify input as first argument")

        # Note that --rerun may change namespace.input, but if it does
        # we verify that the new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error(f"Error: input={namespace.input!r} not found")

        namespace.config = config
        # Ensure that the external logger is converted to the expected
        # logger class.
        namespace.log = (
            lsst.utils.logging.getLogger(log.name) if log is not None else lsst.utils.logging.getLogger()
        )
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        if mapperClass is None:
            self.error(f"Error: no mapper specified for input repo {namespace.input!r}")

        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        # Camera-specific hook; default implementation is a no-op.
        self.handleCamera(namespace)

        # Apply obs-package and camera config override files, then the
        # caller-supplied override function, BEFORE command-line overrides
        # (which are applied during argparse parsing below).
        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        # Resolve raw input/calib/output/rerun paths into final directories.
        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)

        # Handle --show requests that do not need the butler (defined
        # elsewhere in this module); exit deferred until after data refs.
        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error(
                "no output directory specified.\n"
                "An output directory must be specified with the --output or --rerun\n"
                "command-line arguments.\n"
            )

        butlerArgs = {}  # common arguments for butler elements
        if namespace.calib:
            butlerArgs = {"mapperArgs": {"calibRoot": namespace.calib}}
        if namespace.output:
            outputs = {"root": namespace.output, "mode": "rw"}
            inputs = {"root": namespace.input}
            inputs.update(butlerArgs)
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            # No separate output repo: open the input read-write.
            outputs = {"root": namespace.input, "mode": "rw"}
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(outputs=outputs)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler,
        # hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict.keys():
                for dataRef in getattr(namespace, dataIdName).refList:
                    print(f"{dataIdName} dataRef.dataId = {dataRef.dataId}")

        # Any --show request other than "run" means display-and-quit.
        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug  # type: ignore

                assert debug  # silence pyflakes (above silences mypy)
            except ImportError:
                print("Warning: no 'debug' module found", file=sys.stderr)
                namespace.debug = False

        # These were consumed by their argparse actions; remove the
        # leftover namespace attributes.
        del namespace.loglevel
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace

    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories

        This allows for hacking the directories, e.g., to include a
        "rerun".
        Modifications are made to the 'namespace' object in-place.
        """
        # Remember the input repo's mapper so a --rerun-modified input can
        # be checked for consistency below.
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the
        # namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument.
        # If rerun is specified as a colon separated value,
        # it will be parsed as an input and output.
        # The input value will be overridden if previously specified
        # (but a check is made to make sure both inputs use
        # the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                namespace.output = rerunDir[0]
                # A "_parent" link means the rerun chains to a parent repo;
                # use that as the input.
                if os.path.exists(os.path.join(namespace.output, "_parent")):
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error(f"Error: invalid argument for --rerun: {namespace.rerun}")
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        # The raw* attributes have been fully consumed.
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun

    def _processDataIds(self, namespace):
        """Process the parsed data for each data ID argument in an
        `~argparse.Namespace`.

        Processing includes:

        - Validate data ID keys.
        - Cast the data ID values to the correct type.
        - Compute data references from data IDs.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``butler``
            - ``log``
            - ``config``, if any dynamic dataset types are set by
              a config parameter.
            - Dataset type arguments (e.g. ``id_dstype``), if any dynamic
              dataset types are specified by such

            These attributes are modified:

            - ``<name>`` for each data ID argument registered using
              `add_id_argument` with name ``<name>``.
        """
        for dataIdArgument in self._dataIdArgDict.values():
            dataIdContainer = getattr(namespace, dataIdArgument.name)
            dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
            if dataIdArgument.doMakeDataRefList:
                try:
                    dataIdContainer.castDataIds(butler=namespace.butler)
                except (KeyError, TypeError) as e:
                    # failure of castDataIds indicates invalid command args
                    self.error(e)

                # failure of makeDataRefList indicates a bug
                # that wants a traceback
                dataIdContainer.makeDataRefList(namespace)

    def _applyInitialOverrides(self, namespace):
        """Apply obs-package-specific and camera-specific config
        override files, if found

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``obsPkg``

        Look in the package namespace.obsPkg for files:

        - ``config/<task_name>.py``
        - ``config/<camera_name>/<task_name>.py`` and load if found.
        """
        obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
        fileName = self._name + ".py"
        for filePath in (
            os.path.join(obsPkgDir, "config", fileName),
            os.path.join(obsPkgDir, "config", namespace.camera, fileName),
        ):
            if os.path.exists(filePath):
                namespace.log.info("Loading config overrride file %r", filePath)
                namespace.config.load(filePath)
            else:
                namespace.log.debug("Config override file does not exist: %r", filePath)

    def handleCamera(self, namespace):
        """Perform camera-specific operations before parsing the command-line.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace (an `argparse.Namespace`) with the following fields:

            - ``camera``: the camera name.
            - ``config``: the config passed to parse_args, with no overrides
              applied.
            - ``obsPkg``: the ``obs_`` package for this camera.
            - ``log``: a `lsst.pipe.base.TaskLogAdapter` Log.

        Notes
        -----
        The default implementation does nothing.
        """
        pass

    def convert_arg_line_to_args(self, arg_line):
        """Allow files of arguments referenced by ``@<path>`` to contain
        multiple values on each line.

        Parameters
        ----------
        arg_line : `str`
            Line of text read from an argument file.
        """
        arg_line = arg_line.strip()
        # Skip blank lines and whole-line comments.
        if not arg_line or arg_line.startswith("#"):
            return
        for arg in shlex.split(arg_line, comments=True, posix=True):
            if not arg.strip():
                continue
            yield arg

    def addReuseOption(self, choices):
        """Add a "--reuse-outputs-from SUBTASK" option to the argument
        parser.

        CmdLineTasks that can be restarted at an intermediate step using
        outputs from earlier (but still internal) steps should use this
        method to allow the user to control whether that happens when
        outputs from earlier steps are present.

        Parameters
        ----------
        choices : sequence
            A sequence of string names (by convention, top-level subtasks)
            that identify the steps that could be skipped when their
            outputs are already present. The list is ordered, so when the
            user specifies one step on the command line, all previous steps
            may be skipped as well. In addition to the choices provided,
            users may pass "all" to indicate that all steps may be thus
            skipped.

        When this method is called, the ``namespace`` object returned by
        ``parse_args`` will contain a ``reuse`` attribute containing
        a list of all steps that should be skipped if their outputs
        are already present.
        If no steps should be skipped, the ``reuse`` will be an empty list.
        """
        choices = list(choices)
        choices.append("all")
        self.add_argument(
            "--reuse-outputs-from",
            dest="reuse",
            choices=choices,
            default=[],
            action=ReuseAction,
            help=(
                "Skip the given subtask and its predecessors and reuse their outputs "
                "if those outputs already exist. Use 'all' to specify all subtasks."
            ),
        )
class InputOnlyArgumentParser(ArgumentParser):
    """`ArgumentParser` for command-line tasks that don't write any output."""

    # Class-level switch presumably consulted by the base ArgumentParser to
    # decide whether an output repository is required -- confirm against the
    # base class definition.
    requireOutput = False  # We're not going to write anything
def getTaskDict(config, taskDict=None, baseName=""):
    """Get a dictionary of task info for all subtasks in a config.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    taskDict : `dict`, optional
        Users should not specify this argument. Supports recursion.
        If provided, taskDict is updated in place, else a new `dict`
        is started.
    baseName : `str`, optional
        Users should not specify this argument. It is only used for
        recursion: if a non-empty string then a period is appended
        and the result is used as a prefix for additional entries
        in taskDict; otherwise no prefix is used.

    Returns
    -------
    taskDict : `dict`
        Keys are config field names, values are task names.

    Notes
    -----
    This function is designed to be called recursively.
    The user should call with only a config (leaving taskDict and baseName
    at their default values).
    """
    if taskDict is None:
        taskDict = dict()
    for fieldName, field in config.items():
        # Only retargetable task fields carry both a "value" (the
        # sub-config) and a "target" (the task class); skip everything else.
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue
        subBaseName = f"{baseName}.{fieldName}" if baseName else fieldName
        try:
            taskName = f"{field.target.__module__}.{field.target.__name__}"
        except Exception:
            # Fall back to repr for targets without module/name attributes.
            taskName = repr(field.target)
        taskDict[subBaseName] = taskName
        getTaskDict(config=subConfig, taskDict=taskDict, baseName=subBaseName)
    return taskDict
def obeyShowArgument(showOpts, config=None, exit=False):
    """Process arguments specified with ``--show`` (but ignores
    ``"data"``).

    Parameters
    ----------
    showOpts : `list` of `str`
        List of options passed to ``--show``.
    config : optional
        The provided config.
    exit : bool, optional
        Exit if ``"run"`` isn't included in ``showOpts``.

    Notes
    -----
    Supports the following options in showOpts:

    - ``config[=PAT]``. Dump all the config entries, or just the ones that
      match the glob pattern.
    - ``history=PAT``. Show where the config entries that match the glob
      pattern were set.
    - ``tasks``. Show task hierarchy.
    - ``data``. Ignored; to be processed by caller.
    - ``run``. Keep going (the default behaviour is to exit if
      ``--show`` is specified).

    Calls ``sys.exit(1)`` if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        # Split "name=args"; an option without "=" gets empty args.
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            # Strip an optional leading "config." prefix from the pattern.
            # NOTE(review): the dot in "config." is an unescaped regex dot,
            # so e.g. "configX" would also match -- harmless here but worth
            # confirming if this is ever tightened.
            matConfig = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matConfig.group(1)
            if pattern:

                class FilteredStream:
                    """A file object that only prints lines
                    that match the glob "pattern".

                    N.b. Newlines are silently discarded and reinserted;
                    crude but effective.
                    """

                    def __init__(self, pattern):
                        # obey case if pattern isn't lowercase or requests
                        # NOIGNORECASE
                        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

                        if mat:
                            pattern = mat.group(1)
                            self._pattern = re.compile(fnmatch.translate(pattern))
                        else:
                            if pattern != pattern.lower():
                                print(
                                    f"Matching {pattern!r} without regard to case "
                                    "(append :NOIGNORECASE to prevent this)",
                                    file=sys.stdout,
                                )
                            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

                    def write(self, showStr):
                        showStr = showStr.rstrip()
                        # Strip off doc string line(s) and cut off
                        # at "=" for string matching
                        matchStr = showStr.split("\n")[-1].split("=")[0]
                        if self._pattern.search(matchStr):
                            print("\n" + showStr)

                fd = FilteredStream(pattern)
            else:
                fd = sys.stdout

            config.saveToStream(fd, "config")
        elif showCommand == "history":
            matHistory = re.search(r"^(?:config.)?(.+)?", showArgs)
            globPattern = matHistory.group(1)
            if not globPattern:
                print("Please provide a value with --show history (e.g. history=*.doXXX)", file=sys.stderr)
                sys.exit(1)

            error = False
            for i, pattern in enumerate(fnmatch.filter(config.names(), globPattern)):
                if i > 0:
                    print("")

                pattern = pattern.split(".")
                cpath, cname = pattern[:-1], pattern[-1]
                hconfig = config  # the config that we're interested in
                # NOTE(review): this inner enumerate re-binds ``i``, shadowing
                # the outer loop variable; harmless today because the outer
                # ``i`` is re-assigned each iteration, but fragile.
                for i, cpt in enumerate(cpath):
                    try:
                        hconfig = getattr(hconfig, cpt)
                    except AttributeError:
                        config_path = ".".join(["config"] + cpath[:i])
                        print(f"Error: configuration {config_path} has no subconfig {cpt}", file=sys.stderr)
                        error = True

                try:
                    print(pexConfig.history.format(hconfig, cname))
                except KeyError:
                    config_path = ".".join(["config"] + cpath)
                    print(f"Error: {config_path} has no field {cname}", file=sys.stderr)
                    error = True

            if error:
                sys.exit(1)

        elif showCommand == "data":
            # Ignored here; the caller is responsible for handling "data".
            pass
        elif showCommand == "run":
            # "run" merely suppresses the exit below.
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            choices = "', '".join("config[=XXX] data history=XXX tasks run".split())
            print(f"Unknown value for show: {what} (choose from {choices!r})", file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
def showTaskHierarchy(config):
    """Print task hierarchy to stdout.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    """
    print("Subtasks:")
    taskDict = getTaskDict(config=config)

    # One "field: task" line per subtask, in sorted field-name order.
    for fieldName in sorted(taskDict.keys()):
        print(f"{fieldName}: {taskDict[fieldName]}")
class ConfigValueAction(argparse.Action):
    """argparse action callback to override config parameters using
    name=value pairs from the command-line.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Override one or more config name value pairs.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The ``namespace.config`` attribute is updated.
        values : `list`
            A list of ``configItemName=value`` pairs.
        option_string : `str`
            Option value specified by the user.
        """
        if namespace.config is None:
            return
        for pair in values:
            fieldPath, _, rawValue = pair.partition("=")
            if not rawValue:
                parser.error(f"{option_string} value {pair} must be in form name=value")

            # First try assigning the raw string; if the field rejects it,
            # fall back to eval for non-string values (ints, lists, ...).
            try:
                setDottedAttr(namespace.config, fieldPath, rawValue)
            except AttributeError:
                parser.error(f"no config field: {fieldPath}")
            except Exception:
                try:
                    # NOTE: eval of command-line text; acceptable only
                    # because the value comes from the operator's own
                    # command line, never from untrusted input.
                    parsedValue = eval(rawValue, {})
                except Exception:
                    parser.error(f"cannot parse {rawValue!r} as a value for {fieldPath}")
                try:
                    setDottedAttr(namespace.config, fieldPath, parsedValue)
                except Exception as e:
                    parser.error(f"cannot set config.{fieldPath}={parsedValue!r}: {e}")
class ConfigFileAction(argparse.Action):
    """argparse action to load config overrides from one or more files."""

    def __call__(self, parser, namespace, values, option_string=None):
        """Load one or more files of config overrides.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The following attributes are updated by this
            method: ``namespace.config``.
        values : `list`
            A list of data config file paths.
        option_string : `str`, optional
            Option value specified by the user.
        """
        config = namespace.config
        if config is None:
            return
        # Apply each override file in the order given on the command line.
        for path in values:
            try:
                config.load(path)
            except Exception as e:
                parser.error(f"cannot load config file {path!r}: {e}")
class IdValueAction(argparse.Action):
    """argparse action callback to process a data ID into a dict."""

    def __call__(self, parser, namespace, values, option_string):
        """Parse ``--id`` data and append results to
        ``namespace.<argument>.idList``.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command (an instance of argparse.Namespace).
            The following attributes are updated:

            - ``<idName>.idList``, where ``<idName>`` is the name of the
              ID argument, for instance ``"id"`` for ID argument ``--id``.
        values : `list`
            A list of data IDs; see Notes below.
        option_string : `str`
            Option value specified by the user.

        Notes
        -----
        The data format is::

            key1=value1_1[^value1_2[^value1_3...]
            key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. ``value1_1``) may either be a string,
        or of the form ``"int..int"`` (e.g. ``"1..3"``) which is
        interpreted as ``"1^2^3"`` (inclusive, unlike a python range).
        So ``"0^2..4^7..9"`` is equivalent to ``"0^2^3^4^7^8^9"``.
        You may also specify a stride: ``"1..5:2"`` is ``"1^3^5"``.

        The cross product is computed for keys with multiple values.
        For example::

            --id visit 1^2 ccd 1,1^2,2

        results in the following data ID dicts being appended to
        ``namespace.<argument>.idList``:

            {"visit":1, "ccd":"1,1"}
            {"visit":2, "ccd":"1,1"}
            {"visit":1, "ccd":"2,2"}
            {"visit":2, "ccd":"2,2"}
        """
        if namespace.config is None:
            return
        idDict = collections.OrderedDict()
        for nameValue in values:
            name, sep, valueStr = nameValue.partition("=")
            if name in idDict:
                parser.error(f"{name} appears multiple times in one ID argument: {option_string}")
            valueList = []
            for item in valueStr.split("^"):
                # Expand "first..last[:stride]" into an inclusive range of
                # string values; anything else is kept verbatim.
                mat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", item)
                if mat is None:
                    valueList.append(item)
                else:
                    first = int(mat.group(1))
                    last = int(mat.group(2))
                    stride = int(mat.group(3)) if mat.group(3) else 1
                    valueList.extend(str(num) for num in range(first, last + 1, stride))
            idDict[name] = valueList

        # Cross product over all keys, preserving key insertion order.
        keys = list(idDict.keys())
        idDictList = [
            collections.OrderedDict(zip(keys, combo)) for combo in itertools.product(*idDict.values())
        ]

        # "--id" -> namespace.id, "--selectId" -> namespace.selectId, etc.
        ident = getattr(namespace, option_string.lstrip("-"))
        ident.idList += idDictList
class LongLogAction(argparse.Action):
    """argparse action to make logs verbose.

    An action so that it can take effect before log level options.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Set long log.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. This argument is not used.
        values : `list`
            Unused.
        option_string : `str`
            Option value specified by the user (unused).
        """
        # Reconfigure lsst.log with a log4j-style properties string whose
        # layout adds level, timestamp, logger name, label, and source
        # file/line to every message (the "long" format).
        lsstLog.configure_prop(
            """
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n
"""
        )
class LogLevelAction(argparse.Action):
    """argparse action to set log level."""

    def __call__(self, parser, namespace, values, option_string):
        """Set trace level.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. This argument is not used.
        values : `list`
            List of trace levels; each item must be of the form
            ``component_name=level`` or ``level``, where ``level``
            is a keyword (not case sensitive) or an integer.
        option_string : `str`
            Option value specified by the user.
        """
        permittedLevelList = ("TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL")
        permittedLevelSet = set(permittedLevelList)
        for componentLevel in values:
            component, sep, levelStr = componentLevel.partition("=")
            if not levelStr:
                # No "=" present: the whole item is a level for the
                # task's own logger (no component named).
                levelStr, component = component, None
            logLevelUpr = levelStr.upper()

            if component is None:
                logger = namespace.log
            else:
                logger = lsst.utils.logging.getLogger(component)

            if logLevelUpr in permittedLevelSet:
                # Level keywords resolve to constants held as attributes
                # on the logger object.
                logLevel = getattr(logger, logLevelUpr)
            else:
                # parser.error raises (argparse convention), so logLevel is
                # always bound when execution continues past this branch.
                parser.error(f"loglevel={levelStr!r} not one of {permittedLevelList}")

            logger.setLevel(logLevel)

            # Set logging level for whatever logger this wasn't, keeping
            # the lsst.log and stdlib logging hierarchies in sync.
            if isinstance(logger, lsstLog.Log):
                pyLevel = lsstLog.LevelTranslator.lsstLog2logging(logLevel)
                logging.getLogger(component or None).setLevel(pyLevel)
            else:
                # Need to set lsstLog level
                lsstLogLevel = lsstLog.LevelTranslator.logging2lsstLog(logLevel)
                lsstLog.getLogger(component or "").setLevel(lsstLogLevel)
class ReuseAction(argparse.Action):
    """argparse action associated with ArgumentParser.addReuseOption."""

    def __call__(self, parser, namespace, value, option_string):
        # "all" is a sentinel appended as the final choice by
        # addReuseOption, so the last genuine subtask name sits at
        # index -2; selecting a step enables it plus all predecessors.
        chosen = self.choices[-2] if value == "all" else value
        namespace.reuse = self.choices[: self.choices.index(chosen) + 1]
def setDottedAttr(item, name, value):
    """Set an instance attribute (like `setattr` but accepting
    hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be set.
    name : `str`
        Name of attribute to set.
    value : obj
        New value for the attribute.

    Notes
    -----
    For example if name is ``foo.bar.baz`` then ``item.foo.bar.baz``
    is set to the specified value.
    """
    # Walk down every component except the last, then set the leaf.
    *parentNames, leafName = name.split(".")
    target = item
    for attrName in parentNames:
        target = getattr(target, attrName)
    setattr(target, leafName, value)
def getDottedAttr(item, name):
    """Get an attribute (like `getattr` but accepts hierarchical names
    such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be returned.
    name : `str`
        Name of the attribute to get.

    Returns
    -------
    itemAttr : obj
        If name is ``foo.bar.baz`` then the return value is
        ``item.foo.bar.baz``.
    """
    # Peel off one component and recurse on the remainder.
    head, sep, rest = name.partition(".")
    attr = getattr(item, head)
    return getDottedAttr(attr, rest) if sep else attr