Coverage for python/lsst/pipe/base/argumentParser.py: 11%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# LSST Data Management System
3# Copyright 2008-2015 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
# Public API of this module; governs what ``from ... import *`` exposes.
__all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
           "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]
25import abc
26import argparse
27import collections
28import fnmatch
29import itertools
30import logging
31import os
32import re
33import shlex
34import sys
35import shutil
36import textwrap
38import lsst.utils
39import lsst.pex.config as pexConfig
40import lsst.pex.config.history
41import lsst.log as lsstLog
42import lsst.daf.persistence as dafPersist
43from .task_logging import getTaskLogger
# Environment variables consulted for default repository roots (see _fixPath).
DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
50def _fixPath(defName, path):
51 """Apply environment variable as default root, if present, and abspath.
53 Parameters
54 ----------
55 defName : `str`
56 Name of environment variable containing default root path;
57 if the environment variable does not exist
58 then the path is relative to the current working directory
59 path : `str`
60 Path relative to default root path.
62 Returns
63 -------
64 abspath : `str`
65 Path that has been expanded, or `None` if the environment variable
66 does not exist and path is `None`.
67 """
68 defRoot = os.environ.get(defName)
69 if defRoot is None:
70 if path is None:
71 return None
72 return os.path.abspath(path)
73 return os.path.abspath(os.path.join(defRoot, path or ""))
class DataIdContainer:
    """Container for data IDs and associated data references.

    Parameters
    ----------
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        This class does not support `None`, but if it did, `None`
        would mean the level should not be restricted.

    Notes
    -----
    Override this class for data IDs that require special handling to be
    converted to ``data references``, and specify the override class
    as ``ContainerClass`` for ``add_id_argument``.

    If you don't want the argument parser to compute data references,
    specify ``doMakeDataRefList=False`` in ``add_id_argument``.
    """

    def __init__(self, level=None):
        # Dataset type of the data references (str); filled in later via
        # setDatasetType because subclasses may not know it until the
        # command line has been parsed.
        self.datasetType = None
        # See constructor parameter ``level`` (str).
        self.level = level
        # Data IDs given on the command line for this argument
        # (list of dict).
        self.idList = []
        # Data references for the IDs in ``idList`` (list of
        # lsst.daf.persistence.ButlerDataRef). IDs whose data is not found
        # are omitted; the list stays empty when ``doMakeDataRefList=False``
        # was passed to ``add_id_argument``.
        self.refList = []

    def setDatasetType(self, datasetType):
        """Set actual dataset type, once it is known.

        Parameters
        ----------
        datasetType : `str`
            Dataset type.

        Notes
        -----
        The reason ``datasetType`` is not a constructor argument is that
        some subclasses do not know the dataset type until the command
        is parsed. Thus, to reduce special cases in the code,
        ``datasetType`` is always set after the command is parsed.
        """
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """Validate data IDs and cast them to the correct type
        (modify idList in place).

        Data ID values arrive from the command line as `str`, but the
        butler requires some of them to be other types (e.g. "visit"
        values should be `int`); this casts each value to the type the
        butler reports for its key.

        Parameters
        ----------
        butler : `lsst.daf.persistence.Butler`
            Data butler.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            keyTypes = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError as e:
            msg = f"Cannot get keys for datasetType {self.datasetType} at level {self.level}"
            raise KeyError(msg) from e

        for idDict in self.idList:
            for key, rawValue in idDict.items():
                try:
                    valueType = keyTypes[key]
                except KeyError:
                    # Unknown key: accept it and guess that it is a string.
                    valueType = str

                    log = getTaskLogger()
                    log.warning("Unexpected ID %s; guessing type is \"%s\"",
                                key, 'str' if valueType == str else valueType)
                    keyTypes[key] = valueType

                if valueType == str:
                    # Already the right type; nothing to cast.
                    continue
                try:
                    idDict[key] = valueType(rawValue)
                except Exception:
                    raise TypeError(f"Cannot cast value {rawValue!r} to {valueType} for ID key {key}")

    def makeDataRefList(self, namespace):
        """Compute refList based on idList.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Results of parsing command-line. The ``butler`` and ``log``
            elements must be set.

        Notes
        -----
        Not called if ``add_id_argument`` was called with
        ``doMakeDataRefList=False``.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            refs = dafPersist.searchDataRefs(butler, datasetType=self.datasetType,
                                             level=self.level, dataId=dataId)
            if refs:
                self.refList.extend(refs)
            else:
                namespace.log.warning("No data found for dataId=%s", dataId)
class DataIdArgument:
    """Data ID argument, used by `ArgumentParser.add_id_argument`.

    Parameters
    ----------
    name : `str`
        Name of identifier (argument name without dashes).
    datasetType : `str`
        Type of dataset; specify a string for a fixed dataset type
        or a `DatasetArgument` for a dynamic dataset type (e.g.
        one specified by a command-line argument).
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        Some container classes may also support `None`, which means
        the level should not be restricted; however the default class,
        `DataIdContainer`, does not support `None`.
    doMakeDataRefList : `bool`, optional
        If `True` (default), construct data references.
    ContainerClass : `class`, optional
        Class to contain data IDs and data references; the default class
        `DataIdContainer` will work for many, but not all, cases.
        For example if the dataset type is specified on the command line
        then use `DynamicDatasetType`.
    """

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        # Reject option-style names outright; argName strips dashes anyway.
        if name.startswith("-"):
            raise RuntimeError(f"Name {name} must not start with -")
        self.name = name
        self.argName = name.lstrip("-")
        self.datasetType = datasetType
        self.level = level
        self.doMakeDataRefList = bool(doMakeDataRefList)
        self.ContainerClass = ContainerClass

    @property
    def isDynamicDatasetType(self):
        """`True` if the dataset type is dynamic (that is, specified
        on the command line).
        """
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """Return the dataset type as a string.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        if not self.isDynamicDatasetType:
            return self.datasetType
        return self.datasetType.getDatasetType(namespace)
class DynamicDatasetType(metaclass=abc.ABCMeta):
    """Abstract base class for a dataset type determined from parsed
    command-line arguments.
    """

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify dataset type name,
        if wanted.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser to add the argument to.
        idName : `str`
            Name of data ID argument, without the leading ``"--"``,
            e.g. ``"id"``.

        Notes
        -----
        The default implementation does nothing; subclasses that need a
        companion command-line argument override this.
        """
        pass

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Return the dataset type as a string, based on parsed
        command-line arguments.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        raise NotImplementedError("Subclasses must override")
class DatasetArgument(DynamicDatasetType):
    """Dataset type specified by a command-line argument.

    Parameters
    ----------
    name : `str`, optional
        Name of command-line argument (including leading "--",
        if appropriate) whose value is the dataset type.
        If `None`, uses ``--idName_dstype`` where idName
        is the name of the data ID argument (e.g. "id").
    help : `str`, optional
        Help string for the command-line argument.
    default : `object`, optional
        Default value. If `None`, then the command-line option is
        required. This argument is ignored if the command-line argument
        is positional (name does not start with "-") because positional
        arguments do not support default values.
    """

    def __init__(self,
                 name=None,
                 help="dataset type to process from input data repository",
                 default=None,
                 ):
        DynamicDatasetType.__init__(self)
        self.name = name
        self.help = help
        self.default = default

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate
        command-line argument.

        Parameters
        ----------
        namespace :
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        return getattr(namespace, self.name.lstrip("-"))

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify the dataset type name.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        idName : `str`
            Data ID.

        Notes
        -----
        Also sets the `name` attribute if it is currently `None`.
        """
        if self.name is None:
            self.name = f"--{idName}_dstype"
        helpText = self.help if self.help else f"dataset type for {idName}"
        # ``required`` is only legal for options, not positional arguments.
        extraKwargs = {}
        if self.name.startswith("-"):
            extraKwargs["required"] = self.default is None
        parser.add_argument(
            self.name,
            default=self.default,
            help=helpText,
            **extraKwargs)
class ConfigDatasetType(DynamicDatasetType):
    """Dataset type specified by a config parameter.

    Parameters
    ----------
    name : `str`
        Name of config option whose value is the dataset type.
        May be dotted (e.g. ``"calibrate.astrometry.refObjLoader"``)
        to reach a nested field.
    """

    def __init__(self, name):
        DynamicDatasetType.__init__(self)
        self.name = name

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate
        config field.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command; the ``config`` attribute is read.

        Returns
        -------
        datasetType : `str`
            Dataset type.

        Raises
        ------
        RuntimeError
            Raised if the config parameter named by ``self.name``
            cannot be found.
        """
        # getattr does not work reliably if the config field name is
        # dotted, so step through one level at a time
        keyList = self.name.split(".")
        value = namespace.config
        for key in keyList:
            try:
                value = getattr(value, key)
            except (AttributeError, KeyError) as e:
                # getattr raises AttributeError for missing fields on
                # ordinary objects; some config containers raise KeyError.
                # Catching only KeyError (as before) let AttributeError
                # escape with a less helpful message.
                raise RuntimeError(f"Cannot find config parameter {self.name!r}") from e
        return value
class ArgumentParser(argparse.ArgumentParser):
    """Argument parser for command-line tasks that is based on
    `argparse.ArgumentParser`.

    Parameters
    ----------
    name : `str`
        Name of top-level task; used to identify camera-specific override
        files.
    usage : `str`, optional
        Command-line usage signature.
    **kwargs
        Additional keyword arguments for `argparse.ArgumentParser`.

    Notes
    -----
    Users may wish to add additional arguments before calling `parse_args`.
    """
    # I would prefer to check data ID keys and values as they are parsed,
    # but the required information comes from the butler, so I have to
    # construct a butler before I do this checking. Constructing a butler
    # is slow, so I only want do it once, after parsing the command line,
    # so as to catch syntax errors quickly.

    requireOutput = True
    """Require an output directory to be specified (`bool`)."""
    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        """Construct the parser and register the standard task arguments.

        Parameters
        ----------
        name : `str`
            Name of the top-level task; used to locate camera-specific
            config override files.
        usage : `str`, optional
            Command-line usage signature.
        **kwargs
            Additional keyword arguments for `argparse.ArgumentParser`.
        """
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        # fromfile_prefix_chars='@' enables @file argument files; the epilog
        # documents that behavior for --help output.
        argparse.ArgumentParser.__init__(self,
                                         usage=usage,
                                         fromfile_prefix_chars='@',
                                         epilog=textwrap.dedent("""Notes:
            * --config, --config-file or --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --config-file foo bar
                * wrong: --config-file=foo bar
            """),
                                         formatter_class=argparse.RawDescriptionHelpFormatter,
                                         **kwargs)
        # Repository paths; the raw* values are post-processed by
        # _parseDirectories after parsing.
        self.add_argument(metavar='input', dest="rawInput",
                          help=f"path to input data repository, relative to ${DEFAULT_INPUT_NAME}")
        self.add_argument("--calib", dest="rawCalib",
                          help=f"path to input calibration repository, relative to ${DEFAULT_CALIB_NAME}")
        self.add_argument("--output", dest="rawOutput",
                          help="path to output data repository (need not exist), "
                               f"relative to ${DEFAULT_OUTPUT_NAME}")
        self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
                          help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
                               "optionally sets ROOT to ROOT/rerun/INPUT")
        # Config overrides: individual values and whole override files.
        self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
                          help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
        self.add_argument("-C", "--config-file", "--configfile",
                          dest="configfile", nargs="*", action=ConfigFileAction,
                          help="config override file(s)")
        # Logging controls.
        self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
                          help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
                          metavar="LEVEL|COMPONENT=LEVEL")
        self.add_argument("--longlog", action=LongLogAction, help="use a more verbose format for the logging")
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument("--doraise", action="store_true",
                          help="raise an exception on error (else log a message and continue)?")
        self.add_argument("--noExit", action="store_true",
                          help="Do not exit even upon failure (i.e. return a struct to the calling script)")
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument("--show", nargs="+", default=(),
                          help="display the specified information to stdout and quit "
                               "(unless run is specified); information is "
                               "(config[=PATTERN]|history=PATTERN|tasks|data|run)")
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument("-t", "--timeout", type=float,
                          help="Timeout for multiprocessing; maximum wall time (sec)")
        # Clobber/backup behavior for outputs, configs, and package versions.
        self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
                          help=("remove and re-create the output directory if it already exists "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
                          help=("backup and then overwrite existing config files instead of checking them "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
                          help="Don't copy config to file~N backup.")
        self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
                          help=("backup and then overwrite existing package versions instead of checking"
                                "them (safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
                          help="don't check package versions; useful for development")
        # Configure the lsst.log (log4cxx) layer with a simple console layout.
        lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
""")

        # Forward all Python logging to lsst.log
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        lgr.addHandler(lsstLog.LogHandler())
    def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
                        ContainerClass=DataIdContainer):
        """Add a data ID argument.

        Parameters
        ----------
        name : `str`
            Data ID argument (including leading dashes, if wanted).
        datasetType : `str` or `DynamicDatasetType`-type
            Type of dataset. Supply a string for a fixed dataset type.
            For a dynamically determined dataset type, supply
            a `DynamicDatasetType`, such a `DatasetArgument`.
        help : `str`
            Help string for the argument.
        level : `str`
            The lowest hierarchy level to descend to for this dataset type,
            for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
            Use `""` to use the mapper's default for the dataset type.
            Some container classes may also support `None`, which means
            the level should not be restricted; however the default class,
            `DataIdContainer`, does not support `None`.
        doMakeDataRefList : bool, optional
            If `True` (default), construct data references.
        ContainerClass : `class`, optional
            Class to contain data IDs and data references; the default class
            `DataIdContainer` will work for many, but not all, cases.
            For example if the dataset type is specified on the command line
            then use `DynamicDatasetType`.

        Raises
        ------
        RuntimeError
            Raised if the argument name is already registered or collides
            with a reserved namespace attribute.

        Notes
        -----
        If ``datasetType`` is an instance of `DatasetArgument`,
        then add a second argument to specify the dataset type.

        The associated data is put into ``namespace.<dataIdArgument.name>``
        as an instance of `ContainerClass`; the container includes fields:

        - ``idList``: a list of data ID dicts.
        - ``refList``: a list of `~lsst.daf.persistence.Butler`
          data references (empty if ``doMakeDataRefList`` is `False`).
        """
        argName = name.lstrip("-")

        if argName in self._dataIdArgDict:
            raise RuntimeError(f"Data ID argument {name} already exists")
        # These names are set directly on the namespace by parse_args and
        # must not be shadowed by a data ID argument.
        if argName in set(("camera", "config", "butler", "log", "obsPkg")):
            raise RuntimeError(f"Data ID argument {name} is a reserved name")

        self.add_argument(name, nargs="*", action=IdValueAction, help=help,
                          metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")

        dataIdArgument = DataIdArgument(
            name=argName,
            datasetType=datasetType,
            level=level,
            doMakeDataRefList=doMakeDataRefList,
            ContainerClass=ContainerClass,
        )

        # A dynamic dataset type may need its own companion argument
        # (e.g. --id_dstype).
        if dataIdArgument.isDynamicDatasetType:
            datasetType.addArgument(parser=self, idName=argName)

        self._dataIdArgDict[argName] = dataIdArgument
    def parse_args(self, config, args=None, log=None, override=None):
        """Parse arguments for a command-line task.

        Parameters
        ----------
        config : `lsst.pex.config.Config`
            Config for the task being run.
        args : `list`, optional
            Argument list; if `None` then ``sys.argv[1:]`` is used.
        log : `lsst.log.Log` or `logging.Logger`, optional
            Logger instance; if `None` use the default log.
        override : callable, optional
            A config override function. It must take the root config object
            as its only argument and must modify the config in place.
            This function is called after camera-specific overrides files
            are applied, and before command-line config overrides
            are applied (thus allowing the user the final word).

        Returns
        -------
        namespace : `argparse.Namespace`
            A `~argparse.Namespace` instance containing fields:

            - ``camera``: camera name.
            - ``config``: the supplied config with all overrides applied,
              validated and frozen.
            - ``butler``: a `lsst.daf.persistence.Butler` for the data.
            - An entry for each of the data ID arguments registered by
              `add_id_argument`, of the type passed to its ``ContainerClass``
              keyword (`~lsst.pipe.base.DataIdContainer` by default). It
              includes public elements ``idList`` and ``refList``.
            - ``log``: a `lsst.pipe.base.TaskLogAdapter` log.
            - An entry for each command-line argument,
              with the following exceptions:

              - config is the supplied config, suitably updated.
              - configfile, id and loglevel are all missing.
            - ``obsPkg``: name of the ``obs_`` package for this camera.
        """
        if args is None:
            args = sys.argv[1:]

        # The first argument must be the input repository; if it looks like
        # an option or an @file instead, print help and bail out.
        if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                self.exit(f"{self.prog}: error: Must specify input as first argument")

        # Note that --rerun may change namespace.input, but if it does
        # we verify that the new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error(f"Error: input={namespace.input!r} not found")

        namespace.config = config
        # Ensure that the external logger is converted to the expected
        # logger class.
        namespace.log = getTaskLogger(log.name) if log is not None else getTaskLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        if mapperClass is None:
            self.error(f"Error: no mapper specified for input repo {namespace.input!r}")

        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        # Camera-specific hook, then config override files, then the
        # caller-supplied override callable; command-line -c/-C overrides
        # are applied later by argparse actions during parsing.
        self.handleCamera(namespace)

        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)

        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        butlerArgs = {}  # common arguments for butler elements
        if namespace.calib:
            butlerArgs = {'mapperArgs': {'calibRoot': namespace.calib}}
        if namespace.output:
            outputs = {'root': namespace.output, 'mode': 'rw'}
            inputs = {'root': namespace.input}
            inputs.update(butlerArgs)
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            # No separate output repo: read and write through the input repo.
            outputs = {'root': namespace.input, 'mode': 'rw'}
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(outputs=outputs)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler,
        # hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict.keys():
                for dataRef in getattr(namespace, dataIdName).refList:
                    print(f"{dataIdName} dataRef.dataId = {dataRef.dataId}")

        # --show without "run" means display-and-quit.
        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                print("Warning: no 'debug' module found", file=sys.stderr)
                namespace.debug = False

        del namespace.loglevel
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace
    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories

        This allows for hacking the directories, e.g., to include a
        "rerun".
        Modifications are made to the 'namespace' object in-place.
        The raw* attributes are consumed and deleted.
        """
        # Remember the input repo's mapper so we can verify that a --rerun
        # that changes the input still uses the same mapper.
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the
        # namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument.
        # If rerun is specified as a colon separated value,
        # it will be parsed as an input and output.
        # The input value will be overridden if previously specified
        # (but a check is made to make sure both inputs use
        # the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                namespace.output = rerunDir[0]
                # A single rerun with a _parent link means the rerun's parent
                # becomes the effective input repo.
                if os.path.exists(os.path.join(namespace.output, "_parent")):
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error(f"Error: invalid argument for --rerun: {namespace.rerun}")
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun
    def _processDataIds(self, namespace):
        """Process the parsed data for each data ID argument in an
        `~argparse.Namespace`.

        Processing includes:

        - Validate data ID keys.
        - Cast the data ID values to the correct type.
        - Compute data references from data IDs.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``butler``
            - ``log``
            - ``config``, if any dynamic dataset types are set by
              a config parameter.
            - Dataset type arguments (e.g. ``id_dstype``), if any dynamic
              dataset types are specified by such

            These attributes are modified:

            - ``<name>`` for each data ID argument registered using
              `add_id_argument` with name ``<name>``.
        """
        for dataIdArgument in self._dataIdArgDict.values():
            dataIdContainer = getattr(namespace, dataIdArgument.name)
            dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
            if dataIdArgument.doMakeDataRefList:
                try:
                    dataIdContainer.castDataIds(butler=namespace.butler)
                except (KeyError, TypeError) as e:
                    # failure of castDataIds indicates invalid command args
                    self.error(e)

                # failure of makeDataRefList indicates a bug
                # that wants a traceback
                dataIdContainer.makeDataRefList(namespace)
813 def _applyInitialOverrides(self, namespace):
814 """Apply obs-package-specific and camera-specific config
815 override files, if found
817 Parameters
818 ----------
819 namespace : `argparse.Namespace`
820 Parsed namespace. These attributes are read:
822 - ``obsPkg``
824 Look in the package namespace.obsPkg for files:
826 - ``config/<task_name>.py``
827 - ``config/<camera_name>/<task_name>.py`` and load if found.
828 """
829 obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
830 fileName = self._name + ".py"
831 for filePath in (
832 os.path.join(obsPkgDir, "config", fileName),
833 os.path.join(obsPkgDir, "config", namespace.camera, fileName),
834 ):
835 if os.path.exists(filePath):
836 namespace.log.info("Loading config overrride file %r", filePath)
837 namespace.config.load(filePath)
838 else:
839 namespace.log.debug("Config override file does not exist: %r", filePath)
    def handleCamera(self, namespace):
        """Perform camera-specific operations before parsing the command-line.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace with the following fields:

            - ``camera``: the camera name.
            - ``config``: the config passed to parse_args, with no overrides
              applied.
            - ``obsPkg``: the ``obs_`` package for this camera.
            - ``log``: a `lsst.pipe.base.TaskLogAdapter` Log.

        Notes
        -----
        The default implementation does nothing; subclasses override to
        apply camera-specific behavior.
        """
        pass
861 def convert_arg_line_to_args(self, arg_line):
862 """Allow files of arguments referenced by ``@<path>`` to contain
863 multiple values on each line.
865 Parameters
866 ----------
867 arg_line : `str`
868 Line of text read from an argument file.
869 """
870 arg_line = arg_line.strip()
871 if not arg_line or arg_line.startswith("#"):
872 return
873 for arg in shlex.split(arg_line, comments=True, posix=True):
874 if not arg.strip():
875 continue
876 yield arg
    def addReuseOption(self, choices):
        """Add a "--reuse-outputs-from SUBTASK" option to the argument
        parser.

        CmdLineTasks that can be restarted at an intermediate step using
        outputs from earlier (but still internal) steps should use this
        method to allow the user to control whether that happens when
        outputs from earlier steps are present.

        Parameters
        ----------
        choices : sequence
            A sequence of string names (by convention, top-level subtasks)
            that identify the steps that could be skipped when their
            outputs are already present. The list is ordered, so when the
            user specifies one step on the command line, all previous steps
            may be skipped as well. In addition to the choices provided,
            users may pass "all" to indicate that all steps may be thus
            skipped.

        When this method is called, the ``namespace`` object returned by
        ``parse_args`` will contain a ``reuse`` attribute containing
        a list of all steps that should be skipped if their outputs
        are already present.
        If no steps should be skipped, the ``reuse`` will be an empty list.
        """
        # Copy so the caller's sequence is not mutated by the append below.
        choices = list(choices)
        choices.append("all")
        self.add_argument("--reuse-outputs-from", dest="reuse", choices=choices,
                          default=[], action=ReuseAction,
                          help=("Skip the given subtask and its predecessors and reuse their outputs "
                                "if those outputs already exist. Use 'all' to specify all subtasks."))
class InputOnlyArgumentParser(ArgumentParser):
    """`ArgumentParser` variant for command-line tasks that read data but
    never write any output.
    """

    # No output repository is required, so never demand one.
    requireOutput = False
def getTaskDict(config, taskDict=None, baseName=""):
    """Get a dictionary of task info for all subtasks in a config.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    taskDict : `dict`, optional
        Users should not specify this argument. Supports recursion.
        If provided, taskDict is updated in place, else a new `dict`
        is started.
    baseName : `str`, optional
        Users should not specify this argument. It is only used for
        recursion: if a non-empty string then a period is appended
        and the result is used as a prefix for additional entries
        in taskDict; otherwise no prefix is used.

    Returns
    -------
    taskDict : `dict`
        Keys are config field names, values are task names.

    Notes
    -----
    This function is designed to be called recursively.
    The user should call with only a config (leaving taskDict and baseName
    at their default values).
    """
    if taskDict is None:
        taskDict = {}
    for fieldName, field in config.items():
        # Only ConfigurableField-like entries carry both a value (the
        # sub-config) and a target (the task class); skip everything else.
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue
        subBaseName = f"{baseName}.{fieldName}" if baseName else fieldName
        try:
            taskName = f"{field.target.__module__}.{field.target.__name__}"
        except Exception:
            # Fall back to repr when the target lacks module/name attributes.
            taskName = repr(field.target)
        taskDict[subBaseName] = taskName
        getTaskDict(config=subConfig, taskDict=taskDict, baseName=subBaseName)
    return taskDict
def obeyShowArgument(showOpts, config=None, exit=False):
    """Process arguments specified with ``--show`` (but ignores
    ``"data"``).

    Parameters
    ----------
    showOpts : `list` of `str`
        List of options passed to ``--show``.
    config : optional
        The provided config.
    exit : bool, optional
        Exit if ``"run"`` isn't included in ``showOpts``.

    Notes
    -----
    Supports the following options in showOpts:

    - ``config[=PAT]``. Dump all the config entries, or just the ones that
      match the glob pattern.
    - ``history=PAT``. Show where the config entries that match the glob
      pattern were set.
    - ``tasks``. Show task hierarchy.
    - ``data``. Ignored; to be processed by caller.
    - ``run``. Keep going (the default behaviour is to exit if
      ``--show`` is specified).

    Calls ``sys.exit(1)`` if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        # Split "option=args" into the command and its (possibly empty) args.
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            # Accept an optional leading "config." on the glob pattern.
            matConfig = re.search(r"^(?:config.)?(.+)?", showArgs)
            pattern = matConfig.group(1)
            if pattern:
                class FilteredStream:
                    """A file object that only prints lines
                    that match the glob "pattern".

                    N.b. Newlines are silently discarded and reinserted;
                    crude but effective.
                    """

                    def __init__(self, pattern):
                        # obey case if pattern isn't lowercase or requests
                        # NOIGNORECASE
                        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

                        if mat:
                            pattern = mat.group(1)
                            self._pattern = re.compile(fnmatch.translate(pattern))
                        else:
                            if pattern != pattern.lower():
                                print(f"Matching {pattern!r} without regard to case "
                                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
                            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

                    def write(self, showStr):
                        showStr = showStr.rstrip()
                        # Strip off doc string line(s) and cut off
                        # at "=" for string matching
                        matchStr = showStr.split("\n")[-1].split("=")[0]
                        if self._pattern.search(matchStr):
                            print("\n" + showStr)

                fd = FilteredStream(pattern)
            else:
                # No pattern: dump the whole config unfiltered.
                fd = sys.stdout

            config.saveToStream(fd, "config")
        elif showCommand == "history":
            # Accept an optional leading "config." on the glob pattern.
            matHistory = re.search(r"^(?:config.)?(.+)?", showArgs)
            globPattern = matHistory.group(1)
            if not globPattern:
                print("Please provide a value with --show history (e.g. history=*.doXXX)", file=sys.stderr)
                sys.exit(1)

            error = False
            # Walk every config field whose dotted name matches the glob.
            for i, pattern in enumerate(fnmatch.filter(config.names(), globPattern)):
                if i > 0:
                    print("")

                pattern = pattern.split(".")
                cpath, cname = pattern[:-1], pattern[-1]
                hconfig = config  # the config that we're interested in
                # Descend through the dotted path to the owning sub-config.
                for i, cpt in enumerate(cpath):
                    try:
                        hconfig = getattr(hconfig, cpt)
                    except AttributeError:
                        config_path = ".".join(["config"] + cpath[:i])
                        print(f"Error: configuration {config_path} has no subconfig {cpt}", file=sys.stderr)
                        error = True

                try:
                    print(pexConfig.history.format(hconfig, cname))
                except KeyError:
                    config_path = ".".join(["config"] + cpath)
                    print(f"Error: {config_path} has no field {cname}", file=sys.stderr)
                    error = True

            if error:
                sys.exit(1)

        elif showCommand == "data":
            # Handled by the caller (needs a data butler); nothing to do here.
            pass
        elif showCommand == "run":
            # Only consulted in the final exit check below.
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            choices = "', '".join("config[=XXX] data history=XXX tasks run".split())
            print(f"Unknown value for show: {what} (choose from {choices!r})", file=sys.stderr)
            sys.exit(1)

    # Unless "run" was requested, showing implies exiting before doing work.
    if exit and "run" not in showOpts:
        sys.exit(0)
def showTaskHierarchy(config):
    """Print task hierarchy to stdout.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    """
    print("Subtasks:")
    taskDict = getTaskDict(config=config)

    # Print "fieldName: taskName" lines sorted by field name.
    for fieldName in sorted(taskDict):
        print(f"{fieldName}: {taskDict[fieldName]}")
class ConfigValueAction(argparse.Action):
    """argparse action callback that applies ``name=value`` overrides
    from the command-line to config parameters.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Apply one or more config name=value overrides.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The ``namespace.config`` attribute is updated.
        values : `list`
            A list of ``configItemName=value`` pairs.
        option_string : `str`
            Option value specified by the user.
        """
        if namespace.config is None:
            return
        for nameValue in values:
            name, _, valueStr = nameValue.partition("=")
            if not valueStr:
                parser.error(f"{option_string} value {nameValue} must be in form name=value")

            # Try the raw string first; if the field rejects it, fall back
            # to evaluating the string as a Python expression.
            try:
                setDottedAttr(namespace.config, name, valueStr)
            except AttributeError:
                parser.error(f"no config field: {name}")
            except Exception:
                try:
                    value = eval(valueStr, {})
                except Exception:
                    parser.error(f"cannot parse {valueStr!r} as a value for {name}")
                try:
                    setDottedAttr(namespace.config, name, value)
                except Exception as e:
                    parser.error(f"cannot set config.{name}={value!r}: {e}")
class ConfigFileAction(argparse.Action):
    """argparse action that applies config overrides read from one or
    more files.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Load one or more files of config overrides.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The following attributes are updated by this
            method: ``namespace.config``.
        values : `list`
            A list of data config file paths.
        option_string : `str`, optional
            Option value specified by the user.
        """
        # Nothing to override when no config has been attached yet.
        if namespace.config is None:
            return
        for path in values:
            try:
                namespace.config.load(path)
            except Exception as e:
                parser.error(f"cannot load config file {path!r}: {e}")
class IdValueAction(argparse.Action):
    """argparse action callback to process a data ID into a dict.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Parse ``--id`` data and append results to
        ``namespace.<argument>.idList``.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command (an instance of argparse.Namespace).
            The following attributes are updated:

            - ``<idName>.idList``, where ``<idName>`` is the name of the
              ID argument, for instance ``"id"`` for ID argument ``--id``.
        values : `list`
            A list of data IDs; see Notes below.
        option_string : `str`
            Option value specified by the user.

        Notes
        -----
        The data format is::

            key1=value1_1[^value1_2[^value1_3...]
            key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. ``value1_1``) may either be a string,
        or of the form ``"int..int"`` (e.g. ``"1..3"``) which is
        interpreted as ``"1^2^3"`` (inclusive, unlike a python range).
        So ``"0^2..4^7..9"`` is equivalent to ``"0^2^3^4^7^8^9"``.
        You may also specify a stride: ``"1..5:2"`` is ``"1^3^5"``.

        The cross product is computed for keys with multiple values.
        For example::

            --id visit 1^2 ccd 1,1^2,2

        results in the following data ID dicts being appended to
        ``namespace.<argument>.idList``:

            {"visit":1, "ccd":"1,1"}
            {"visit":2, "ccd":"1,1"}
            {"visit":1, "ccd":"2,2"}
            {"visit":2, "ccd":"2,2"}
        """
        if namespace.config is None:
            return
        idDict = collections.OrderedDict()
        for nameValue in values:
            name, sep, valueStr = nameValue.partition("=")
            if name in idDict:
                parser.error(f"{name} appears multiple times in one ID argument: {option_string}")
            # Expand each "^"-separated element, turning "a..b[:s]" ranges
            # into the individual values they denote (as strings).
            expanded = []
            for item in valueStr.split("^"):
                mat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", item)
                if mat is None:
                    expanded.append(item)
                else:
                    start = int(mat.group(1))
                    stop = int(mat.group(2))
                    strideStr = mat.group(3)
                    stride = int(strideStr) if strideStr else 1
                    # Inclusive upper bound, unlike a Python range.
                    expanded.extend(str(j) for j in range(start, stop + 1, stride))
            idDict[name] = expanded

        # Cross product over all keys, preserving key insertion order.
        keys = list(idDict.keys())
        idDictList = [collections.OrderedDict(zip(keys, combo))
                      for combo in itertools.product(*idDict.values())]

        # "--id" updates namespace.id, "--selectId" namespace.selectId, etc.
        argName = option_string.lstrip("-")
        getattr(namespace, argName).idList += idDictList
class LongLogAction(argparse.Action):
    """argparse action to make logs verbose.

    An action so that it can take effect before log level options.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Set long log.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. This argument is not used.
        values : `list`
            Unused.
        option_string : `str`
            Option value specified by the user (unused).
        """
        # Reconfigure lsst.log with a verbose log4j-style layout: level,
        # ISO timestamp, logger name, MDC label, and source file/line.
        lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n
""")
class LogLevelAction(argparse.Action):
    """argparse action to set log level.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Set trace level.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. This argument is not used.
        values : `list`
            List of trace levels; each item must be of the form
            ``component_name=level`` or ``level``, where ``level``
            is a keyword (not case sensitive) or an integer.
        option_string : `str`
            Option value specified by the user.
        """
        permittedLevelList = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        permittedLevelSet = set(permittedLevelList)
        for componentLevel in values:
            component, sep, levelStr = componentLevel.partition("=")
            if not levelStr:
                # No "=" present: the whole item is a level for the
                # task's own logger rather than a named component.
                levelStr, component = component, None
            logLevelUpr = levelStr.upper()

            if component is None:
                logger = namespace.log
            else:
                logger = getTaskLogger(component)

            if logLevelUpr in permittedLevelSet:
                # Level keywords are exposed as attributes on the logger.
                logLevel = getattr(logger, logLevelUpr)
            else:
                # parser.error exits, so logLevel is always bound below.
                parser.error(f"loglevel={levelStr!r} not one of {permittedLevelList}")

            logger.setLevel(logLevel)

            # Set logging level for whatever logger this wasn't.
            if isinstance(logger, lsstLog.Log):
                pyLevel = lsstLog.LevelTranslator.lsstLog2logging(logLevel)
                logging.getLogger(component or None).setLevel(pyLevel)
            else:
                # Need to set lsstLog level
                lsstLogLevel = lsstLog.LevelTranslator.logging2lsstLog(logLevel)
                lsstLog.getLogger(component or "").setLevel(lsstLogLevel)
class ReuseAction(argparse.Action):
    """argparse action associated with ArgumentParser.addReuseOption."""

    def __call__(self, parser, namespace, value, option_string):
        # "all" is always the final entry in self.choices, so the last
        # real subtask name sits at index -2.
        chosen = self.choices[-2] if value == "all" else value
        # Reusing a step implies reusing every step before it.
        namespace.reuse = self.choices[:self.choices.index(chosen) + 1]
def setDottedAttr(item, name, value):
    """Set an instance attribute (like `setattr` but accepting
    hierarchical names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be set.
    name : `str`
        Name of attribute to set.
    value : obj
        New value for the attribute.

    Notes
    -----
    For example if name is ``foo.bar.baz`` then ``item.foo.bar.baz``
    is set to the specified value.
    """
    # Walk down to the object owning the final component, then set it.
    *parents, leaf = name.split(".")
    target = item
    for part in parents:
        target = getattr(target, part)
    setattr(target, leaf, value)
def getDottedAttr(item, name):
    """Get an attribute (like `getattr` but accepts hierarchical names
    such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object whose attribute is to be returned.
    name : `str`
        Name of the attribute to get.

    Returns
    -------
    itemAttr : obj
        If name is ``foo.bar.baz`` then the return value is
        ``item.foo.bar.baz``.
    """
    # Follow each dotted component in turn.
    result = item
    for attrName in name.split("."):
        result = getattr(result, attrName)
    return result