Coverage for python/lsst/pipe/base/argumentParser.py : 11%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# LSST Data Management System
3# Copyright 2008-2015 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
22__all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
23 "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]
25import abc
26import argparse
27import collections
28import fnmatch
29import itertools
30import logging
31import os
32import re
33import shlex
34import sys
35import shutil
36import textwrap
38import lsst.utils
39import lsst.pex.config as pexConfig
40import lsst.pex.config.history
41import lsst.log as lsstLog
42import lsst.daf.persistence as dafPersist
44DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
45DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
46DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
49def _fixPath(defName, path):
50 """Apply environment variable as default root, if present, and abspath.
52 Parameters
53 ----------
54 defName : `str`
55 Name of environment variable containing default root path;
56 if the environment variable does not exist
57 then the path is relative to the current working directory
58 path : `str`
59 Path relative to default root path.
61 Returns
62 -------
63 abspath : `str`
64 Path that has been expanded, or `None` if the environment variable
65 does not exist and path is `None`.
66 """
67 defRoot = os.environ.get(defName)
68 if defRoot is None:
69 if path is None:
70 return None
71 return os.path.abspath(path)
72 return os.path.abspath(os.path.join(defRoot, path or ""))
75class DataIdContainer:
76 """Container for data IDs and associated data references.
78 Parameters
79 ----------
80 level : `str`
81 The lowest hierarchy level to descend to for this dataset type,
82 for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
83 Use `""` to use the mapper's default for the dataset type.
84 This class does not support `None`, but if it did, `None`
85 would mean the level should not be restricted.
87 Notes
88 -----
89 Override this class for data IDs that require special handling to be
90 converted to ``data references``, and specify the override class
91 as ``ContainerClass`` for ``add_id_argument``.
93 If you don't want the argument parser to compute data references,
94 specify ``doMakeDataRefList=False`` in ``add_id_argument``.
95 """
97 def __init__(self, level=None):
98 self.datasetType = None
99 """Dataset type of the data references (`str`).
100 """
101 self.level = level
102 """See parameter ``level`` (`str`).
103 """
104 self.idList = []
105 """List of data IDs specified on the command line for the
106 appropriate data ID argument (`list` of `dict`).
107 """
108 self.refList = []
109 """List of data references for the data IDs in ``idList``
110 (`list` of `lsst.daf.persistence.ButlerDataRef`).
111 Elements will be omitted if the corresponding data is not found.
112 The list will be empty when returned by ``parse_args`` if
113 ``doMakeDataRefList=False`` was specified in ``add_id_argument``.
114 """
116 def setDatasetType(self, datasetType):
117 """Set actual dataset type, once it is known.
119 Parameters
120 ----------
121 datasetType : `str`
122 Dataset type.
124 Notes
125 -----
126 The reason ``datasetType`` is not a constructor argument is that
127 some subclasses do not know the dataset type until the command
128 is parsed. Thus, to reduce special cases in the code,
129 ``datasetType`` is always set after the command is parsed.
130 """
131 self.datasetType = datasetType
133 def castDataIds(self, butler):
134 """Validate data IDs and cast them to the correct type
135 (modify idList in place).
137 This code casts the values in the data IDs dicts in `dataIdList`
138 to the type required by the butler. Data IDs are read from the
139 command line as `str`, but the butler requires some values to be
140 other types. For example "visit" values should be `int`.
142 Parameters
143 ----------
144 butler : `lsst.daf.persistence.Butler`
145 Data butler.
146 """
147 if self.datasetType is None:
148 raise RuntimeError("Must call setDatasetType first")
149 try:
150 idKeyTypeDict = butler.getKeys(datasetType=self.datasetType, level=self.level)
151 except KeyError as e:
152 msg = f"Cannot get keys for datasetType {self.datasetType} at level {self.level}"
153 raise KeyError(msg) from e
155 for dataDict in self.idList:
156 for key, strVal in dataDict.items():
157 try:
158 keyType = idKeyTypeDict[key]
159 except KeyError:
160 # OK, assume that it's a valid key and guess that it's a
161 # string
162 keyType = str
164 log = lsstLog.Log.getDefaultLogger()
165 log.warn("Unexpected ID %s; guessing type is \"%s\"",
166 key, 'str' if keyType == str else keyType)
167 idKeyTypeDict[key] = keyType
169 if keyType != str:
170 try:
171 castVal = keyType(strVal)
172 except Exception:
173 raise TypeError(f"Cannot cast value {strVal!r} to {keyType} for ID key {key}")
174 dataDict[key] = castVal
176 def makeDataRefList(self, namespace):
177 """Compute refList based on idList.
179 Parameters
180 ----------
181 namespace : `argparse.Namespace`
182 Results of parsing command-line. The ``butler`` and ``log``
183 elements must be set.
185 Notes
186 -----
187 Not called if ``add_id_argument`` was called with
188 ``doMakeDataRefList=False``.
189 """
190 if self.datasetType is None:
191 raise RuntimeError("Must call setDatasetType first")
192 butler = namespace.butler
193 for dataId in self.idList:
194 refList = dafPersist.searchDataRefs(butler, datasetType=self.datasetType,
195 level=self.level, dataId=dataId)
196 if not refList:
197 namespace.log.warn("No data found for dataId=%s", dataId)
198 continue
199 self.refList += refList
202class DataIdArgument:
203 """data ID argument, used by `ArgumentParser.add_id_argument`.
205 Parameters
206 ----------
207 name : `str`
208 Name of identifier (argument name without dashes).
209 datasetType : `str`
210 Type of dataset; specify a string for a fixed dataset type
211 or a `DatasetArgument` for a dynamic dataset type (e.g.
212 one specified by a command-line argument).
213 level : `str`
214 The lowest hierarchy level to descend to for this dataset type,
215 for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
216 Use `""` to use the mapper's default for the dataset type.
217 Some container classes may also support `None`, which means
218 the level should not be restricted; however the default class,
219 `DataIdContainer`, does not support `None`.
220 doMakeDataRefList : `bool`, optional
221 If `True` (default), construct data references.
222 ContainerClass : `class`, optional
223 Class to contain data IDs and data references; the default class
224 `DataIdContainer` will work for many, but not all, cases.
225 For example if the dataset type is specified on the command line
226 then use `DynamicDatasetType`.
227 """
229 def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
230 if name.startswith("-"):
231 raise RuntimeError(f"Name {name} must not start with -")
232 self.name = name
233 self.datasetType = datasetType
234 self.level = level
235 self.doMakeDataRefList = bool(doMakeDataRefList)
236 self.ContainerClass = ContainerClass
237 self.argName = name.lstrip("-")
239 @property
240 def isDynamicDatasetType(self):
241 """`True` if the dataset type is dynamic (that is, specified
242 on the command line).
243 """
244 return isinstance(self.datasetType, DynamicDatasetType)
246 def getDatasetType(self, namespace):
247 """Get the dataset type as a string.
249 Parameters
250 ----------
251 namespace
252 Parsed command.
254 Returns
255 -------
256 datasetType : `str`
257 Dataset type.
258 """
259 if self.isDynamicDatasetType:
260 return self.datasetType.getDatasetType(namespace)
261 else:
262 return self.datasetType
265class DynamicDatasetType(metaclass=abc.ABCMeta):
266 """Abstract base class for a dataset type determined from parsed
267 command-line arguments.
268 """
270 def addArgument(self, parser, idName):
271 """Add a command-line argument to specify dataset type name,
272 if wanted.
274 Parameters
275 ----------
276 parser : `ArgumentParser`
277 Argument parser to add the argument to.
278 idName : `str`
279 Name of data ID argument, without the leading ``"--"``,
280 e.g. ``"id"``.
282 Notes
283 -----
284 The default implementation does nothing
285 """
286 pass
288 @abc.abstractmethod
289 def getDatasetType(self, namespace):
290 """Get the dataset type as a string, based on parsed command-line
291 arguments.
293 Returns
294 -------
295 datasetType : `str`
296 Dataset type.
297 """
298 raise NotImplementedError("Subclasses must override")
301class DatasetArgument(DynamicDatasetType):
302 """Dataset type specified by a command-line argument.
304 Parameters
305 ----------
306 name : `str`, optional
307 Name of command-line argument (including leading "--",
308 if appropriate) whose value is the dataset type.
309 If `None`, uses ``--idName_dstype`` where idName
310 is the name of the data ID argument (e.g. "id").
311 help : `str`, optional
312 Help string for the command-line argument.
313 default : `object`, optional
314 Default value. If `None`, then the command-line option is required.
315 This argument isignored if the command-line argument is positional
316 (name does not start with "-") because positional arguments do
317 not support default values.
318 """
320 def __init__(self,
321 name=None,
322 help="dataset type to process from input data repository",
323 default=None,
324 ):
325 DynamicDatasetType.__init__(self)
326 self.name = name
327 self.help = help
328 self.default = default
330 def getDatasetType(self, namespace):
331 """Get the dataset type as a string, from the appropriate
332 command-line argument.
334 Parameters
335 ----------
336 namespace :
337 Parsed command.
339 Returns
340 -------
341 datasetType : `str`
342 Dataset type.
343 """
344 argName = self.name.lstrip("-")
345 return getattr(namespace, argName)
347 def addArgument(self, parser, idName):
348 """Add a command-line argument to specify the dataset type name.
350 Parameters
351 ----------
352 parser : `ArgumentParser`
353 Argument parser.
354 idName : `str`
355 Data ID.
357 Notes
358 -----
359 Also sets the `name` attribute if it is currently `None`.
360 """
361 help = self.help if self.help else f"dataset type for {idName}"
362 if self.name is None:
363 self.name = f"--{idName}_dstype"
364 requiredDict = dict()
365 if self.name.startswith("-"):
366 requiredDict = dict(required=self.default is None)
367 parser.add_argument(
368 self.name,
369 default=self.default,
370 help=help,
371 **requiredDict)
374class ConfigDatasetType(DynamicDatasetType):
375 """Dataset type specified by a config parameter.
377 Parameters
378 ----------
379 name : `str`
380 Name of config option whose value is the dataset type.
381 """
383 def __init__(self, name):
384 DynamicDatasetType.__init__(self)
385 self.name = name
387 def getDatasetType(self, namespace):
388 """Return the dataset type as a string, from the appropriate
389 config field.
391 Parameters
392 ----------
393 namespace : `argparse.Namespace`
394 Parsed command.
395 """
396 # getattr does not work reliably if the config field name is
397 # dotted, so step through one level at a time
398 keyList = self.name.split(".")
399 value = namespace.config
400 for key in keyList:
401 try:
402 value = getattr(value, key)
403 except KeyError:
404 raise RuntimeError(f"Cannot find config parameter {self.name!r}")
405 return value
408class ArgumentParser(argparse.ArgumentParser):
409 """Argument parser for command-line tasks that is based on
410 `argparse.ArgumentParser`.
412 Parameters
413 ----------
414 name : `str`
415 Name of top-level task; used to identify camera-specific override
416 files.
417 usage : `str`, optional
418 Command-line usage signature.
419 **kwargs
420 Additional keyword arguments for `argparse.ArgumentParser`.
422 Notes
423 -----
424 Users may wish to add additional arguments before calling `parse_args`.
425 """
426 # I would prefer to check data ID keys and values as they are parsed,
427 # but the required information comes from the butler, so I have to
428 # construct a butler before I do this checking. Constructing a butler
429 # is slow, so I only want do it once, after parsing the command line,
430 # so as to catch syntax errors quickly.
432 requireOutput = True
433 """Require an output directory to be specified (`bool`)."""
435 def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
436 self._name = name
437 self._dataIdArgDict = {} # Dict of data identifier specifications, by argument name
438 argparse.ArgumentParser.__init__(self,
439 usage=usage,
440 fromfile_prefix_chars='@',
441 epilog=textwrap.dedent("""Notes:
442 * --config, --config-file or --configfile, --id, --loglevel and @file may appear multiple times;
443 all values are used, in order left to right
444 * @file reads command-line options from the specified file:
445 * data may be distributed among multiple lines (e.g. one option per line)
446 * data after # is treated as a comment and ignored
447 * blank lines and lines starting with # are ignored
448 * To specify multiple values for an option, do not use = after the option name:
449 * right: --config-file foo bar
450 * wrong: --config-file=foo bar
451 """),
452 formatter_class=argparse.RawDescriptionHelpFormatter,
453 **kwargs)
454 self.add_argument(metavar='input', dest="rawInput",
455 help=f"path to input data repository, relative to ${DEFAULT_INPUT_NAME}")
456 self.add_argument("--calib", dest="rawCalib",
457 help=f"path to input calibration repository, relative to ${DEFAULT_CALIB_NAME}")
458 self.add_argument("--output", dest="rawOutput",
459 help="path to output data repository (need not exist), "
460 f"relative to ${DEFAULT_OUTPUT_NAME}")
461 self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
462 help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
463 "optionally sets ROOT to ROOT/rerun/INPUT")
464 self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
465 help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
466 self.add_argument("-C", "--config-file", "--configfile",
467 dest="configfile", nargs="*", action=ConfigFileAction,
468 help="config override file(s)")
469 self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
470 help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
471 metavar="LEVEL|COMPONENT=LEVEL")
472 self.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
473 self.add_argument("--debug", action="store_true", help="enable debugging output?")
474 self.add_argument("--doraise", action="store_true",
475 help="raise an exception on error (else log a message and continue)?")
476 self.add_argument("--noExit", action="store_true",
477 help="Do not exit even upon failure (i.e. return a struct to the calling script)")
478 self.add_argument("--profile", help="Dump cProfile statistics to filename")
479 self.add_argument("--show", nargs="+", default=(),
480 help="display the specified information to stdout and quit "
481 "(unless run is specified); information is "
482 "(config[=PATTERN]|history=PATTERN|tasks|data|run)")
483 self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
484 self.add_argument("-t", "--timeout", type=float,
485 help="Timeout for multiprocessing; maximum wall time (sec)")
486 self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
487 help=("remove and re-create the output directory if it already exists "
488 "(safe with -j, but not all other forms of parallel execution)"))
489 self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
490 help=("backup and then overwrite existing config files instead of checking them "
491 "(safe with -j, but not all other forms of parallel execution)"))
492 self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
493 help="Don't copy config to file~N backup.")
494 self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
495 help=("backup and then overwrite existing package versions instead of checking"
496 "them (safe with -j, but not all other forms of parallel execution)"))
497 self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
498 help="don't check package versions; useful for development")
499 lsstLog.configure_prop("""
500log4j.rootLogger=INFO, A1
501log4j.appender.A1=ConsoleAppender
502log4j.appender.A1.Target=System.out
503log4j.appender.A1.layout=PatternLayout
504log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
505""")
507 # Forward all Python logging to lsst.log
508 lgr = logging.getLogger()
509 lgr.setLevel(logging.INFO) # same as in log4cxx config above
510 lgr.addHandler(lsstLog.LogHandler())
512 def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
513 ContainerClass=DataIdContainer):
514 """Add a data ID argument.
517 Parameters
518 ----------
519 name : `str`
520 Data ID argument (including leading dashes, if wanted).
521 datasetType : `str` or `DynamicDatasetType`-type
522 Type of dataset. Supply a string for a fixed dataset type.
523 For a dynamically determined dataset type, supply
524 a `DynamicDatasetType`, such a `DatasetArgument`.
525 help : `str`
526 Help string for the argument.
527 level : `str`
528 The lowest hierarchy level to descend to for this dataset type,
529 for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
530 Use `""` to use the mapper's default for the dataset type.
531 Some container classes may also support `None`, which means
532 the level should not be restricted; however the default class,
533 `DataIdContainer`, does not support `None`.
534 doMakeDataRefList : bool, optional
535 If `True` (default), construct data references.
536 ContainerClass : `class`, optional
537 Class to contain data IDs and data references; the default class
538 `DataIdContainer` will work for many, but not all, cases.
539 For example if the dataset type is specified on the command line
540 then use `DynamicDatasetType`.
542 Notes
543 -----
544 If ``datasetType`` is an instance of `DatasetArgument`,
545 then add a second argument to specify the dataset type.
547 The associated data is put into ``namespace.<dataIdArgument.name>``
548 as an instance of `ContainerClass`; the container includes fields:
550 - ``idList``: a list of data ID dicts.
551 - ``refList``: a list of `~lsst.daf.persistence.Butler`
552 data references (empty if ``doMakeDataRefList`` is `False`).
553 """
554 argName = name.lstrip("-")
556 if argName in self._dataIdArgDict:
557 raise RuntimeError(f"Data ID argument {name} already exists")
558 if argName in set(("camera", "config", "butler", "log", "obsPkg")):
559 raise RuntimeError(f"Data ID argument {name} is a reserved name")
561 self.add_argument(name, nargs="*", action=IdValueAction, help=help,
562 metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")
564 dataIdArgument = DataIdArgument(
565 name=argName,
566 datasetType=datasetType,
567 level=level,
568 doMakeDataRefList=doMakeDataRefList,
569 ContainerClass=ContainerClass,
570 )
572 if dataIdArgument.isDynamicDatasetType:
573 datasetType.addArgument(parser=self, idName=argName)
575 self._dataIdArgDict[argName] = dataIdArgument
577 def parse_args(self, config, args=None, log=None, override=None):
578 """Parse arguments for a command-line task.
580 Parameters
581 ----------
582 config : `lsst.pex.config.Config`
583 Config for the task being run.
584 args : `list`, optional
585 Argument list; if `None` then ``sys.argv[1:]`` is used.
586 log : `lsst.log.Log`, optional
587 `~lsst.log.Log` instance; if `None` use the default log.
588 override : callable, optional
589 A config override function. It must take the root config object
590 as its only argument and must modify the config in place.
591 This function is called after camera-specific overrides files
592 are applied, and before command-line config overrides
593 are applied (thus allowing the user the final word).
595 Returns
596 -------
597 namespace : `argparse.Namespace`
598 A `~argparse.Namespace` instance containing fields:
600 - ``camera``: camera name.
601 - ``config``: the supplied config with all overrides applied,
602 validated and frozen.
603 - ``butler``: a `lsst.daf.persistence.Butler` for the data.
604 - An entry for each of the data ID arguments registered by
605 `add_id_argument`, of the type passed to its ``ContainerClass``
606 keyword (`~lsst.pipe.base.DataIdContainer` by default). It
607 includes public elements ``idList`` and ``refList``.
608 - ``log``: a `lsst.log` Log.
609 - An entry for each command-line argument,
610 with the following exceptions:
612 - config is the supplied config, suitably updated.
613 - configfile, id and loglevel are all missing.
614 - ``obsPkg``: name of the ``obs_`` package for this camera.
615 """
616 if args is None:
617 args = sys.argv[1:]
619 if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
620 self.print_help()
621 if len(args) == 1 and args[0] in ("-h", "--help"):
622 self.exit()
623 else:
624 self.exit(f"{self.prog}: error: Must specify input as first argument")
626 # Note that --rerun may change namespace.input, but if it does
627 # we verify that the new input has the same mapper class.
628 namespace = argparse.Namespace()
629 namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
630 if not os.path.isdir(namespace.input):
631 self.error(f"Error: input={namespace.input!r} not found")
633 namespace.config = config
634 namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
635 mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
636 if mapperClass is None:
637 self.error(f"Error: no mapper specified for input repo {namespace.input!r}")
639 namespace.camera = mapperClass.getCameraName()
640 namespace.obsPkg = mapperClass.getPackageName()
642 self.handleCamera(namespace)
644 self._applyInitialOverrides(namespace)
645 if override is not None:
646 override(namespace.config)
648 # Add data ID containers to namespace
649 for dataIdArgument in self._dataIdArgDict.values():
650 setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))
652 namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
653 del namespace.configfile
655 self._parseDirectories(namespace)
657 if namespace.clobberOutput:
658 if namespace.output is None:
659 self.error("--clobber-output is only valid with --output or --rerun")
660 elif namespace.output == namespace.input:
661 self.error("--clobber-output is not valid when the output and input repos are the same")
662 if os.path.exists(namespace.output):
663 namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
664 shutil.rmtree(namespace.output)
666 namespace.log.debug("input=%s", namespace.input)
667 namespace.log.debug("calib=%s", namespace.calib)
668 namespace.log.debug("output=%s", namespace.output)
670 obeyShowArgument(namespace.show, namespace.config, exit=False)
672 # No environment variable or --output or --rerun specified.
673 if self.requireOutput and namespace.output is None and namespace.rerun is None:
674 self.error("no output directory specified.\n"
675 "An output directory must be specified with the --output or --rerun\n"
676 "command-line arguments.\n")
678 butlerArgs = {} # common arguments for butler elements
679 if namespace.calib:
680 butlerArgs = {'mapperArgs': {'calibRoot': namespace.calib}}
681 if namespace.output:
682 outputs = {'root': namespace.output, 'mode': 'rw'}
683 inputs = {'root': namespace.input}
684 inputs.update(butlerArgs)
685 outputs.update(butlerArgs)
686 namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
687 else:
688 outputs = {'root': namespace.input, 'mode': 'rw'}
689 outputs.update(butlerArgs)
690 namespace.butler = dafPersist.Butler(outputs=outputs)
692 # convert data in each of the identifier lists to proper types
693 # this is done after constructing the butler,
694 # hence after parsing the command line,
695 # because it takes a long time to construct a butler
696 self._processDataIds(namespace)
697 if "data" in namespace.show:
698 for dataIdName in self._dataIdArgDict.keys():
699 for dataRef in getattr(namespace, dataIdName).refList:
700 print(f"{dataIdName} dataRef.dataId = {dataRef.dataId}")
702 if namespace.show and "run" not in namespace.show:
703 sys.exit(0)
705 if namespace.debug:
706 try:
707 import debug
708 assert debug # silence pyflakes
709 except ImportError:
710 print("Warning: no 'debug' module found", file=sys.stderr)
711 namespace.debug = False
713 del namespace.loglevel
715 if namespace.longlog:
716 lsstLog.configure_prop("""
717log4j.rootLogger=INFO, A1
718log4j.appender.A1=ConsoleAppender
719log4j.appender.A1.Target=System.out
720log4j.appender.A1.layout=PatternLayout
721log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n
722""")
723 del namespace.longlog
725 namespace.config.validate()
726 namespace.config.freeze()
728 return namespace
730 def _parseDirectories(self, namespace):
731 """Parse input, output and calib directories
733 This allows for hacking the directories, e.g., to include a
734 "rerun".
735 Modifications are made to the 'namespace' object in-place.
736 """
737 mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
738 namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)
740 # If an output directory is specified, process it and assign it to the
741 # namespace
742 if namespace.rawOutput:
743 namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
744 else:
745 namespace.output = None
747 # This section processes the rerun argument.
748 # If rerun is specified as a colon separated value,
749 # it will be parsed as an input and output.
750 # The input value will be overridden if previously specified
751 # (but a check is made to make sure both inputs use
752 # the same mapper)
753 if namespace.rawRerun:
754 if namespace.output:
755 self.error("Error: cannot specify both --output and --rerun")
756 namespace.rerun = namespace.rawRerun.split(":")
757 rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
758 modifiedInput = False
759 if len(rerunDir) == 2:
760 namespace.input, namespace.output = rerunDir
761 modifiedInput = True
762 elif len(rerunDir) == 1:
763 namespace.output = rerunDir[0]
764 if os.path.exists(os.path.join(namespace.output, "_parent")):
765 namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
766 modifiedInput = True
767 else:
768 self.error(f"Error: invalid argument for --rerun: {namespace.rerun}")
769 if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
770 self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
771 else:
772 namespace.rerun = None
773 del namespace.rawInput
774 del namespace.rawCalib
775 del namespace.rawOutput
776 del namespace.rawRerun
778 def _processDataIds(self, namespace):
779 """Process the parsed data for each data ID argument in an
780 `~argparse.Namespace`.
782 Processing includes:
784 - Validate data ID keys.
785 - Cast the data ID values to the correct type.
786 - Compute data references from data IDs.
788 Parameters
789 ----------
790 namespace : `argparse.Namespace`
791 Parsed namespace. These attributes are read:
793 - ``butler``
794 - ``log``
795 - ``config``, if any dynamic dataset types are set by
796 a config parameter.
797 - Dataset type arguments (e.g. ``id_dstype``), if any dynamic
798 dataset types are specified by such
800 These attributes are modified:
802 - ``<name>`` for each data ID argument registered using
803 `add_id_argument` with name ``<name>``.
804 """
805 for dataIdArgument in self._dataIdArgDict.values():
806 dataIdContainer = getattr(namespace, dataIdArgument.name)
807 dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
808 if dataIdArgument.doMakeDataRefList:
809 try:
810 dataIdContainer.castDataIds(butler=namespace.butler)
811 except (KeyError, TypeError) as e:
812 # failure of castDataIds indicates invalid command args
813 self.error(e)
815 # failure of makeDataRefList indicates a bug
816 # that wants a traceback
817 dataIdContainer.makeDataRefList(namespace)
819 def _applyInitialOverrides(self, namespace):
820 """Apply obs-package-specific and camera-specific config
821 override files, if found
823 Parameters
824 ----------
825 namespace : `argparse.Namespace`
826 Parsed namespace. These attributes are read:
828 - ``obsPkg``
830 Look in the package namespace.obsPkg for files:
832 - ``config/<task_name>.py``
833 - ``config/<camera_name>/<task_name>.py`` and load if found.
834 """
835 obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
836 fileName = self._name + ".py"
837 for filePath in (
838 os.path.join(obsPkgDir, "config", fileName),
839 os.path.join(obsPkgDir, "config", namespace.camera, fileName),
840 ):
841 if os.path.exists(filePath):
842 namespace.log.info("Loading config overrride file %r", filePath)
843 namespace.config.load(filePath)
844 else:
845 namespace.log.debug("Config override file does not exist: %r", filePath)
847 def handleCamera(self, namespace):
848 """Perform camera-specific operations before parsing the command-line.
850 Parameters
851 ----------
852 namespace : `argparse.Namespace`
853 Namespace (an ) with the following fields:
855 - ``camera``: the camera name.
856 - ``config``: the config passed to parse_args, with no overrides
857 applied.
858 - ``obsPkg``: the ``obs_`` package for this camera.
859 - ``log``: a `lsst.log` Log.
861 Notes
862 -----
863 The default implementation does nothing.
864 """
865 pass
867 def convert_arg_line_to_args(self, arg_line):
868 """Allow files of arguments referenced by ``@<path>`` to contain
869 multiple values on each line.
871 Parameters
872 ----------
873 arg_line : `str`
874 Line of text read from an argument file.
875 """
876 arg_line = arg_line.strip()
877 if not arg_line or arg_line.startswith("#"):
878 return
879 for arg in shlex.split(arg_line, comments=True, posix=True):
880 if not arg.strip():
881 continue
882 yield arg
884 def addReuseOption(self, choices):
885 """Add a "--reuse-outputs-from SUBTASK" option to the argument
886 parser.
888 CmdLineTasks that can be restarted at an intermediate step using
889 outputs from earlier (but still internal) steps should use this
890 method to allow the user to control whether that happens when
891 outputs from earlier steps are present.
893 Parameters
894 ----------
895 choices : sequence
896 A sequence of string names (by convention, top-level subtasks)
897 that identify the steps that could be skipped when their
898 outputs are already present. The list is ordered, so when the
899 user specifies one step on the command line, all previous steps
900 may be skipped as well. In addition to the choices provided,
901 users may pass "all" to indicate that all steps may be thus
902 skipped.
904 When this method is called, the ``namespace`` object returned by
905 ``parse_args`` will contain a ``reuse`` attribute containing
906 a list of all steps that should be skipped if their outputs
907 are already present.
908 If no steps should be skipped, the ``reuse`` will be an empty list.
909 """
910 choices = list(choices)
911 choices.append("all")
912 self.add_argument("--reuse-outputs-from", dest="reuse", choices=choices,
913 default=[], action=ReuseAction,
914 help=("Skip the given subtask and its predecessors and reuse their outputs "
915 "if those outputs already exist. Use 'all' to specify all subtasks."))
918class InputOnlyArgumentParser(ArgumentParser):
919 """`ArgumentParser` for command-line tasks that don't write any output.
920 """
922 requireOutput = False # We're not going to write anything
925def getTaskDict(config, taskDict=None, baseName=""):
926 """Get a dictionary of task info for all subtasks in a config
928 Parameters
929 ----------
930 config : `lsst.pex.config.Config`
931 Configuration to process.
932 taskDict : `dict`, optional
933 Users should not specify this argument. Supports recursion.
934 If provided, taskDict is updated in place, else a new `dict`
935 is started.
936 baseName : `str`, optional
937 Users should not specify this argument. It is only used for
938 recursion: if a non-empty string then a period is appended
939 and the result is used as a prefix for additional entries
940 in taskDict; otherwise no prefix is used.
942 Returns
943 -------
944 taskDict : `dict`
945 Keys are config field names, values are task names.
947 Notes
948 -----
949 This function is designed to be called recursively.
950 The user should call with only a config (leaving taskDict and baseName
951 at their default values).
952 """
953 if taskDict is None:
954 taskDict = dict()
955 for fieldName, field in config.items():
956 if hasattr(field, "value") and hasattr(field, "target"):
957 subConfig = field.value
958 if isinstance(subConfig, pexConfig.Config):
959 subBaseName = f"{baseName}.{fieldName}" if baseName else fieldName
960 try:
961 taskName = f"{field.target.__module__}.{field.target.__name__}"
962 except Exception:
963 taskName = repr(field.target)
964 taskDict[subBaseName] = taskName
965 getTaskDict(config=subConfig, taskDict=taskDict, baseName=subBaseName)
966 return taskDict
969def obeyShowArgument(showOpts, config=None, exit=False):
970 """Process arguments specified with ``--show`` (but ignores
971 ``"data"``).
973 Parameters
974 ----------
975 showOpts : `list` of `str`
976 List of options passed to ``--show``.
977 config : optional
978 The provided config.
979 exit : bool, optional
980 Exit if ``"run"`` isn't included in ``showOpts``.
982 Parameters
983 ----------
984 Supports the following options in showOpts:
986 - ``config[=PAT]``. Dump all the config entries, or just the ones that
987 match the glob pattern.
988 - ``history=PAT``. Show where the config entries that match the glob
989 pattern were set.
990 - ``tasks``. Show task hierarchy.
991 - ``data``. Ignored; to be processed by caller.
992 - ``run``. Keep going (the default behaviour is to exit if
993 ``--show`` is specified).
995 Calls ``sys.exit(1)`` if any other option found.
996 """
997 if not showOpts:
998 return
1000 for what in showOpts:
1001 showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")
1003 if showCommand == "config":
1004 matConfig = re.search(r"^(?:config.)?(.+)?", showArgs)
1005 pattern = matConfig.group(1)
1006 if pattern:
1007 class FilteredStream:
1008 """A file object that only prints lines
1009 that match the glob "pattern".
1011 N.b. Newlines are silently discarded and reinserted;
1012 crude but effective.
1013 """
1015 def __init__(self, pattern):
1016 # obey case if pattern isn't lowecase or requests
1017 # NOIGNORECASE
1018 mat = re.search(r"(.*):NOIGNORECASE$", pattern)
1020 if mat:
1021 pattern = mat.group(1)
1022 self._pattern = re.compile(fnmatch.translate(pattern))
1023 else:
1024 if pattern != pattern.lower():
1025 print(f"Matching {pattern!r} without regard to case "
1026 "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
1027 self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)
1029 def write(self, showStr):
1030 showStr = showStr.rstrip()
1031 # Strip off doc string line(s) and cut off
1032 # at "=" for string matching
1033 matchStr = showStr.split("\n")[-1].split("=")[0]
1034 if self._pattern.search(matchStr):
1035 print("\n" + showStr)
1037 fd = FilteredStream(pattern)
1038 else:
1039 fd = sys.stdout
1041 config.saveToStream(fd, "config")
1042 elif showCommand == "history":
1043 matHistory = re.search(r"^(?:config.)?(.+)?", showArgs)
1044 globPattern = matHistory.group(1)
1045 if not globPattern:
1046 print("Please provide a value with --show history (e.g. history=*.doXXX)", file=sys.stderr)
1047 sys.exit(1)
1049 error = False
1050 for i, pattern in enumerate(fnmatch.filter(config.names(), globPattern)):
1051 if i > 0:
1052 print("")
1054 pattern = pattern.split(".")
1055 cpath, cname = pattern[:-1], pattern[-1]
1056 hconfig = config # the config that we're interested in
1057 for i, cpt in enumerate(cpath):
1058 try:
1059 hconfig = getattr(hconfig, cpt)
1060 except AttributeError:
1061 config_path = ".".join(["config"] + cpath[:i])
1062 print(f"Error: configuration {config_path} has no subconfig {cpt}", file=sys.stderr)
1063 error = True
1065 try:
1066 print(pexConfig.history.format(hconfig, cname))
1067 except KeyError:
1068 config_path = ".".join(["config"] + cpath)
1069 print(f"Error: {config_path} has no field {cname}", file=sys.stderr)
1070 error = True
1072 if error:
1073 sys.exit(1)
1075 elif showCommand == "data":
1076 pass
1077 elif showCommand == "run":
1078 pass
1079 elif showCommand == "tasks":
1080 showTaskHierarchy(config)
1081 else:
1082 choices = "', '".join("config[=XXX] data history=XXX tasks run".split())
1083 print(f"Unknown value for show: {what} (choose from {choices!r})", file=sys.stderr)
1084 sys.exit(1)
1086 if exit and "run" not in showOpts:
1087 sys.exit(0)
1090def showTaskHierarchy(config):
1091 """Print task hierarchy to stdout.
1093 Parameters
1094 ----------
1095 config : `lsst.pex.config.Config`
1096 Configuration to process.
1097 """
1098 print("Subtasks:")
1099 taskDict = getTaskDict(config=config)
1101 fieldNameList = sorted(taskDict.keys())
1102 for fieldName in fieldNameList:
1103 taskName = taskDict[fieldName]
1104 print(f"{fieldName}: {taskName}")
1107class ConfigValueAction(argparse.Action):
1108 """argparse action callback to override config parameters using
1109 name=value pairs from the command-line.
1110 """
1112 def __call__(self, parser, namespace, values, option_string):
1113 """Override one or more config name value pairs.
1115 Parameters
1116 ----------
1117 parser : `argparse.ArgumentParser`
1118 Argument parser.
1119 namespace : `argparse.Namespace`
1120 Parsed command. The ``namespace.config`` attribute is updated.
1121 values : `list`
1122 A list of ``configItemName=value`` pairs.
1123 option_string : `str`
1124 Option value specified by the user.
1125 """
1126 if namespace.config is None:
1127 return
1128 for nameValue in values:
1129 name, sep, valueStr = nameValue.partition("=")
1130 if not valueStr:
1131 parser.error(f"{option_string} value {nameValue} must be in form name=value")
1133 # see if setting the string value works; if not, try eval
1134 try:
1135 setDottedAttr(namespace.config, name, valueStr)
1136 except AttributeError:
1137 parser.error(f"no config field: {name}")
1138 except Exception:
1139 try:
1140 value = eval(valueStr, {})
1141 except Exception:
1142 parser.error(f"cannot parse {valueStr!r} as a value for {name}")
1143 try:
1144 setDottedAttr(namespace.config, name, value)
1145 except Exception as e:
1146 parser.error(f"cannot set config.{name}={value!r}: {e}")
1149class ConfigFileAction(argparse.Action):
1150 """argparse action to load config overrides from one or more files.
1151 """
1153 def __call__(self, parser, namespace, values, option_string=None):
1154 """Load one or more files of config overrides.
1156 Parameters
1157 ----------
1158 parser : `argparse.ArgumentParser`
1159 Argument parser.
1160 namespace : `argparse.Namespace`
1161 Parsed command. The following attributes are updated by this
1162 method: ``namespace.config``.
1163 values : `list`
1164 A list of data config file paths.
1165 option_string : `str`, optional
1166 Option value specified by the user.
1167 """
1168 if namespace.config is None:
1169 return
1170 for configfile in values:
1171 try:
1172 namespace.config.load(configfile)
1173 except Exception as e:
1174 parser.error(f"cannot load config file {configfile!r}: {e}")
1177class IdValueAction(argparse.Action):
1178 """argparse action callback to process a data ID into a dict.
1179 """
1181 def __call__(self, parser, namespace, values, option_string):
1182 """Parse ``--id`` data and append results to
1183 ``namespace.<argument>.idList``.
1185 Parameters
1186 ----------
1187 parser : `ArgumentParser`
1188 Argument parser.
1189 namespace : `argparse.Namespace`
1190 Parsed command (an instance of argparse.Namespace).
1191 The following attributes are updated:
1193 - ``<idName>.idList``, where ``<idName>`` is the name of the
1194 ID argument, for instance ``"id"`` for ID argument ``--id``.
1195 values : `list`
1196 A list of data IDs; see Notes below.
1197 option_string : `str`
1198 Option value specified by the user.
1200 Notes
1201 -----
1202 The data format is::
1204 key1=value1_1[^value1_2[^value1_3...]
1205 key2=value2_1[^value2_2[^value2_3...]...
1207 The values (e.g. ``value1_1``) may either be a string,
1208 or of the form ``"int..int"`` (e.g. ``"1..3"``) which is
1209 interpreted as ``"1^2^3"`` (inclusive, unlike a python range).
1210 So ``"0^2..4^7..9"`` is equivalent to ``"0^2^3^4^7^8^9"``.
1211 You may also specify a stride: ``"1..5:2"`` is ``"1^3^5"``.
1213 The cross product is computed for keys with multiple values.
1214 For example::
1216 --id visit 1^2 ccd 1,1^2,2
1218 results in the following data ID dicts being appended to
1219 ``namespace.<argument>.idList``:
1221 {"visit":1, "ccd":"1,1"}
1222 {"visit":2, "ccd":"1,1"}
1223 {"visit":1, "ccd":"2,2"}
1224 {"visit":2, "ccd":"2,2"}
1225 """
1226 if namespace.config is None:
1227 return
1228 idDict = collections.OrderedDict()
1229 for nameValue in values:
1230 name, sep, valueStr = nameValue.partition("=")
1231 if name in idDict:
1232 parser.error(f"{name} appears multiple times in one ID argument: {option_string}")
1233 idDict[name] = []
1234 for v in valueStr.split("^"):
1235 mat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", v)
1236 if mat:
1237 v1 = int(mat.group(1))
1238 v2 = int(mat.group(2))
1239 v3 = mat.group(3)
1240 v3 = int(v3) if v3 else 1
1241 for v in range(v1, v2 + 1, v3):
1242 idDict[name].append(str(v))
1243 else:
1244 idDict[name].append(v)
1246 iterList = [idDict[key] for key in idDict.keys()]
1247 idDictList = [collections.OrderedDict(zip(idDict.keys(), valList))
1248 for valList in itertools.product(*iterList)]
1250 argName = option_string.lstrip("-")
1251 ident = getattr(namespace, argName)
1252 ident.idList += idDictList
1255class LogLevelAction(argparse.Action):
1256 """argparse action to set log level.
1257 """
1259 def __call__(self, parser, namespace, values, option_string):
1260 """Set trace level.
1262 Parameters
1263 ----------
1264 parser : `ArgumentParser`
1265 Argument parser.
1266 namespace : `argparse.Namespace`
1267 Parsed command. This argument is not used.
1268 values : `list`
1269 List of trace levels; each item must be of the form
1270 ``component_name=level`` or ``level``, where ``level``
1271 is a keyword (not case sensitive) or an integer.
1272 option_string : `str`
1273 Option value specified by the user.
1274 """
1275 permittedLevelList = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
1276 permittedLevelSet = set(permittedLevelList)
1277 for componentLevel in values:
1278 component, sep, levelStr = componentLevel.partition("=")
1279 if not levelStr:
1280 levelStr, component = component, None
1281 logLevelUpr = levelStr.upper()
1282 if logLevelUpr in permittedLevelSet:
1283 logLevel = getattr(lsstLog.Log, logLevelUpr)
1284 else:
1285 parser.error(f"loglevel={levelStr!r} not one of {permittedLevelList}")
1286 if component is None:
1287 namespace.log.setLevel(logLevel)
1288 else:
1289 lsstLog.Log.getLogger(component).setLevel(logLevel)
1290 # set logging level for Python logging
1291 pyLevel = lsstLog.LevelTranslator.lsstLog2logging(logLevel)
1292 logging.getLogger(component).setLevel(pyLevel)
1295class ReuseAction(argparse.Action):
1296 """argparse action associated with ArgumentPraser.addReuseOption."""
1298 def __call__(self, parser, namespace, value, option_string):
1299 if value == "all":
1300 value = self.choices[-2]
1301 index = self.choices.index(value)
1302 namespace.reuse = self.choices[:index + 1]
1305def setDottedAttr(item, name, value):
1306 """Set an instance attribute (like `setattr` but accepting
1307 hierarchical names such as ``foo.bar.baz``).
1309 Parameters
1310 ----------
1311 item : obj
1312 Object whose attribute is to be set.
1313 name : `str`
1314 Name of attribute to set.
1315 value : obj
1316 New value for the attribute.
1318 Notes
1319 -----
1320 For example if name is ``foo.bar.baz`` then ``item.foo.bar.baz``
1321 is set to the specified value.
1322 """
1323 subitem = item
1324 subnameList = name.split(".")
1325 for subname in subnameList[:-1]:
1326 subitem = getattr(subitem, subname)
1327 setattr(subitem, subnameList[-1], value)
1330def getDottedAttr(item, name):
1331 """Get an attribute (like `getattr` but accepts hierarchical names
1332 such as ``foo.bar.baz``).
1334 Parameters
1335 ----------
1336 item : obj
1337 Object whose attribute is to be returned.
1338 name : `str`
1339 Name of the attribute to get.
1341 Returns
1342 -------
1343 itemAttr : obj
1344 If name is ``foo.bar.baz then the return value is
1345 ``item.foo.bar.baz``.
1346 """
1347 subitem = item
1348 for subname in name.split("."):
1349 subitem = getattr(subitem, subname)
1350 return subitem