Coverage for python/lsst/pipe/base/argumentParser.py : 11%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# LSST Data Management System
3# Copyright 2008-2015 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
# Names exported by ``from <this module> import *``.
__all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
           "DatasetArgument", "ConfigDatasetType", "InputOnlyArgumentParser"]
25import abc
26import argparse
27import collections
28import fnmatch
29import itertools
30import logging
31import os
32import re
33import shlex
34import sys
35import shutil
36import textwrap
38import lsst.utils
39import lsst.pex.config as pexConfig
40import lsst.pex.config.history
41import lsst.log as lsstLog
42import lsst.daf.persistence as dafPersist
# Names of the environment variables that, when set, provide the default
# root directory against which the input, calibration and output repository
# paths given on the command line are resolved (see `_fixPath`).
DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
49def _fixPath(defName, path):
50 """Apply environment variable as default root, if present, and abspath.
52 Parameters
53 ----------
54 defName : `str`
55 Name of environment variable containing default root path;
56 if the environment variable does not exist
57 then the path is relative to the current working directory
58 path : `str`
59 Path relative to default root path.
61 Returns
62 -------
63 abspath : `str`
64 Path that has been expanded, or `None` if the environment variable
65 does not exist and path is `None`.
66 """
67 defRoot = os.environ.get(defName)
68 if defRoot is None:
69 if path is None:
70 return None
71 return os.path.abspath(path)
72 return os.path.abspath(os.path.join(defRoot, path or ""))
class DataIdContainer:
    """Container for data IDs and associated data references.

    Parameters
    ----------
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        This class does not support `None`, but if it did, `None`
        would mean the level should not be restricted.

    Notes
    -----
    Override this class for data IDs that require special handling to be
    converted to ``data references``, and specify the override class
    as ``ContainerClass`` for ``add_id_argument``.

    If you don't want the argument parser to compute data references,
    specify ``doMakeDataRefList=False`` in ``add_id_argument``.
    """

    def __init__(self, level=None):
        self.datasetType = None
        """Dataset type of the data references (`str`).
        """
        self.level = level
        """See parameter ``level`` (`str`).
        """
        self.idList = []
        """List of data IDs specified on the command line for the
        appropriate data ID argument (`list` of `dict`).
        """
        self.refList = []
        """List of data references for the data IDs in ``idList``
        (`list` of `lsst.daf.persistence.ButlerDataRef`).
        Elements will be omitted if the corresponding data is not found.
        The list will be empty when returned by ``parse_args`` if
        ``doMakeDataRefList=False`` was specified in ``add_id_argument``.
        """

    def setDatasetType(self, datasetType):
        """Set actual dataset type, once it is known.

        Parameters
        ----------
        datasetType : `str`
            Dataset type.

        Notes
        -----
        The reason ``datasetType`` is not a constructor argument is that
        some subclasses do not know the dataset type until the command
        is parsed. Thus, to reduce special cases in the code,
        ``datasetType`` is always set after the command is parsed.
        """
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """Validate data IDs and cast them to the correct type
        (modify ``idList`` in place).

        This code casts the values in the data ID dicts in ``idList``
        to the type required by the butler. Data IDs are read from the
        command line as `str`, but the butler requires some values to be
        other types. For example "visit" values should be `int`.

        Parameters
        ----------
        butler : `lsst.daf.persistence.Butler`
            Data butler.

        Raises
        ------
        RuntimeError
            Raised if `setDatasetType` has not been called.
        KeyError
            Raised if the butler cannot provide keys for the dataset type
            at the requested level.
        TypeError
            Raised if a value cannot be converted to the type required
            for its key; the underlying conversion error is chained as
            the cause.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            idKeyTypeDict = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError as e:
            msg = f"Cannot get keys for datasetType {self.datasetType} at level {self.level}"
            raise KeyError(msg) from e

        for dataDict in self.idList:
            for key, strVal in dataDict.items():
                try:
                    keyType = idKeyTypeDict[key]
                except KeyError:
                    # OK, assume that it's a valid key and guess that it's a
                    # string
                    keyType = str

                    log = lsstLog.Log.getDefaultLogger()
                    log.warn("Unexpected ID %s; guessing type is \"%s\"", key, "str")
                    # Remember the guess so the warning is issued only once
                    # per unexpected key.
                    idKeyTypeDict[key] = keyType

                if keyType != str:
                    try:
                        castVal = keyType(strVal)
                    except Exception as e:
                        # Chain the original error so the reason for the
                        # failed conversion is not lost.
                        raise TypeError(
                            f"Cannot cast value {strVal!r} to {keyType} for ID key {key}"
                        ) from e
                    # Replacing the value of an existing key is safe while
                    # iterating over the dict's items.
                    dataDict[key] = castVal

    def makeDataRefList(self, namespace):
        """Compute refList based on idList.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Results of parsing command-line. The ``butler`` and ``log``
            elements must be set.

        Raises
        ------
        RuntimeError
            Raised if `setDatasetType` has not been called.

        Notes
        -----
        Not called if ``add_id_argument`` was called with
        ``doMakeDataRefList=False``.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            refList = dafPersist.searchDataRefs(butler, datasetType=self.datasetType,
                                                level=self.level, dataId=dataId)
            if not refList:
                namespace.log.warn("No data found for dataId=%s", dataId)
                continue
            self.refList += refList
class DataIdArgument:
    """Specification of one data ID argument, as registered by
    `ArgumentParser.add_id_argument`.

    Parameters
    ----------
    name : `str`
        Name of identifier (argument name without dashes).
    datasetType : `str` or `DynamicDatasetType`
        Dataset type: a string for a fixed dataset type, or a
        `DynamicDatasetType` (such as `DatasetArgument`) when the type is
        only known after the command line has been parsed.
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        Some container classes may also support `None`, which means
        the level should not be restricted; however the default class,
        `DataIdContainer`, does not support `None`.
    doMakeDataRefList : `bool`, optional
        If `True` (default), construct data references.
    ContainerClass : `class`, optional
        Class used to hold the data IDs and data references; the default,
        `DataIdContainer`, works for many, but not all, cases.

    Raises
    ------
    RuntimeError
        Raised if ``name`` starts with a dash.
    """

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        if name.startswith("-"):
            raise RuntimeError(f"Name {name} must not start with -")
        self.name = name
        self.argName = name.lstrip("-")
        self.datasetType = datasetType
        self.level = level
        self.ContainerClass = ContainerClass
        self.doMakeDataRefList = bool(doMakeDataRefList)

    @property
    def isDynamicDatasetType(self):
        """`True` if the dataset type is a `DynamicDatasetType`, that is,
        determined only after the command line has been parsed.
        """
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """Return the dataset type as a string.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        if not self.isDynamicDatasetType:
            return self.datasetType
        # Dynamic types resolve themselves from the parsed command.
        return self.datasetType.getDatasetType(namespace)
class DynamicDatasetType(metaclass=abc.ABCMeta):
    """Abstract base class for dataset types that are only known once the
    command line has been parsed.
    """

    def addArgument(self, parser, idName):
        """Hook for adding a command-line argument that specifies the
        dataset type name.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser to add the argument to.
        idName : `str`
            Name of data ID argument, without the leading ``"--"``,
            e.g. ``"id"``.

        Notes
        -----
        This base implementation adds nothing; subclasses override it
        when they need an argument.
        """
        return None

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Return the dataset type as a string, derived from the parsed
        command-line arguments.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        raise NotImplementedError("Subclasses must override")
class DatasetArgument(DynamicDatasetType):
    """Dataset type specified by a command-line argument.

    Parameters
    ----------
    name : `str`, optional
        Name of command-line argument (including leading "--",
        if appropriate) whose value is the dataset type.
        If `None`, uses ``--idName_dstype`` where idName
        is the name of the data ID argument (e.g. "id").
    help : `str`, optional
        Help string for the command-line argument.
    default : `object`, optional
        Default value. If `None`, then the command-line option is required.
        This argument is ignored if the command-line argument is positional
        (name does not start with "-") because positional arguments do
        not support default values.
    """

    def __init__(self,
                 name=None,
                 help="dataset type to process from input data repository",
                 default=None,
                 ):
        super().__init__()
        self.name = name
        self.help = help
        self.default = default

    def getDatasetType(self, namespace):
        """Get the dataset type as a string, from the appropriate
        command-line argument.

        Parameters
        ----------
        namespace :
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.

        Notes
        -----
        `addArgument` must have been called first when ``name`` was not
        supplied to the constructor, because that is where the default
        name is assigned.
        """
        if self.name.startswith("-"):
            # argparse derives the namespace attribute of an optional
            # argument by stripping the leading dashes and converting
            # the remaining dashes to underscores.
            argName = self.name.lstrip("-").replace("-", "_")
        else:
            # Positional arguments are stored under their name unchanged.
            argName = self.name
        return getattr(namespace, argName)

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify the dataset type name.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        idName : `str`
            Data ID.

        Notes
        -----
        Also sets the `name` attribute if it is currently `None`.
        """
        helpText = self.help if self.help else f"dataset type for {idName}"
        if self.name is None:
            self.name = f"--{idName}_dstype"
        # ``required`` may only be passed for optional (dashed) arguments.
        requiredDict = dict()
        if self.name.startswith("-"):
            requiredDict = dict(required=self.default is None)
        parser.add_argument(
            self.name,
            default=self.default,
            help=helpText,
            **requiredDict)
class ConfigDatasetType(DynamicDatasetType):
    """Dataset type specified by a config parameter.

    Parameters
    ----------
    name : `str`
        Name of config option whose value is the dataset type.
    """

    def __init__(self, name):
        super().__init__()
        self.name = name

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate
        config field.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command; its ``config`` attribute is read.

        Returns
        -------
        datasetType
            Value of the config field named by ``name``.

        Raises
        ------
        RuntimeError
            Raised if any component of the dotted config name cannot be
            found.
        """
        # getattr does not work reliably if the config field name is
        # dotted, so step through one level at a time
        value = namespace.config
        for key in self.name.split("."):
            try:
                value = getattr(value, key)
            except (AttributeError, KeyError) as e:
                # A missing attribute raises AttributeError; KeyError is
                # still caught in case a config object raises it from
                # its attribute lookup.
                raise RuntimeError(f"Cannot find config parameter {self.name!r}") from e
        return value
408class ArgumentParser(argparse.ArgumentParser):
409 """Argument parser for command-line tasks that is based on
410 `argparse.ArgumentParser`.
412 Parameters
413 ----------
414 name : `str`
415 Name of top-level task; used to identify camera-specific override
416 files.
417 usage : `str`, optional
418 Command-line usage signature.
419 **kwargs
420 Additional keyword arguments for `argparse.ArgumentParser`.
422 Notes
423 -----
424 Users may wish to add additional arguments before calling `parse_args`.
425 """
426 # I would prefer to check data ID keys and values as they are parsed,
427 # but the required information comes from the butler, so I have to
428 # construct a butler before I do this checking. Constructing a butler
429 # is slow, so I only want do it once, after parsing the command line,
430 # so as to catch syntax errors quickly.
432 requireOutput = True
433 """Require an output directory to be specified (`bool`)."""
435 def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
436 self._name = name
437 self._dataIdArgDict = {} # Dict of data identifier specifications, by argument name
438 argparse.ArgumentParser.__init__(self,
439 usage=usage,
440 fromfile_prefix_chars='@',
441 epilog=textwrap.dedent("""Notes:
442 * --config, --config-file or --configfile, --id, --loglevel and @file may appear multiple times;
443 all values are used, in order left to right
444 * @file reads command-line options from the specified file:
445 * data may be distributed among multiple lines (e.g. one option per line)
446 * data after # is treated as a comment and ignored
447 * blank lines and lines starting with # are ignored
448 * To specify multiple values for an option, do not use = after the option name:
449 * right: --config-file foo bar
450 * wrong: --config-file=foo bar
451 """),
452 formatter_class=argparse.RawDescriptionHelpFormatter,
453 **kwargs)
454 self.add_argument(metavar='input', dest="rawInput",
455 help=f"path to input data repository, relative to ${DEFAULT_INPUT_NAME}")
456 self.add_argument("--calib", dest="rawCalib",
457 help=f"path to input calibration repository, relative to ${DEFAULT_CALIB_NAME}")
458 self.add_argument("--output", dest="rawOutput",
459 help="path to output data repository (need not exist), "
460 f"relative to ${DEFAULT_OUTPUT_NAME}")
461 self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
462 help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
463 "optionally sets ROOT to ROOT/rerun/INPUT")
464 self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
465 help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
466 self.add_argument("-C", "--config-file", "--configfile",
467 dest="configfile", nargs="*", action=ConfigFileAction,
468 help="config override file(s)")
469 self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
470 help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
471 metavar="LEVEL|COMPONENT=LEVEL")
472 self.add_argument("--longlog", action=LongLogAction, help="use a more verbose format for the logging")
473 self.add_argument("--debug", action="store_true", help="enable debugging output?")
474 self.add_argument("--doraise", action="store_true",
475 help="raise an exception on error (else log a message and continue)?")
476 self.add_argument("--noExit", action="store_true",
477 help="Do not exit even upon failure (i.e. return a struct to the calling script)")
478 self.add_argument("--profile", help="Dump cProfile statistics to filename")
479 self.add_argument("--show", nargs="+", default=(),
480 help="display the specified information to stdout and quit "
481 "(unless run is specified); information is "
482 "(config[=PATTERN]|history=PATTERN|tasks|data|run)")
483 self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
484 self.add_argument("-t", "--timeout", type=float,
485 help="Timeout for multiprocessing; maximum wall time (sec)")
486 self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
487 help=("remove and re-create the output directory if it already exists "
488 "(safe with -j, but not all other forms of parallel execution)"))
489 self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
490 help=("backup and then overwrite existing config files instead of checking them "
491 "(safe with -j, but not all other forms of parallel execution)"))
492 self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
493 help="Don't copy config to file~N backup.")
494 self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
495 help=("backup and then overwrite existing package versions instead of checking"
496 "them (safe with -j, but not all other forms of parallel execution)"))
497 self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
498 help="don't check package versions; useful for development")
499 lsstLog.configure_prop("""
500log4j.rootLogger=INFO, A1
501log4j.appender.A1=ConsoleAppender
502log4j.appender.A1.Target=System.out
503log4j.appender.A1.layout=PatternLayout
504log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
505""")
507 # Forward all Python logging to lsst.log
508 lgr = logging.getLogger()
509 lgr.setLevel(logging.INFO) # same as in log4cxx config above
510 lgr.addHandler(lsstLog.LogHandler())
512 def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
513 ContainerClass=DataIdContainer):
514 """Add a data ID argument.
517 Parameters
518 ----------
519 name : `str`
520 Data ID argument (including leading dashes, if wanted).
521 datasetType : `str` or `DynamicDatasetType`-type
522 Type of dataset. Supply a string for a fixed dataset type.
523 For a dynamically determined dataset type, supply
524 a `DynamicDatasetType`, such a `DatasetArgument`.
525 help : `str`
526 Help string for the argument.
527 level : `str`
528 The lowest hierarchy level to descend to for this dataset type,
529 for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
530 Use `""` to use the mapper's default for the dataset type.
531 Some container classes may also support `None`, which means
532 the level should not be restricted; however the default class,
533 `DataIdContainer`, does not support `None`.
534 doMakeDataRefList : bool, optional
535 If `True` (default), construct data references.
536 ContainerClass : `class`, optional
537 Class to contain data IDs and data references; the default class
538 `DataIdContainer` will work for many, but not all, cases.
539 For example if the dataset type is specified on the command line
540 then use `DynamicDatasetType`.
542 Notes
543 -----
544 If ``datasetType`` is an instance of `DatasetArgument`,
545 then add a second argument to specify the dataset type.
547 The associated data is put into ``namespace.<dataIdArgument.name>``
548 as an instance of `ContainerClass`; the container includes fields:
550 - ``idList``: a list of data ID dicts.
551 - ``refList``: a list of `~lsst.daf.persistence.Butler`
552 data references (empty if ``doMakeDataRefList`` is `False`).
553 """
554 argName = name.lstrip("-")
556 if argName in self._dataIdArgDict:
557 raise RuntimeError(f"Data ID argument {name} already exists")
558 if argName in set(("camera", "config", "butler", "log", "obsPkg")):
559 raise RuntimeError(f"Data ID argument {name} is a reserved name")
561 self.add_argument(name, nargs="*", action=IdValueAction, help=help,
562 metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")
564 dataIdArgument = DataIdArgument(
565 name=argName,
566 datasetType=datasetType,
567 level=level,
568 doMakeDataRefList=doMakeDataRefList,
569 ContainerClass=ContainerClass,
570 )
572 if dataIdArgument.isDynamicDatasetType:
573 datasetType.addArgument(parser=self, idName=argName)
575 self._dataIdArgDict[argName] = dataIdArgument
577 def parse_args(self, config, args=None, log=None, override=None):
578 """Parse arguments for a command-line task.
580 Parameters
581 ----------
582 config : `lsst.pex.config.Config`
583 Config for the task being run.
584 args : `list`, optional
585 Argument list; if `None` then ``sys.argv[1:]`` is used.
586 log : `lsst.log.Log`, optional
587 `~lsst.log.Log` instance; if `None` use the default log.
588 override : callable, optional
589 A config override function. It must take the root config object
590 as its only argument and must modify the config in place.
591 This function is called after camera-specific overrides files
592 are applied, and before command-line config overrides
593 are applied (thus allowing the user the final word).
595 Returns
596 -------
597 namespace : `argparse.Namespace`
598 A `~argparse.Namespace` instance containing fields:
600 - ``camera``: camera name.
601 - ``config``: the supplied config with all overrides applied,
602 validated and frozen.
603 - ``butler``: a `lsst.daf.persistence.Butler` for the data.
604 - An entry for each of the data ID arguments registered by
605 `add_id_argument`, of the type passed to its ``ContainerClass``
606 keyword (`~lsst.pipe.base.DataIdContainer` by default). It
607 includes public elements ``idList`` and ``refList``.
608 - ``log``: a `lsst.log` Log.
609 - An entry for each command-line argument,
610 with the following exceptions:
612 - config is the supplied config, suitably updated.
613 - configfile, id and loglevel are all missing.
614 - ``obsPkg``: name of the ``obs_`` package for this camera.
615 """
616 if args is None:
617 args = sys.argv[1:]
619 if len(args) < 1 or args[0].startswith("-") or args[0].startswith("@"):
620 self.print_help()
621 if len(args) == 1 and args[0] in ("-h", "--help"):
622 self.exit()
623 else:
624 self.exit(f"{self.prog}: error: Must specify input as first argument")
626 # Note that --rerun may change namespace.input, but if it does
627 # we verify that the new input has the same mapper class.
628 namespace = argparse.Namespace()
629 namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
630 if not os.path.isdir(namespace.input):
631 self.error(f"Error: input={namespace.input!r} not found")
633 namespace.config = config
634 namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
635 mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
636 if mapperClass is None:
637 self.error(f"Error: no mapper specified for input repo {namespace.input!r}")
639 namespace.camera = mapperClass.getCameraName()
640 namespace.obsPkg = mapperClass.getPackageName()
642 self.handleCamera(namespace)
644 self._applyInitialOverrides(namespace)
645 if override is not None:
646 override(namespace.config)
648 # Add data ID containers to namespace
649 for dataIdArgument in self._dataIdArgDict.values():
650 setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))
652 namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
653 del namespace.configfile
655 self._parseDirectories(namespace)
657 if namespace.clobberOutput:
658 if namespace.output is None:
659 self.error("--clobber-output is only valid with --output or --rerun")
660 elif namespace.output == namespace.input:
661 self.error("--clobber-output is not valid when the output and input repos are the same")
662 if os.path.exists(namespace.output):
663 namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
664 shutil.rmtree(namespace.output)
666 namespace.log.debug("input=%s", namespace.input)
667 namespace.log.debug("calib=%s", namespace.calib)
668 namespace.log.debug("output=%s", namespace.output)
670 obeyShowArgument(namespace.show, namespace.config, exit=False)
672 # No environment variable or --output or --rerun specified.
673 if self.requireOutput and namespace.output is None and namespace.rerun is None:
674 self.error("no output directory specified.\n"
675 "An output directory must be specified with the --output or --rerun\n"
676 "command-line arguments.\n")
678 butlerArgs = {} # common arguments for butler elements
679 if namespace.calib:
680 butlerArgs = {'mapperArgs': {'calibRoot': namespace.calib}}
681 if namespace.output:
682 outputs = {'root': namespace.output, 'mode': 'rw'}
683 inputs = {'root': namespace.input}
684 inputs.update(butlerArgs)
685 outputs.update(butlerArgs)
686 namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
687 else:
688 outputs = {'root': namespace.input, 'mode': 'rw'}
689 outputs.update(butlerArgs)
690 namespace.butler = dafPersist.Butler(outputs=outputs)
692 # convert data in each of the identifier lists to proper types
693 # this is done after constructing the butler,
694 # hence after parsing the command line,
695 # because it takes a long time to construct a butler
696 self._processDataIds(namespace)
697 if "data" in namespace.show:
698 for dataIdName in self._dataIdArgDict.keys():
699 for dataRef in getattr(namespace, dataIdName).refList:
700 print(f"{dataIdName} dataRef.dataId = {dataRef.dataId}")
702 if namespace.show and "run" not in namespace.show:
703 sys.exit(0)
705 if namespace.debug:
706 try:
707 import debug
708 assert debug # silence pyflakes
709 except ImportError:
710 print("Warning: no 'debug' module found", file=sys.stderr)
711 namespace.debug = False
713 del namespace.loglevel
714 del namespace.longlog
716 namespace.config.validate()
717 namespace.config.freeze()
719 return namespace
721 def _parseDirectories(self, namespace):
722 """Parse input, output and calib directories
724 This allows for hacking the directories, e.g., to include a
725 "rerun".
726 Modifications are made to the 'namespace' object in-place.
727 """
728 mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
729 namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)
731 # If an output directory is specified, process it and assign it to the
732 # namespace
733 if namespace.rawOutput:
734 namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
735 else:
736 namespace.output = None
738 # This section processes the rerun argument.
739 # If rerun is specified as a colon separated value,
740 # it will be parsed as an input and output.
741 # The input value will be overridden if previously specified
742 # (but a check is made to make sure both inputs use
743 # the same mapper)
744 if namespace.rawRerun:
745 if namespace.output:
746 self.error("Error: cannot specify both --output and --rerun")
747 namespace.rerun = namespace.rawRerun.split(":")
748 rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
749 modifiedInput = False
750 if len(rerunDir) == 2:
751 namespace.input, namespace.output = rerunDir
752 modifiedInput = True
753 elif len(rerunDir) == 1:
754 namespace.output = rerunDir[0]
755 if os.path.exists(os.path.join(namespace.output, "_parent")):
756 namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
757 modifiedInput = True
758 else:
759 self.error(f"Error: invalid argument for --rerun: {namespace.rerun}")
760 if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
761 self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
762 else:
763 namespace.rerun = None
764 del namespace.rawInput
765 del namespace.rawCalib
766 del namespace.rawOutput
767 del namespace.rawRerun
769 def _processDataIds(self, namespace):
770 """Process the parsed data for each data ID argument in an
771 `~argparse.Namespace`.
773 Processing includes:
775 - Validate data ID keys.
776 - Cast the data ID values to the correct type.
777 - Compute data references from data IDs.
779 Parameters
780 ----------
781 namespace : `argparse.Namespace`
782 Parsed namespace. These attributes are read:
784 - ``butler``
785 - ``log``
786 - ``config``, if any dynamic dataset types are set by
787 a config parameter.
788 - Dataset type arguments (e.g. ``id_dstype``), if any dynamic
789 dataset types are specified by such
791 These attributes are modified:
793 - ``<name>`` for each data ID argument registered using
794 `add_id_argument` with name ``<name>``.
795 """
796 for dataIdArgument in self._dataIdArgDict.values():
797 dataIdContainer = getattr(namespace, dataIdArgument.name)
798 dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
799 if dataIdArgument.doMakeDataRefList:
800 try:
801 dataIdContainer.castDataIds(butler=namespace.butler)
802 except (KeyError, TypeError) as e:
803 # failure of castDataIds indicates invalid command args
804 self.error(e)
806 # failure of makeDataRefList indicates a bug
807 # that wants a traceback
808 dataIdContainer.makeDataRefList(namespace)
810 def _applyInitialOverrides(self, namespace):
811 """Apply obs-package-specific and camera-specific config
812 override files, if found
814 Parameters
815 ----------
816 namespace : `argparse.Namespace`
817 Parsed namespace. These attributes are read:
819 - ``obsPkg``
821 Look in the package namespace.obsPkg for files:
823 - ``config/<task_name>.py``
824 - ``config/<camera_name>/<task_name>.py`` and load if found.
825 """
826 obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
827 fileName = self._name + ".py"
828 for filePath in (
829 os.path.join(obsPkgDir, "config", fileName),
830 os.path.join(obsPkgDir, "config", namespace.camera, fileName),
831 ):
832 if os.path.exists(filePath):
833 namespace.log.info("Loading config overrride file %r", filePath)
834 namespace.config.load(filePath)
835 else:
836 namespace.log.debug("Config override file does not exist: %r", filePath)
838 def handleCamera(self, namespace):
839 """Perform camera-specific operations before parsing the command-line.
841 Parameters
842 ----------
843 namespace : `argparse.Namespace`
844 Namespace (an ) with the following fields:
846 - ``camera``: the camera name.
847 - ``config``: the config passed to parse_args, with no overrides
848 applied.
849 - ``obsPkg``: the ``obs_`` package for this camera.
850 - ``log``: a `lsst.log` Log.
852 Notes
853 -----
854 The default implementation does nothing.
855 """
856 pass
858 def convert_arg_line_to_args(self, arg_line):
859 """Allow files of arguments referenced by ``@<path>`` to contain
860 multiple values on each line.
862 Parameters
863 ----------
864 arg_line : `str`
865 Line of text read from an argument file.
866 """
867 arg_line = arg_line.strip()
868 if not arg_line or arg_line.startswith("#"):
869 return
870 for arg in shlex.split(arg_line, comments=True, posix=True):
871 if not arg.strip():
872 continue
873 yield arg
def addReuseOption(self, choices):
    """Register a "--reuse-outputs-from SUBTASK" option on this parser.

    Intended for CmdLineTasks that can restart at an intermediate step
    using outputs of earlier (but still internal) steps, so the user can
    control whether that happens when those outputs are present.

    Parameters
    ----------
    choices : sequence
        Ordered string names (by convention, top-level subtasks) of the
        steps that could be skipped when their outputs already exist.
        Naming one step on the command line also allows all earlier
        steps to be skipped. The extra value "all" is always accepted
        and means every step may be skipped.

    Notes
    -----
    After this call, the ``namespace`` returned by ``parse_args`` has a
    ``reuse`` attribute: a list of the steps that should be skipped if
    their outputs are already present, or an empty list if none should.
    """
    allowed = [*choices, "all"]
    helpText = ("Skip the given subtask and its predecessors and reuse their outputs "
                "if those outputs already exist. Use 'all' to specify all subtasks.")
    self.add_argument("--reuse-outputs-from", dest="reuse", choices=allowed,
                      default=[], action=ReuseAction, help=helpText)
class InputOnlyArgumentParser(ArgumentParser):
    """`ArgumentParser` variant for command-line tasks that write no output.
    """

    # Nothing is written by these tasks, so no output is required.
    requireOutput = False
def getTaskDict(config, taskDict=None, baseName=""):
    """Collect task names for every subtask reachable from a config.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    taskDict : `dict`, optional
        Recursion state; users should not pass it. When given it is
        updated in place, otherwise a new `dict` is created.
    baseName : `str`, optional
        Recursion state; users should not pass it. A non-empty value,
        followed by a period, prefixes every key added to ``taskDict``.

    Returns
    -------
    taskDict : `dict`
        Keys are (dotted) config field names, values are task names.

    Notes
    -----
    Call with only ``config``; the function recurses into each subtask
    config with the other two arguments filled in.
    """
    if taskDict is None:
        taskDict = {}
    prefix = f"{baseName}." if baseName else ""
    for fieldName, field in config.items():
        # Only fields exposing both ``value`` and ``target`` describe a subtask.
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue
        qualifiedName = f"{prefix}{fieldName}"
        try:
            taskName = f"{field.target.__module__}.{field.target.__name__}"
        except Exception:
            # Target lacks module/name metadata; fall back to its repr.
            taskName = repr(field.target)
        taskDict[qualifiedName] = taskName
        getTaskDict(config=subConfig, taskDict=taskDict, baseName=qualifiedName)
    return taskDict
class _FilteredStream:
    """Write-only stream that prints just the config entries matching a glob.

    Parameters
    ----------
    pattern : `str`
        Glob pattern. Matching ignores case unless the pattern ends
        with ``:NOIGNORECASE``.

    Notes
    -----
    Newlines are silently discarded and reinserted; crude but effective.
    """

    def __init__(self, pattern):
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)
        if mat:
            self._pattern = re.compile(fnmatch.translate(mat.group(1)))
        else:
            if pattern != pattern.lower():
                # Warn because a mixed-case pattern suggests the user expected case to matter.
                print(f"Matching {pattern!r} without regard to case "
                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        showStr = showStr.rstrip()
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            print("\n" + showStr)


def _stripConfigPrefix(showArgs):
    """Return ``showArgs`` minus a leading literal ``config.``, or `None` if nothing is left."""
    # The dot must be escaped: an unescaped "." would also swallow the
    # seventh character of names such as "configure*".
    return re.search(r"^(?:config\.)?(.+)?", showArgs).group(1)


def obeyShowArgument(showOpts, config=None, exit=False):
    """Process arguments specified with ``--show`` (but ignores
    ``"data"``).

    Parameters
    ----------
    showOpts : `list` of `str`
        List of options passed to ``--show``.
    config : optional
        The provided config. It is used by the ``config``, ``history``
        and ``tasks`` options.
    exit : bool, optional
        Call ``sys.exit(0)`` after processing if ``"run"`` isn't included
        in ``showOpts``.

    Notes
    -----
    Supports the following options in showOpts:

    - ``config[=PAT]``. Dump all the config entries, or just the ones that
      match the glob pattern.
    - ``history=PAT``. Show where the config entries that match the glob
      pattern were set.
    - ``tasks``. Show task hierarchy.
    - ``data``. Ignored; to be processed by caller.
    - ``run``. Keep going (the default behaviour is to exit if
      ``--show`` is specified).

    A leading ``config.`` on ``PAT`` is dropped before matching.

    Calls ``sys.exit(1)`` if any other option is found, if ``history`` is
    given without a pattern, or if a requested history entry cannot be
    looked up.
    """
    if not showOpts:
        return

    for what in showOpts:
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            pattern = _stripConfigPrefix(showArgs)
            fd = _FilteredStream(pattern) if pattern else sys.stdout
            config.saveToStream(fd, "config")
        elif showCommand == "history":
            globPattern = _stripConfigPrefix(showArgs)
            if not globPattern:
                print("Please provide a value with --show history (e.g. history=*.doXXX)", file=sys.stderr)
                sys.exit(1)

            error = False
            for i, fullName in enumerate(fnmatch.filter(config.names(), globPattern)):
                if i > 0:
                    print("")

                *cpath, cname = fullName.split(".")
                hconfig = config  # the config that we're interested in
                for depth, cpt in enumerate(cpath):
                    try:
                        hconfig = getattr(hconfig, cpt)
                    except AttributeError:
                        config_path = ".".join(["config"] + cpath[:depth])
                        print(f"Error: configuration {config_path} has no subconfig {cpt}", file=sys.stderr)
                        error = True
                        # Stop here: formatting history against a partially
                        # resolved config would report on the wrong object.
                        break
                else:
                    try:
                        print(pexConfig.history.format(hconfig, cname))
                    except KeyError:
                        config_path = ".".join(["config"] + cpath)
                        print(f"Error: {config_path} has no field {cname}", file=sys.stderr)
                        error = True

            if error:
                sys.exit(1)

        elif showCommand in ("data", "run"):
            # "data" is handled by the caller; "run" only affects the final exit.
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            choices = ", ".join(repr(choice) for choice in "config[=XXX] data history=XXX tasks run".split())
            print(f"Unknown value for show: {what} (choose from {choices})", file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
def showTaskHierarchy(config):
    """Print the subtask hierarchy of a config to stdout.

    Prints a ``Subtasks:`` header followed by one ``field: task`` line
    per subtask, ordered by field name.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    """
    print("Subtasks:")
    subtasks = getTaskDict(config=config)
    for fieldName in sorted(subtasks):
        print(f"{fieldName}: {subtasks[fieldName]}")
class ConfigValueAction(argparse.Action):
    """argparse action callback to override config parameters using
    name=value pairs from the command-line.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Override one or more config name value pairs.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser; its ``error`` method reports bad input.
        namespace : `argparse.Namespace`
            Parsed command. The ``namespace.config`` attribute is updated.
            Nothing is done if ``namespace.config`` is `None`.
        values : `list`
            A list of ``configItemName=value`` pairs.
        option_string : `str`, optional
            Option value specified by the user; only used in error
            messages.

        Notes
        -----
        Each value is first assigned as a string. If that fails for any
        reason other than a missing attribute, the text is evaluated as a
        Python expression and the result is assigned instead.
        """
        if namespace.config is None:
            return
        for nameValue in values:
            name, _, valueStr = nameValue.partition("=")
            if not valueStr:
                parser.error(f"{option_string} value {nameValue} must be in form name=value")

            # see if setting the string value works; if not, try eval
            try:
                setDottedAttr(namespace.config, name, valueStr)
            except AttributeError:
                parser.error(f"no config field: {name}")
            except Exception:
                try:
                    # SECURITY NOTE(review): eval executes arbitrary Python taken
                    # from the command line. Left as is because callers may rely
                    # on full expressions; do not feed this untrusted input.
                    value = eval(valueStr, {})
                except Exception:
                    parser.error(f"cannot parse {valueStr!r} as a value for {name}")
                try:
                    setDottedAttr(namespace.config, name, value)
                except Exception as e:
                    parser.error(f"cannot set config.{name}={value!r}: {e}")
class ConfigFileAction(argparse.Action):
    """argparse action that applies config override files in order.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Load each override file into ``namespace.config``.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser; its ``error`` method is called if a file
            fails to load.
        namespace : `argparse.Namespace`
            Parsed command. ``namespace.config`` is updated by this
            method; nothing is done if it is `None`.
        values : `list`
            A list of data config file paths.
        option_string : `str`, optional
            Option value specified by the user (unused).
        """
        if namespace.config is not None:
            for path in values:
                try:
                    namespace.config.load(path)
                except Exception as exc:
                    parser.error(f"cannot load config file {path!r}: {exc}")
class IdValueAction(argparse.Action):
    """argparse action callback to process a data ID into a dict.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Parse ``--id`` data and append results to
        ``namespace.<argument>.idList``.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser; its ``error`` method reports bad input.
        namespace : `argparse.Namespace`
            Parsed command (an instance of argparse.Namespace).
            Nothing is done if ``namespace.config`` is `None`.
            The following attributes are updated:

            - ``<idName>.idList``, where ``<idName>`` is the name of the
              ID argument, for instance ``"id"`` for ID argument ``--id``.
        values : `list`
            A list of data IDs; see Notes below.
        option_string : `str`
            Option value specified by the user. With leading dashes
            removed it names the namespace attribute to update.

        Notes
        -----
        The data format is::

            key1=value1_1[^value1_2[^value1_3...]
            key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. ``value1_1``) may either be a string,
        or of the form ``"int..int"`` (e.g. ``"1..3"``) which is
        interpreted as ``"1^2^3"`` (inclusive, unlike a python range).
        So ``"0^2..4^7..9"`` is equivalent to ``"0^2^3^4^7^8^9"``.
        You may also specify a stride: ``"1..5:2"`` is ``"1^3^5"``.
        A stride of zero is rejected with a parser error.

        All values are kept as strings. The cross product is computed
        for keys with multiple values, with the last key varying
        fastest. For example::

            --id visit=1^2 ccd=1,1^2,2

        results in the following data ID dicts being appended to
        ``namespace.<argument>.idList``:

            {"visit": "1", "ccd": "1,1"}
            {"visit": "1", "ccd": "2,2"}
            {"visit": "2", "ccd": "1,1"}
            {"visit": "2", "ccd": "2,2"}
        """
        if namespace.config is None:
            return
        idDict = collections.OrderedDict()
        for nameValue in values:
            name, _, valueStr = nameValue.partition("=")
            if name in idDict:
                parser.error(f"{name} appears multiple times in one ID argument: {option_string}")
            expanded = idDict[name] = []
            for item in valueStr.split("^"):
                mat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", item)
                if mat is None:
                    expanded.append(item)
                    continue
                first = int(mat.group(1))
                last = int(mat.group(2))
                stride = int(mat.group(3)) if mat.group(3) else 1
                if stride == 0:
                    # range() would raise a bare ValueError; report it like other bad input.
                    parser.error(f"stride must be nonzero in {item!r} for {name}: {option_string}")
                # The user-facing range is inclusive, hence ``last + 1``.
                expanded.extend(str(number) for number in range(first, last + 1, stride))

        idDictList = [collections.OrderedDict(zip(idDict.keys(), valList))
                      for valList in itertools.product(*idDict.values())]

        argName = option_string.lstrip("-")
        ident = getattr(namespace, argName)
        ident.idList += idDictList
class LongLogAction(argparse.Action):
    """argparse action that switches logging to a verbose format.

    Implemented as an action so that it can take effect before log level
    options.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Reconfigure `lsst.log` with the long message layout.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser (unused).
        namespace : `argparse.Namespace`
            Parsed command (unused).
        values : `list`
            Unused.
        option_string : `str`
            Option value specified by the user (unused).
        """
        # Properties text is passed through verbatim; it must start at column 0.
        verboseProps = """
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n
"""
        lsstLog.configure_prop(verboseProps)
class LogLevelAction(argparse.Action):
    """argparse action that applies requested log levels.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Set the log level for the default log or for named components.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser; its ``error`` method reports unknown levels.
        namespace : `argparse.Namespace`
            Parsed command. ``namespace.log`` has its level set when an
            item names no component.
        values : `list`
            List of levels; each item must be of the form
            ``component_name=level`` or ``level``, where ``level``
            is one of TRACE, DEBUG, INFO, WARN, ERROR or FATAL
            (not case sensitive).
        option_string : `str`
            Option value specified by the user (unused).
        """
        permittedLevelList = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        for spec in values:
            head, _, tail = spec.partition("=")
            if tail:
                component, levelStr = head, tail
            else:
                # No "=value" part: the whole item is the level for the default log.
                component, levelStr = None, head
            levelName = levelStr.upper()
            if levelName in permittedLevelList:
                logLevel = getattr(lsstLog.Log, levelName)
            else:
                parser.error(f"loglevel={levelStr!r} not one of {permittedLevelList}")
            if component is None:
                namespace.log.setLevel(logLevel)
            else:
                lsstLog.Log.getLogger(component).setLevel(logLevel)
            # Mirror the level onto the matching Python ``logging`` logger.
            pyLevel = lsstLog.LevelTranslator.lsstLog2logging(logLevel)
            logging.getLogger(component).setLevel(pyLevel)
class ReuseAction(argparse.Action):
    """argparse action associated with ArgumentParser.addReuseOption."""

    def __call__(self, parser, namespace, value, option_string=None):
        """Store the list of reusable steps in ``namespace.reuse``.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser (unused).
        namespace : `argparse.Namespace`
            Parsed command. Its ``reuse`` attribute is set to the chosen
            step and every step before it in ``self.choices``.
        value : `str`
            One of ``self.choices``. ``"all"`` selects every choice
            except the trailing ``"all"`` entry itself.
        option_string : `str`, optional
            Option value specified by the user (unused).

        Raises
        ------
        ValueError
            Raised if ``value`` is not ``"all"`` and is not in
            ``self.choices``.
        """
        choices = list(self.choices)
        if value == "all":
            # Drop the trailing "all" marker. This also yields an empty
            # list when no real steps were registered.
            namespace.reuse = choices[:-1]
        else:
            namespace.reuse = choices[:choices.index(value) + 1]
def setDottedAttr(item, name, value):
    """Assign to a possibly nested attribute given its dotted path.

    Works like `setattr`, but ``name`` may be hierarchical
    (e.g. ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object at the root of the attribute path.
    name : `str`
        Name of attribute to set; periods separate nesting levels.
    value : obj
        New value for the attribute.

    Notes
    -----
    With ``name="foo.bar.baz"`` this performs
    ``item.foo.bar.baz = value``.
    """
    *parentNames, leafName = name.split(".")
    owner = item
    for parentName in parentNames:
        owner = getattr(owner, parentName)
    setattr(owner, leafName, value)
def getDottedAttr(item, name):
    """Fetch a possibly nested attribute given its dotted path.

    Works like `getattr`, but ``name`` may be hierarchical
    (e.g. ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object at the root of the attribute path.
    name : `str`
        Name of the attribute to get; periods separate nesting levels.

    Returns
    -------
    itemAttr : obj
        If name is ``foo.bar.baz`` then the return value is
        ``item.foo.bar.baz``.
    """
    current = item
    remainder = name
    while True:
        # Peel one path component off the front on each pass.
        head, dot, remainder = remainder.partition(".")
        current = getattr(current, head)
        if not dot:
            return current