Coverage for python/lsst/pipe/base/argumentParser.py : 10%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# LSST Data Management System
3# Copyright 2008-2015 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
# Public names exported by this module.
__all__ = [
    "ArgumentParser",
    "ConfigFileAction",
    "ConfigValueAction",
    "DataIdContainer",
    "DatasetArgument",
    "ConfigDatasetType",
    "InputOnlyArgumentParser",
]
25import abc
26import argparse
27import collections
28import fnmatch
29import itertools
30import logging
31import os
32import re
33import shlex
34import sys
35import shutil
36import textwrap
38import lsst.utils
39import lsst.pex.config as pexConfig
40import lsst.pex.config.history
41import lsst.log as lsstLog
42import lsst.daf.persistence as dafPersist
# Names of the environment variables that, when set, supply the default
# root directories for the input, calibration and output repositories.
DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
49def _fixPath(defName, path):
50 """Apply environment variable as default root, if present, and abspath.
52 Parameters
53 ----------
54 defName : `str`
55 Name of environment variable containing default root path;
56 if the environment variable does not exist
57 then the path is relative to the current working directory
58 path : `str`
59 Path relative to default root path.
61 Returns
62 -------
63 abspath : `str`
64 Path that has been expanded, or `None` if the environment variable
65 does not exist and path is `None`.
66 """
67 defRoot = os.environ.get(defName)
68 if defRoot is None:
69 if path is None:
70 return None
71 return os.path.abspath(path)
72 return os.path.abspath(os.path.join(defRoot, path or ""))
class DataIdContainer:
    """Container for data IDs and associated data references.

    Parameters
    ----------
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        This class does not support `None`, but if it did, `None`
        would mean the level should not be restricted.

    Notes
    -----
    Override this class for data IDs that require special handling to be
    converted to ``data references``, and specify the override class
    as ``ContainerClass`` for ``add_id_argument``.

    If you don't want the argument parser to compute data references,
    specify ``doMakeDataRefList=False`` in ``add_id_argument``.
    """

    def __init__(self, level=None):
        self.datasetType = None
        """Dataset type of the data references (`str`).
        """
        self.level = level
        """See parameter ``level`` (`str`).
        """
        self.idList = []
        """List of data IDs specified on the command line for the
        appropriate data ID argument (`list` of `dict`).
        """
        self.refList = []
        """List of data references for the data IDs in ``idList``
        (`list` of `lsst.daf.persistence.ButlerDataRef`).
        Elements will be omitted if the corresponding data is not found.
        The list will be empty when returned by ``parse_args`` if
        ``doMakeDataRefList=False`` was specified in ``add_id_argument``.
        """

    def setDatasetType(self, datasetType):
        """Set actual dataset type, once it is known.

        Parameters
        ----------
        datasetType : `str`
            Dataset type.

        Notes
        -----
        The reason ``datasetType`` is not a constructor argument is that
        some subclasses do not know the dataset type until the command
        is parsed. Thus, to reduce special cases in the code,
        ``datasetType`` is always set after the command is parsed.
        """
        self.datasetType = datasetType

    def castDataIds(self, butler):
        """Validate data IDs and cast them to the correct type
        (modify ``idList`` in place).

        This code casts the values in the data ID dicts in ``idList``
        to the type required by the butler. Data IDs are read from the
        command line as `str`, but the butler requires some values to be
        other types. For example "visit" values should be `int`.

        Parameters
        ----------
        butler : `lsst.daf.persistence.Butler`
            Data butler.

        Raises
        ------
        RuntimeError
            Raised if `setDatasetType` has not been called yet.
        KeyError
            Raised if the butler cannot provide keys for the dataset type
            at the requested level.
        TypeError
            Raised if a value cannot be cast to the type the butler
            reports for its key; the original failure is chained as the
            cause.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        try:
            idKeyTypeDict = butler.getKeys(datasetType=self.datasetType, level=self.level)
        except KeyError as e:
            msg = f"Cannot get keys for datasetType {self.datasetType} at level {self.level}"
            raise KeyError(msg) from e

        for dataDict in self.idList:
            # Only existing keys are reassigned below, so the dict does not
            # change size while it is being iterated.
            for key, strVal in dataDict.items():
                try:
                    keyType = idKeyTypeDict[key]
                except KeyError:
                    # OK, assume that it's a valid key and guess that it's a string
                    keyType = str

                    log = lsstLog.Log.getDefaultLogger()
                    log.warn("Unexpected ID %s; guessing type is \"%s\"",
                             key, 'str' if keyType == str else keyType)
                    # Remember the guess so the warning is issued once per key.
                    idKeyTypeDict[key] = keyType

                if keyType != str:
                    try:
                        castVal = keyType(strVal)
                    except Exception as e:
                        # Chain the cause, matching the KeyError handling above.
                        raise TypeError(
                            f"Cannot cast value {strVal!r} to {keyType} for ID key {key}"
                        ) from e
                    dataDict[key] = castVal

    def makeDataRefList(self, namespace):
        """Compute refList based on idList.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Results of parsing command-line. The ``butler`` and ``log``
            elements must be set.

        Raises
        ------
        RuntimeError
            Raised if `setDatasetType` has not been called yet.

        Notes
        -----
        Not called if ``add_id_argument`` was called with
        ``doMakeDataRefList=False``.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")
        butler = namespace.butler
        for dataId in self.idList:
            refList = dafPersist.searchDataRefs(butler, datasetType=self.datasetType,
                                                level=self.level, dataId=dataId)
            if not refList:
                # Data IDs that match nothing are reported and skipped.
                namespace.log.warn("No data found for dataId=%s", dataId)
                continue
            self.refList += refList
class DataIdArgument:
    """Specification of one data ID argument, used by
    `ArgumentParser.add_id_argument`.

    Parameters
    ----------
    name : `str`
        Name of identifier (argument name without dashes).
    datasetType : `str`
        Type of dataset; specify a string for a fixed dataset type
        or a `DatasetArgument` for a dynamic dataset type (e.g.
        one specified by a command-line argument).
    level : `str`
        The lowest hierarchy level to descend to for this dataset type,
        for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
        Use `""` to use the mapper's default for the dataset type.
        Some container classes may also support `None`, which means
        the level should not be restricted; however the default class,
        `DataIdContainer`, does not support `None`.
    doMakeDataRefList : `bool`, optional
        If `True` (default), construct data references.
    ContainerClass : `class`, optional
        Class to contain data IDs and data references; the default class
        `DataIdContainer` will work for many, but not all, cases.

    Raises
    ------
    RuntimeError
        Raised if ``name`` starts with a dash.
    """

    def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
        if name.startswith("-"):
            raise RuntimeError(f"Name {name} must not start with -")
        self.name = name
        self.argName = name.lstrip("-")
        self.datasetType = datasetType
        self.level = level
        self.ContainerClass = ContainerClass
        self.doMakeDataRefList = bool(doMakeDataRefList)

    @property
    def isDynamicDatasetType(self):
        """`True` if the dataset type is dynamic, i.e. it is an instance
        of `DynamicDatasetType` rather than a fixed string.
        """
        return isinstance(self.datasetType, DynamicDatasetType)

    def getDatasetType(self, namespace):
        """Return the dataset type as a string.

        Parameters
        ----------
        namespace
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        if not self.isDynamicDatasetType:
            return self.datasetType
        # Dynamic types resolve themselves from the parsed command.
        return self.datasetType.getDatasetType(namespace)
class DynamicDatasetType(metaclass=abc.ABCMeta):
    """Abstract interface for a dataset type that is only known once the
    command line has been parsed.
    """

    def addArgument(self, parser, idName):
        """Hook for adding a command-line argument that specifies the
        dataset type name, if one is wanted.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser to add the argument to.
        idName : `str`
            Name of data ID argument, without the leading ``"--"``,
            e.g. ``"id"``.

        Notes
        -----
        The default implementation does nothing.
        """
        return None

    @abc.abstractmethod
    def getDatasetType(self, namespace):
        """Return the dataset type as a string, derived from the parsed
        command-line arguments.

        Returns
        -------
        datasetType : `str`
            Dataset type.
        """
        raise NotImplementedError("Subclasses must override")
class DatasetArgument(DynamicDatasetType):
    """Dataset type specified by a command-line argument.

    Parameters
    ----------
    name : `str`, optional
        Name of command-line argument (including leading "--",
        if appropriate) whose value is the dataset type.
        If `None`, uses ``--idName_dstype`` where idName
        is the name of the data ID argument (e.g. "id").
    help : `str`, optional
        Help string for the command-line argument.
    default : `object`, optional
        Default value. If `None`, then the command-line option is required.
        This argument is ignored if the command-line argument is positional
        (name does not start with "-") because positional arguments do
        not support default values.
    """

    def __init__(self,
                 name=None,
                 help="dataset type to process from input data repository",
                 default=None,
                 ):
        DynamicDatasetType.__init__(self)
        self.name = name
        self.help = help
        self.default = default

    def getDatasetType(self, namespace):
        """Get the dataset type as a string, from the appropriate
        command-line argument.

        Parameters
        ----------
        namespace :
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Dataset type.

        Notes
        -----
        `addArgument` must have been called first if ``name`` was left as
        `None`, because that is where the default name is assigned.
        """
        if self.name.startswith("-"):
            # For optional arguments argparse derives the namespace attribute
            # by stripping the prefix characters and replacing any remaining
            # "-" with "_"; mirror that so names such as "--dataset-type"
            # can be read back.
            argName = self.name.lstrip("-").replace("-", "_")
        else:
            # Positional arguments are stored under their name unchanged.
            argName = self.name
        return getattr(namespace, argName)

    def addArgument(self, parser, idName):
        """Add a command-line argument to specify the dataset type name.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        idName : `str`
            Data ID.

        Notes
        -----
        Also sets the `name` attribute if it is currently `None`.
        """
        help = self.help if self.help else f"dataset type for {idName}"
        if self.name is None:
            self.name = f"--{idName}_dstype"
        requiredDict = dict()
        if self.name.startswith("-"):
            # ``required`` is only passed for optional arguments; the option
            # is mandatory exactly when no default was supplied.
            requiredDict = dict(required=self.default is None)
        parser.add_argument(
            self.name,
            default=self.default,
            help=help,
            **requiredDict)
class ConfigDatasetType(DynamicDatasetType):
    """Dataset type specified by a config parameter.

    Parameters
    ----------
    name : `str`
        Name of config option whose value is the dataset type.
    """

    def __init__(self, name):
        DynamicDatasetType.__init__(self)
        self.name = name

    def getDatasetType(self, namespace):
        """Return the dataset type as a string, from the appropriate
        config field.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed command.

        Returns
        -------
        datasetType : `str`
            Value of the config field named by ``name``.

        Raises
        ------
        RuntimeError
            Raised if any component of the dotted config name cannot be
            found; the underlying lookup error is chained as the cause.
        """
        # getattr does not work reliably if the config field name is
        # dotted, so step through one level at a time
        keyList = self.name.split(".")
        value = namespace.config
        for key in keyList:
            try:
                value = getattr(value, key)
            except (AttributeError, KeyError) as e:
                # getattr signals a missing attribute with AttributeError;
                # KeyError is still caught in case a config container
                # raises it from its own attribute lookup.
                raise RuntimeError(f"Cannot find config parameter {self.name!r}") from e
        return value
class ArgumentParser(argparse.ArgumentParser):
    """Argument parser for command-line tasks that is based on
    `argparse.ArgumentParser`.

    Parameters
    ----------
    name : `str`
        Name of top-level task; used to identify camera-specific override
        files.
    usage : `str`, optional
        Command-line usage signature.
    **kwargs
        Additional keyword arguments for `argparse.ArgumentParser`.

    Notes
    -----
    Users may wish to add additional arguments before calling `parse_args`.
    """
    # I would prefer to check data ID keys and values as they are parsed,
    # but the required information comes from the butler, so I have to
    # construct a butler before I do this checking. Constructing a butler
    # is slow, so I only want do it once, after parsing the command line,
    # so as to catch syntax errors quickly.

    requireOutput = True
    """Require an output directory to be specified (`bool`)."""

    def __init__(self, name, usage="%(prog)s input [options]", **kwargs):
        self._name = name
        self._dataIdArgDict = {}  # Dict of data identifier specifications, by argument name
        # The literal starts with a line continuation so that every line
        # shares a common margin and ``dedent`` can actually remove it.
        epilog = textwrap.dedent("""\
            Notes:
            * --config, --configfile, --id, --loglevel and @file may appear multiple times;
                all values are used, in order left to right
            * @file reads command-line options from the specified file:
                * data may be distributed among multiple lines (e.g. one option per line)
                * data after # is treated as a comment and ignored
                * blank lines and lines starting with # are ignored
            * To specify multiple values for an option, do not use = after the option name:
                * right: --configfile foo bar
                * wrong: --configfile=foo bar
            """)
        argparse.ArgumentParser.__init__(self,
                                         usage=usage,
                                         fromfile_prefix_chars='@',
                                         epilog=epilog,
                                         formatter_class=argparse.RawDescriptionHelpFormatter,
                                         **kwargs)
        self.add_argument(metavar='input', dest="rawInput",
                          help=f"path to input data repository, relative to ${DEFAULT_INPUT_NAME}")
        self.add_argument("--calib", dest="rawCalib",
                          help=f"path to input calibration repository, relative to ${DEFAULT_CALIB_NAME}")
        self.add_argument("--output", dest="rawOutput",
                          help="path to output data repository (need not exist), "
                               f"relative to ${DEFAULT_OUTPUT_NAME}")
        self.add_argument("--rerun", dest="rawRerun", metavar="[INPUT:]OUTPUT",
                          help="rerun name: sets OUTPUT to ROOT/rerun/OUTPUT; "
                               "optionally sets ROOT to ROOT/rerun/INPUT")
        self.add_argument("-c", "--config", nargs="*", action=ConfigValueAction,
                          help="config override(s), e.g. -c foo=newfoo bar.baz=3", metavar="NAME=VALUE")
        self.add_argument("-C", "--configfile", dest="configfile", nargs="*", action=ConfigFileAction,
                          help="config override file(s)")
        self.add_argument("-L", "--loglevel", nargs="*", action=LogLevelAction,
                          help="logging level; supported levels are [trace|debug|info|warn|error|fatal]",
                          metavar="LEVEL|COMPONENT=LEVEL")
        self.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging")
        self.add_argument("--debug", action="store_true", help="enable debugging output?")
        self.add_argument("--doraise", action="store_true",
                          help="raise an exception on error (else log a message and continue)?")
        self.add_argument("--noExit", action="store_true",
                          help="Do not exit even upon failure (i.e. return a struct to the calling script)")
        self.add_argument("--profile", help="Dump cProfile statistics to filename")
        self.add_argument("--show", nargs="+", default=(),
                          help="display the specified information to stdout and quit "
                               "(unless run is specified); information is "
                               "(config[=PATTERN]|history=PATTERN|tasks|data|run)")
        self.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
        self.add_argument("-t", "--timeout", type=float,
                          help="Timeout for multiprocessing; maximum wall time (sec)")
        self.add_argument("--clobber-output", action="store_true", dest="clobberOutput", default=False,
                          help=("remove and re-create the output directory if it already exists "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--clobber-config", action="store_true", dest="clobberConfig", default=False,
                          help=("backup and then overwrite existing config files instead of checking them "
                                "(safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-backup-config", action="store_true", dest="noBackupConfig", default=False,
                          help="Don't copy config to file~N backup.")
        self.add_argument("--clobber-versions", action="store_true", dest="clobberVersions", default=False,
                          help=("backup and then overwrite existing package versions instead of checking "
                                "them (safe with -j, but not all other forms of parallel execution)"))
        self.add_argument("--no-versions", action="store_true", dest="noVersions", default=False,
                          help="don't check package versions; useful for development")
        lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%c %p: %m%n
""")

        # Forward all Python logging to lsst.log
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        lgr.addHandler(lsstLog.LogHandler())

    def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList=True,
                        ContainerClass=DataIdContainer):
        """Add a data ID argument.

        Parameters
        ----------
        name : `str`
            Data ID argument (including leading dashes, if wanted).
        datasetType : `str` or `DynamicDatasetType`-type
            Type of dataset. Supply a string for a fixed dataset type.
            For a dynamically determined dataset type, supply
            a `DynamicDatasetType`, such as a `DatasetArgument`.
        help : `str`
            Help string for the argument.
        level : `str`
            The lowest hierarchy level to descend to for this dataset type,
            for example `"amp"` for `"raw"` or `"ccd"` for `"calexp"`.
            Use `""` to use the mapper's default for the dataset type.
            Some container classes may also support `None`, which means
            the level should not be restricted; however the default class,
            `DataIdContainer`, does not support `None`.
        doMakeDataRefList : bool, optional
            If `True` (default), construct data references.
        ContainerClass : `class`, optional
            Class to contain data IDs and data references; the default class
            `DataIdContainer` will work for many, but not all, cases.
            For example if the dataset type is specified on the command line
            then use `DynamicDatasetType`.

        Raises
        ------
        RuntimeError
            Raised if a data ID argument of this name already exists or the
            name is reserved for another namespace field.

        Notes
        -----
        If ``datasetType`` is an instance of `DatasetArgument`,
        then add a second argument to specify the dataset type.

        The associated data is put into ``namespace.<dataIdArgument.name>``
        as an instance of `ContainerClass`; the container includes fields:

        - ``idList``: a list of data ID dicts.
        - ``refList``: a list of `~lsst.daf.persistence.Butler`
            data references (empty if ``doMakeDataRefList`` is `False`).
        """
        argName = name.lstrip("-")

        if argName in self._dataIdArgDict:
            raise RuntimeError(f"Data ID argument {name} already exists")
        # These names are used for other fields of the parsed namespace.
        if argName in {"camera", "config", "butler", "log", "obsPkg"}:
            raise RuntimeError(f"Data ID argument {name} is a reserved name")

        self.add_argument(name, nargs="*", action=IdValueAction, help=help,
                          metavar="KEY=VALUE1[^VALUE2[^VALUE3...]")

        dataIdArgument = DataIdArgument(
            name=argName,
            datasetType=datasetType,
            level=level,
            doMakeDataRefList=doMakeDataRefList,
            ContainerClass=ContainerClass,
        )

        if dataIdArgument.isDynamicDatasetType:
            datasetType.addArgument(parser=self, idName=argName)

        self._dataIdArgDict[argName] = dataIdArgument

    def parse_args(self, config, args=None, log=None, override=None):
        """Parse arguments for a command-line task.

        Parameters
        ----------
        config : `lsst.pex.config.Config`
            Config for the task being run.
        args : `list`, optional
            Argument list; if `None` then ``sys.argv[1:]`` is used.
        log : `lsst.log.Log`, optional
            `~lsst.log.Log` instance; if `None` use the default log.
        override : callable, optional
            A config override function. It must take the root config object
            as its only argument and must modify the config in place.
            This function is called after camera-specific overrides files
            are applied, and before command-line config overrides
            are applied (thus allowing the user the final word).

        Returns
        -------
        namespace : `argparse.Namespace`
            A `~argparse.Namespace` instance containing fields:

            - ``camera``: camera name.
            - ``config``: the supplied config with all overrides applied,
                validated and frozen.
            - ``butler``: a `lsst.daf.persistence.Butler` for the data.
            - An entry for each of the data ID arguments registered by
                `add_id_argument`, of the type passed to its ``ContainerClass``
                keyword (`~lsst.pipe.base.DataIdContainer` by default). It
                includes public elements ``idList`` and ``refList``.
            - ``log``: a `lsst.log` Log.
            - An entry for each command-line argument,
                with the following exceptions:

              - config is the supplied config, suitably updated.
              - configfile, id and loglevel are all missing.
            - ``obsPkg``: name of the ``obs_`` package for this camera.
        """
        if args is None:
            args = sys.argv[1:]

        if len(args) < 1 or args[0].startswith(("-", "@")):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                # ``exit`` takes (status, message); pass both explicitly so
                # the text is not mistaken for the exit status.
                self.exit(status=1,
                          message=f"{self.prog}: error: Must specify input as first argument\n")

        # Note that --rerun may change namespace.input, but if it does
        # we verify that the new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error(f"Error: input={namespace.input!r} not found")

        namespace.config = config
        namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        if mapperClass is None:
            self.error(f"Error: no mapper specified for input repo {namespace.input!r}")

        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        self.handleCamera(namespace)

        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)

        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        butlerArgs = {}  # common arguments for butler elements
        if namespace.calib:
            butlerArgs = {'mapperArgs': {'calibRoot': namespace.calib}}
        if namespace.output:
            outputs = {'root': namespace.output, 'mode': 'rw'}
            inputs = {'root': namespace.input}
            inputs.update(butlerArgs)
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            # Without a separate output the input repo is opened read-write.
            outputs = {'root': namespace.input, 'mode': 'rw'}
            outputs.update(butlerArgs)
            namespace.butler = dafPersist.Butler(outputs=outputs)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler,
        # hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict:
                for dataRef in getattr(namespace, dataIdName).refList:
                    print(f"{dataIdName} dataRef.dataId = {dataRef.dataId}")

        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                print("Warning: no 'debug' module found", file=sys.stderr)
                namespace.debug = False

        del namespace.loglevel

        if namespace.longlog:
            lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n
""")
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace

    def _parseDirectories(self, namespace):
        """Parse input, output and calib directories

        This allows for hacking the directories, e.g., to include a
        "rerun".
        Modifications are made to the 'namespace' object in-place:
        ``calib``, ``output`` and ``rerun`` are set, ``input`` may be
        replaced, and the ``raw*`` attributes are removed.
        """
        mapperClass = dafPersist.Butler.getMapperClass(_fixPath(DEFAULT_INPUT_NAME, namespace.rawInput))
        namespace.calib = _fixPath(DEFAULT_CALIB_NAME, namespace.rawCalib)

        # If an output directory is specified, process it and assign it to the namespace
        if namespace.rawOutput:
            namespace.output = _fixPath(DEFAULT_OUTPUT_NAME, namespace.rawOutput)
        else:
            namespace.output = None

        # This section processes the rerun argument.
        # If rerun is specified as a colon separated value,
        # it will be parsed as an input and output.
        # The input value will be overridden if previously specified
        # (but a check is made to make sure both inputs use
        # the same mapper)
        if namespace.rawRerun:
            if namespace.output:
                self.error("Error: cannot specify both --output and --rerun")
            namespace.rerun = namespace.rawRerun.split(":")
            rerunDir = [os.path.join(namespace.input, "rerun", dd) for dd in namespace.rerun]
            modifiedInput = False
            if len(rerunDir) == 2:
                namespace.input, namespace.output = rerunDir
                modifiedInput = True
            elif len(rerunDir) == 1:
                namespace.output = rerunDir[0]
                # An existing rerun records its parent repo as "_parent".
                if os.path.exists(os.path.join(namespace.output, "_parent")):
                    namespace.input = os.path.realpath(os.path.join(namespace.output, "_parent"))
                    modifiedInput = True
            else:
                self.error(f"Error: invalid argument for --rerun: {namespace.rerun}")
            if modifiedInput and dafPersist.Butler.getMapperClass(namespace.input) != mapperClass:
                self.error("Error: input directory specified by --rerun must have the same mapper as INPUT")
        else:
            namespace.rerun = None
        del namespace.rawInput
        del namespace.rawCalib
        del namespace.rawOutput
        del namespace.rawRerun

    def _processDataIds(self, namespace):
        """Process the parsed data for each data ID argument in an
        `~argparse.Namespace`.

        Processing includes:

        - Validate data ID keys.
        - Cast the data ID values to the correct type.
        - Compute data references from data IDs.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``butler``
            - ``log``
            - ``config``, if any dynamic dataset types are set by
              a config parameter.
            - Dataset type arguments (e.g. ``id_dstype``), if any dynamic
              dataset types are specified by such

            These attributes are modified:

            - ``<name>`` for each data ID argument registered using
              `add_id_argument` with name ``<name>``.
        """
        for dataIdArgument in self._dataIdArgDict.values():
            dataIdContainer = getattr(namespace, dataIdArgument.name)
            dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
            if dataIdArgument.doMakeDataRefList:
                try:
                    dataIdContainer.castDataIds(butler=namespace.butler)
                except (KeyError, TypeError) as e:
                    # failure of castDataIds indicates invalid command args
                    self.error(e)

                # failure of makeDataRefList indicates a bug
                # that wants a traceback
                dataIdContainer.makeDataRefList(namespace)

    def _applyInitialOverrides(self, namespace):
        """Apply obs-package-specific and camera-specific config
        override files, if found

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Parsed namespace. These attributes are read:

            - ``obsPkg``
            - ``camera``
            - ``config``
            - ``log``

        Look in the package namespace.obsPkg for files:

        - ``config/<task_name>.py``
        - ``config/<camera_name>/<task_name>.py`` and load if found.
        """
        obsPkgDir = lsst.utils.getPackageDir(namespace.obsPkg)
        fileName = self._name + ".py"
        for filePath in (
            os.path.join(obsPkgDir, "config", fileName),
            os.path.join(obsPkgDir, "config", namespace.camera, fileName),
        ):
            if os.path.exists(filePath):
                namespace.log.info("Loading config override file %r", filePath)
                namespace.config.load(filePath)
            else:
                namespace.log.debug("Config override file does not exist: %r", filePath)

    def handleCamera(self, namespace):
        """Perform camera-specific operations before parsing the command-line.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            Namespace (an `argparse.Namespace`) with the following fields:

            - ``camera``: the camera name.
            - ``config``: the config passed to parse_args, with no overrides applied.
            - ``obsPkg``: the ``obs_`` package for this camera.
            - ``log``: a `lsst.log` Log.

        Notes
        -----
        The default implementation does nothing.
        """
        pass

    def convert_arg_line_to_args(self, arg_line):
        """Allow files of arguments referenced by ``@<path>`` to contain
        multiple values on each line.

        Parameters
        ----------
        arg_line : `str`
            Line of text read from an argument file.

        Yields
        ------
        arg : `str`
            Each non-blank argument found on the line; nothing is yielded
            for blank lines or lines starting with ``#``.
        """
        arg_line = arg_line.strip()
        if not arg_line or arg_line.startswith("#"):
            return
        for arg in shlex.split(arg_line, comments=True, posix=True):
            if not arg.strip():
                continue
            yield arg

    def addReuseOption(self, choices):
        """Add a "--reuse-outputs-from SUBTASK" option to the argument
        parser.

        CmdLineTasks that can be restarted at an intermediate step using
        outputs from earlier (but still internal) steps should use this
        method to allow the user to control whether that happens when
        outputs from earlier steps are present.

        Parameters
        ----------
        choices : sequence
            A sequence of string names (by convention, top-level subtasks)
            that identify the steps that could be skipped when their
            outputs are already present. The list is ordered, so when the
            user specifies one step on the command line, all previous steps
            may be skipped as well. In addition to the choices provided,
            users may pass "all" to indicate that all steps may be thus
            skipped.

        When this method is called, the ``namespace`` object returned by
        ``parse_args`` will contain a ``reuse`` attribute containing
        a list of all steps that should be skipped if their outputs
        are already present.
        If no steps should be skipped, the ``reuse`` will be an empty list.
        """
        # Copy so the caller's sequence is not modified by the append.
        choices = list(choices)
        choices.append("all")
        self.add_argument("--reuse-outputs-from", dest="reuse", choices=choices,
                          default=[], action=ReuseAction,
                          help=("Skip the given subtask and its predecessors and reuse their outputs "
                                "if those outputs already exist. Use 'all' to specify all subtasks."))
class InputOnlyArgumentParser(ArgumentParser):
    """`ArgumentParser` variant for command-line tasks that produce no
    output.
    """

    # These tasks never write anything, so an output is not required.
    requireOutput = False
def getTaskDict(config, taskDict=None, baseName=""):
    """Collect task info for every subtask found in a config.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    taskDict : `dict`, optional
        Users should not specify this argument. Supports recursion:
        when given it is updated in place, otherwise a new `dict` is
        started.
    baseName : `str`, optional
        Users should not specify this argument. Used only for
        recursion: a non-empty string, followed by a period, becomes
        the prefix of the entries added to ``taskDict``; an empty
        string means no prefix.

    Returns
    -------
    taskDict : `dict`
        Keys are (dotted) config field names, values are task names.

    Notes
    -----
    This function calls itself for each nested sub-config. Callers
    should pass only ``config`` and leave the other arguments at their
    defaults.
    """
    collected = {} if taskDict is None else taskDict
    for fieldName, field in config.items():
        # Only fields exposing both ``value`` and ``target`` are candidates.
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue
        qualifiedName = fieldName if not baseName else f"{baseName}.{fieldName}"
        try:
            taskName = f"{field.target.__module__}.{field.target.__name__}"
        except Exception:
            # Targets lacking module/name information fall back to repr.
            taskName = repr(field.target)
        collected[qualifiedName] = taskName
        getTaskDict(config=subConfig, taskDict=collected, baseName=qualifiedName)
    return collected
class _FilteredStream:
    """Minimal file-like object that prints only the lines whose config
    name matches a glob pattern.

    Parameters
    ----------
    pattern : `str`
        Glob pattern. Matching ignores case unless the pattern ends with
        ``:NOIGNORECASE`` (that suffix is removed before matching).

    Notes
    -----
    Newlines are silently discarded and reinserted; crude but effective.
    """

    def __init__(self, pattern):
        # Obey case only if the pattern explicitly requests NOIGNORECASE.
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(f"Matching {pattern!r} without regard to case "
                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        showStr = showStr.rstrip()
        # Strip off doc string line(s) and cut off at "=" for string
        # matching.
        matchStr = showStr.split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            print("\n" + showStr)


def _stripConfigPrefix(showArgs):
    """Return ``showArgs`` without a leading literal ``config.`` prefix."""
    prefix = "config."
    return showArgs[len(prefix):] if showArgs.startswith(prefix) else showArgs


def obeyShowArgument(showOpts, config=None, exit=False):
    """Process arguments specified with ``--show`` (but ignores
    ``"data"``).

    Parameters
    ----------
    showOpts : `list` of `str`
        List of options passed to ``--show``.
    config : optional
        The provided config. It is used by the ``config``, ``history``
        and ``tasks`` options.
    exit : bool, optional
        Exit if ``"run"`` isn't included in ``showOpts``.

    Notes
    -----
    Supports the following options in showOpts:

    - ``config[=PAT]``. Dump all the config entries, or just the ones that
      match the glob pattern.
    - ``history=PAT``. Show where the config entries that match the glob
      pattern were set.
    - ``tasks``. Show task hierarchy.
    - ``data``. Ignored; to be processed by caller.
    - ``run``. Keep going (the default behaviour is to exit if
      ``--show`` is specified).

    A leading ``config.`` on ``PAT`` is optional and is removed before
    matching.

    Calls ``sys.exit(1)`` if any other option is found, if ``history`` is
    given without a pattern, or if a ``history`` lookup fails.
    """
    if not showOpts:
        return

    for what in showOpts:
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            pattern = _stripConfigPrefix(showArgs)
            fd = _FilteredStream(pattern) if pattern else sys.stdout
            config.saveToStream(fd, "config")
        elif showCommand == "history":
            globPattern = _stripConfigPrefix(showArgs)
            if not globPattern:
                print("Please provide a value with --show history (e.g. history=*.doXXX)", file=sys.stderr)
                sys.exit(1)

            error = False
            for i, fullName in enumerate(fnmatch.filter(config.names(), globPattern)):
                if i > 0:
                    print("")

                *cpath, cname = fullName.split(".")
                hconfig = config  # the config that we're interested in
                for depth, cpt in enumerate(cpath):
                    try:
                        hconfig = getattr(hconfig, cpt)
                    except AttributeError:
                        config_path = ".".join(["config"] + cpath[:depth])
                        print(f"Error: configuration {config_path} has no subconfig {cpt}", file=sys.stderr)
                        error = True
                        # The path cannot be resolved, so there is no
                        # config object to report history for.
                        break
                else:
                    try:
                        print(pexConfig.history.format(hconfig, cname))
                    except KeyError:
                        config_path = ".".join(["config"] + cpath)
                        print(f"Error: {config_path} has no field {cname}", file=sys.stderr)
                        error = True

            if error:
                sys.exit(1)

        elif showCommand in ("data", "run"):
            # "data" is handled by the caller; "run" only affects the exit
            # decision made after the loop.
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            choices = ", ".join(repr(c) for c in "config[=XXX] data history=XXX tasks run".split())
            print(f"Unknown value for show: {what} (choose from {choices})", file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
def showTaskHierarchy(config):
    """Print the task hierarchy of a config to stdout.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.

    Notes
    -----
    A ``Subtasks:`` header is printed first, followed by one
    ``fieldName: taskName`` line per subtask, sorted by field name.
    """
    print("Subtasks:")
    subtasks = getTaskDict(config=config)

    for fieldName in sorted(subtasks):
        print(f"{fieldName}: {subtasks[fieldName]}")
class ConfigValueAction(argparse.Action):
    """argparse action callback to override config parameters using
    name=value pairs from the command-line.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Override one or more config name value pairs.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command. The ``namespace.config`` attribute is updated.
            Nothing is done if ``namespace.config`` is `None`.
        values : `list`
            A list of ``configItemName=value`` pairs.
        option_string : `str`, optional
            Option value specified by the user.

        Notes
        -----
        Each value is first assigned as a string. If that assignment
        fails for any reason other than a missing field, the text is
        evaluated as a Python expression and the result is assigned
        instead. All failures are reported through ``parser.error``.
        """
        if namespace.config is None:
            return
        for nameValue in values:
            name, sep, valueStr = nameValue.partition("=")
            # Both halves are required: "=value" has no field to set and
            # "name" / "name=" has no value to assign.
            if not name or not valueStr:
                parser.error(f"{option_string} value {nameValue} must be in form name=value")

            # see if setting the string value works; if not, try eval
            try:
                setDottedAttr(namespace.config, name, valueStr)
            except AttributeError:
                parser.error(f"no config field: {name}")
            except Exception:
                try:
                    # SECURITY NOTE(review): eval() executes arbitrary
                    # Python taken from the command line. That is only
                    # acceptable while the arguments come from the user
                    # running the task; never route untrusted text here.
                    value = eval(valueStr, {})
                except Exception:
                    parser.error(f"cannot parse {valueStr!r} as a value for {name}")
                try:
                    setDottedAttr(namespace.config, name, value)
                except Exception as e:
                    parser.error(f"cannot set config.{name}={value!r}: {e}")
class ConfigFileAction(argparse.Action):
    """argparse action that applies config overrides read from one or
    more files.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Load each listed override file into the config.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser; its ``error`` method is called when a file
            cannot be loaded.
        namespace : `argparse.Namespace`
            Parsed command. ``namespace.config`` is updated by this
            method; nothing is done if it is `None`.
        values : `list`
            A list of config file paths, loaded in order.
        option_string : `str`, optional
            Option value specified by the user.
        """
        config = namespace.config
        if config is None:
            return
        for path in values:
            try:
                config.load(path)
            except Exception as exc:
                parser.error(f"cannot load config file {path!r}: {exc}")
class IdValueAction(argparse.Action):
    """argparse action callback to process a data ID into a dict.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Parse ``--id`` data and append results to
        ``namespace.<argument>.idList``.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser.
        namespace : `argparse.Namespace`
            Parsed command (an instance of argparse.Namespace).
            Nothing is done if ``namespace.config`` is `None`.
            The following attributes are updated:

            - ``<idName>.idList``, where ``<idName>`` is the name of the
              ID argument, for instance ``"id"`` for ID argument ``--id``.
        values : `list`
            A list of data IDs; see Notes below.
        option_string : `str`
            Option value specified by the user. With its leading dashes
            removed it names the namespace attribute that is updated.

        Notes
        -----
        The data format is::

            key1=value1_1[^value1_2[^value1_3...]
            key2=value2_1[^value2_2[^value2_3...]...

        The values (e.g. ``value1_1``) may either be a string,
        or of the form ``"int..int"`` (e.g. ``"1..3"``) which is
        interpreted as ``"1^2^3"`` (inclusive, unlike a python range).
        So ``"0^2..4^7..9"`` is equivalent to ``"0^2^3^4^7^8^9"``.
        You may also specify a stride: ``"1..5:2"`` is ``"1^3^5"``.
        The stride must be at least 1.

        The cross product is computed for keys with multiple values,
        with the last key varying fastest. All values are stored as
        strings. For example::

            --id visit=1^2 ccd=1,1^2,2

        results in the following data ID dicts being appended to
        ``namespace.<argument>.idList``:

            {"visit": "1", "ccd": "1,1"}
            {"visit": "1", "ccd": "2,2"}
            {"visit": "2", "ccd": "1,1"}
            {"visit": "2", "ccd": "2,2"}
        """
        if namespace.config is None:
            return
        idDict = collections.OrderedDict()
        for nameValue in values:
            name, sep, valueStr = nameValue.partition("=")
            if name in idDict:
                parser.error(f"{name} appears multiple times in one ID argument: {option_string}")
            expanded = idDict[name] = []
            for item in valueStr.split("^"):
                mat = re.search(r"^(\d+)\.\.(\d+)(?::(\d+))?$", item)
                if not mat:
                    expanded.append(item)
                    continue
                first = int(mat.group(1))
                last = int(mat.group(2))
                stride = int(mat.group(3)) if mat.group(3) else 1
                if stride < 1:
                    # range() rejects a zero step with ValueError; report
                    # it as a normal command-line error instead.
                    parser.error(f"stride must be at least 1 in range {item!r} for {name}: {option_string}")
                # The upper bound is inclusive, unlike a python range.
                expanded.extend(str(number) for number in range(first, last + 1, stride))

        idDictList = [collections.OrderedDict(zip(idDict.keys(), valList))
                      for valList in itertools.product(*idDict.values())]

        argName = option_string.lstrip("-")
        ident = getattr(namespace, argName)
        ident.idList += idDictList
class LogLevelAction(argparse.Action):
    """argparse action that applies requested log levels.
    """

    def __call__(self, parser, namespace, values, option_string):
        """Set the log level, globally or for named components.

        Parameters
        ----------
        parser : `ArgumentParser`
            Argument parser; its ``error`` method is called for an
            unrecognised level.
        namespace : `argparse.Namespace`
            Parsed command. Its ``log`` attribute receives the level when
            no component name is given.
        values : `list`
            List of levels; each item must be of the form
            ``component_name=level`` or ``level``, where ``level``
            is one of the keywords TRACE, DEBUG, INFO, WARN, ERROR or
            FATAL (not case sensitive).
        option_string : `str`
            Option value specified by the user.
        """
        permittedLevelList = ('TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL')
        for spec in values:
            head, _, levelStr = spec.partition("=")
            if levelStr:
                component = head
            else:
                # A bare "level" (or "level=") applies to namespace.log.
                levelStr, component = head, None
            levelName = levelStr.upper()
            if levelName not in permittedLevelList:
                parser.error(f"loglevel={levelStr!r} not one of {permittedLevelList}")
            logLevel = getattr(lsstLog.Log, levelName)
            if component is None:
                namespace.log.setLevel(logLevel)
            else:
                lsstLog.Log.getLogger(component).setLevel(logLevel)
            # Apply the translated level to Python logging as well.
            pyLevel = lsstLog.LevelTranslator.lsstLog2logging(logLevel)
            logging.getLogger(component).setLevel(pyLevel)
class ReuseAction(argparse.Action):
    """argparse action associated with ArgumentParser.addReuseOption."""

    def __call__(self, parser, namespace, value, option_string=None):
        """Store in ``namespace.reuse`` the list of steps that may be
        skipped.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser. Not used.
        namespace : `argparse.Namespace`
            Parsed command. Its ``reuse`` attribute is set.
        value : `str`
            One of ``self.choices``. ``"all"`` selects every other
            choice; any other value selects that choice and all
            choices listed before it.
        option_string : `str`, optional
            Option value specified by the user. Not used.
        """
        if value == "all":
            # Select everything except the "all" keyword itself. This also
            # copes with a parser that offers no named steps, where the
            # only choice is "all" and the result is an empty list.
            namespace.reuse = [choice for choice in self.choices if choice != "all"]
            return
        index = self.choices.index(value)
        namespace.reuse = self.choices[:index + 1]
def setDottedAttr(item, name, value):
    """Assign an attribute given a dotted, hierarchical name (a `setattr`
    that understands names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object at the root of the attribute path.
    name : `str`
        Name of attribute to set; may contain periods.
    value : obj
        New value for the attribute.

    Notes
    -----
    For example if name is ``foo.bar.baz`` then ``item.foo.bar.baz``
    is set to the specified value. Every component before the last is
    looked up with `getattr`, so it must already exist.
    """
    *parentNames, leafName = name.split(".")
    owner = item
    for parentName in parentNames:
        owner = getattr(owner, parentName)
    setattr(owner, leafName, value)
def getDottedAttr(item, name):
    """Fetch an attribute given a dotted, hierarchical name (a `getattr`
    that understands names such as ``foo.bar.baz``).

    Parameters
    ----------
    item : obj
        Object at the root of the attribute path.
    name : `str`
        Name of the attribute to get; may contain periods.

    Returns
    -------
    itemAttr : obj
        If name is ``foo.bar.baz`` then the return value is
        ``item.foo.bar.baz``.
    """
    current, remainder = item, name
    while True:
        # Peel off one path component at a time, left to right.
        head, sep, remainder = remainder.partition(".")
        current = getattr(current, head)
        if not sep:
            return current