Coverage for python/lsst/ap/pipe/apPipeParser.py: 9%
197 statements
« prev ^ index » next coverage.py v6.4.1, created at 2022-06-26 01:59 -0700
« prev ^ index » next coverage.py v6.4.1, created at 2022-06-26 01:59 -0700
1#
2# This file is part of ap_pipe.
3#
4# Developed for the LSST Data Management System.
5# This product includes software developed by the LSST Project
6# (http://www.lsst.org).
7# See the COPYRIGHT file at the top-level directory of this distribution
8# for details of code ownership.
9#
10# This program is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
# Names exported by ``from <this module> import *``.
__all__ = ["ApPipeParser"]
26import argparse
27import fnmatch
28import os
29import re
30import shutil
31import sys
33import lsst.log as lsstLog
34import lsst.pex.config as pexConfig
35import lsst.daf.persistence as dafPersist
36import lsst.pipe.base as pipeBase
# Name of the environment variable that `_fixPath` consults for the default
# root of the input (and template) repository paths.
DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
# NOTE(review): not referenced in the visible part of this module; presumably
# the analogous environment variable name for the calibration repository.
DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
# NOTE(review): not referenced in the visible part of this module; presumably
# the analogous environment variable name for the output repository.
DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"
class ApPipeParser(pipeBase.ArgumentParser):
    """Custom argument parser to handle multiple input repos.

    In addition to the options provided by `lsst.pipe.base.ArgumentParser`,
    this parser registers ``--id``, ``--template`` and ``--templateId``, and
    builds a single butler that includes the template repository.
    """

    def __init__(self, *args, **kwargs):
        pipeBase.ArgumentParser.__init__(
            self,
            description="Process raw images through the AP pipeline "
                        "from ISR through source association",
            *args,
            **kwargs)
        inputDataset = "raw"
        self.add_id_argument("--id", inputDataset,
                             help="data IDs, e.g. --id visit=12345 ccd=1,2^0,3")

        self.add_argument("--template", dest="rawTemplate",
                          help="path to input template repository, relative to $%s" % DEFAULT_INPUT_NAME)
        self.add_id_argument("--templateId", inputDataset, doMakeDataRefList=True,
                             help="Optional template data ID (visit only), e.g. --templateId visit=410929")

        self.addReuseOption(["ccdProcessor", "differencer", "diaPipe"])

    # TODO: workaround for lack of support for multi-input butlers; see DM-11865
    # Can't delegate to pipeBase.ArgumentParser.parse_args because creating the
    # Butler more than once causes repo conflicts
    def parse_args(self, config, args=None, log=None, override=None):
        """Parse arguments for a command-line task.

        Parameters
        ----------
        config : `lsst.pex.config.Config`
            Config for the task being run.
        args : `list`, optional
            Argument list; if `None` then ``sys.argv[1:]`` is used.
        log : `lsst.log.Log`, optional
            `~lsst.log.Log` instance; if `None` use the default log.
        override : callable, optional
            A config override function. It must take the root config object as its only argument and must
            modify the config in place. This function is called after camera-specific overrides files are
            applied, and before command-line config overrides are applied (thus allowing the user the final
            word).

        Returns
        -------
        namespace : `argparse.Namespace`
            A `~argparse.Namespace` instance containing fields:

            - ``camera``: camera name.
            - ``config``: the supplied config with all overrides applied, validated and frozen.
            - ``butler``: a `lsst.daf.persistence.Butler` for the data.
            - An entry for each of the data ID arguments registered by `add_id_argument`,
              the value of which is a `~lsst.pipe.base.DataIdArgument` that includes public elements
              ``idList`` and ``refList``.
            - ``log``: a `lsst.log` Log.
            - An entry for each command-line argument, with the following exceptions:
              - config is the supplied config, suitably updated.
              - configfile, id and loglevel are all missing.
            - ``obsPkg``: name of the ``obs_`` package for this camera.

        Notes
        -----
        This method terminates the process (through ``self.exit``,
        ``self.error`` or ``sys.exit``) when the first argument is not an
        input repository, when argument validation fails, or when ``--show``
        was given without ``run``.
        """
        if args is None:
            args = sys.argv[1:]

        # The input repository must be the first positional argument.
        if not args or args[0].startswith(("-", "@")):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                # argparse's signature is exit(status, message): pass the exit
                # code explicitly instead of relying on sys.exit(<str>).
                self.exit(1, "%s: error: Must specify input as first argument\n" % self.prog)

        # Note that --rerun may change namespace.input, but if it does we verify that the
        # new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error("Error: input=%r not found" % (namespace.input,))

        namespace.config = config
        namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        self.handleCamera(namespace)

        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)
        namespace.template = _fixPath(DEFAULT_INPUT_NAME, namespace.rawTemplate)
        del namespace.rawTemplate

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)
        namespace.log.debug("template=%s", namespace.template)

        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        self._makeButler(namespace)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler, hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict:
                for dataRef in getattr(namespace, dataIdName).refList:
                    print("%s dataRef.dataId = %s" % (dataIdName, dataRef.dataId))

        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                sys.stderr.write("Warning: no 'debug' module found\n")
                namespace.debug = False

        del namespace.loglevel

        if namespace.longlog:
            lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n
""")
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace

    def _makeButler(self, namespace):
        """Create a butler according to parsed command line arguments.

        The butler is stored as ``namespace.butler``.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            a parsed command line containing all information needed to set up a new butler.
        """
        butlerArgs = {}  # common arguments for butler elements
        if namespace.calib:
            butlerArgs = {"mapperArgs": {"calibRoot": namespace.calib}}

        if namespace.output:
            inputs = [{"root": namespace.input}]
            outputs = [{"root": namespace.output, "mode": "rw"}]
        else:
            inputs = [{"root": namespace.input, "mode": "rw"}]
            outputs = []

        if namespace.template:
            ApPipeParser._addRepo(inputs, {"root": namespace.template, "mode": "r"})

        for repoList in inputs, outputs:
            for repo in repoList:
                repo.update(butlerArgs)

        if namespace.output:
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            # NOTE(review): in this branch ``inputs`` may also hold the
            # read-only ("r") template repo, which is then handed to the
            # Butler as an output -- confirm the Butler accepts that.
            namespace.butler = dafPersist.Butler(outputs=inputs)

    @staticmethod
    def _addRepo(repos, newRepo):
        """Add an extra repository to a collection.

        ``repos`` is updated in place: ``newRepo`` is appended unless one of
        the existing entries already refers to the same directory.

        Parameters
        ----------
        repos : `list` of `dict`
            The collection of repositories to update. Each element must be a
            valid input or output argument to an `lsst.daf.persistence.Butler`
            and must have a ``"root"`` key.
        newRepo : `dict`
            The repository to add; must have a ``"root"`` key.
        """
        # workaround for DM-13626, blocks DM-11482
        # Compare with samefile rather than string equality; the search stops
        # at the first repository that matches.
        duplicate = any(os.path.samefile(repo["root"], newRepo["root"]) for repo in repos)
        if not duplicate:
            repos.append(newRepo)
260# TODO: duplicated code; can remove once DM-11865 resolved
261def _fixPath(defName, path):
262 """Apply environment variable as default root, if present, and abspath.
264 Parameters
265 ----------
266 defName : `str`
267 Name of environment variable containing default root path; if the
268 environment variable does not exist then the path is relative to
269 the current working directory
270 path : `str`
271 Path relative to default root path.
273 Returns
274 -------
275 abspath : `str`
276 Path that has been expanded, or `None` if the environment variable
277 does not exist and path is `None`.
278 """
279 defRoot = os.environ.get(defName)
280 if defRoot is None:
281 if path is None:
282 return None
283 return os.path.abspath(path)
284 return os.path.abspath(os.path.join(defRoot, path or ""))
287# TODO: duplicated code; can remove once DM-11865 resolved
class _FilteredStream:
    """Write-only stream that prints only lines matching a glob pattern.

    N.b. Newlines are silently discarded and reinserted; crude but effective.

    Parameters
    ----------
    pattern : `str`
        Glob pattern. Matching ignores case unless the pattern ends with
        ``:NOIGNORECASE`` (that suffix is stripped before use).
    """

    def __init__(self, pattern):
        # obey case if pattern isn't lowercase or requests NOIGNORECASE
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(u"Matching \"%s\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)" % (pattern,), file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        """Print ``showStr`` if its field name matches the pattern."""
        showStr = showStr.rstrip()
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            print(u"\n" + showStr)


def obeyShowArgument(showOpts, config=None, exit=False):
    """Process arguments specified with ``--show`` (but ignores ``"data"``).

    Parameters
    ----------
    showOpts : `list` of `str`
        List of options passed to ``--show``.
    config : optional
        The provided config.
    exit : bool, optional
        Exit if ``"run"`` isn't included in ``showOpts``.

    Notes
    -----
    Supports the following options in showOpts:

    - ``config[=PAT]``. Dump all the config entries, or just the ones that match the glob pattern.
    - ``history=PAT``. Show where the config entries that match the glob pattern were set.
    - ``tasks``. Show task hierarchy.
    - ``data``. Ignored; to be processed by caller.
    - ``run``. Keep going (the default behaviour is to exit if --show is specified).

    A leading literal ``config.`` in ``PAT`` is stripped before use.

    Calls ``sys.exit(1)`` if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            # The dot is escaped so that only a literal "config." prefix is
            # removed (e.g. "configxfoo" is no longer truncated to "foo").
            matConfig = re.search(r"^(?:config\.)?(.+)?", showArgs)
            pattern = matConfig.group(1)
            if pattern:
                fd = _FilteredStream(pattern)
            else:
                fd = sys.stdout

            config.saveToStream(fd, "config")
        elif showCommand == "history":
            matHistory = re.search(r"^(?:config\.)?(.+)?", showArgs)
            pattern = matHistory.group(1)
            if not pattern:
                print("Please provide a value with --show history (e.g. history=XXX)", file=sys.stderr)
                sys.exit(1)

            # Walk down the dotted path to the sub-config owning the field.
            pattern = pattern.split(".")
            cpath, cname = pattern[:-1], pattern[-1]
            hconfig = config  # the config that we're interested in
            for i, cpt in enumerate(cpath):
                try:
                    hconfig = getattr(hconfig, cpt)
                except AttributeError:
                    print("Error: configuration %s has no subconfig %s" %
                          (".".join(["config"] + cpath[:i]), cpt), file=sys.stderr)

                    sys.exit(1)

            try:
                print(pexConfig.history.format(hconfig, cname))
            except KeyError:
                print("Error: %s has no field %s" % (".".join(["config"] + cpath), cname), file=sys.stderr)
                sys.exit(1)

        elif showCommand == "data":
            pass
        elif showCommand == "run":
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            print(u"Unknown value for show: %s (choose from '%s')" %
                  (what, "', '".join("config[=XXX] data history=XXX tasks run".split())), file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)
def showTaskHierarchy(config):
    """Print the subtasks of a config to stdout, one per line.

    A ``Subtasks:`` header is printed first, followed by
    ``<field name>: <task name>`` lines sorted by field name.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    """
    print(u"Subtasks:")
    subtasks = getTaskDict(config=config)

    for name in sorted(subtasks):
        print(u"%s: %s" % (name, subtasks[name]))
def getTaskDict(config, taskDict=None, baseName=""):
    """Collect the task name of every subtask reachable from a config.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    taskDict : `dict`, optional
        Users should not specify this argument. Supports recursion; if
        provided, it is updated in place, otherwise a new `dict` is started.
    baseName : `str`, optional
        Users should not specify this argument. Used for recursion only: when
        non-empty, it is joined with a period to each field name to form the
        key stored in ``taskDict``; otherwise the bare field name is used.

    Returns
    -------
    taskDict : `dict`
        Keys are (dotted) config field names, values are task names.

    Notes
    -----
    A field is treated as a subtask when it has both ``value`` and ``target``
    attributes and its ``value`` is a `lsst.pex.config.Config`. The function
    recurses into each such sub-config.
    """
    if taskDict is None:
        taskDict = {}

    for fieldName, field in config.items():
        # Only fields exposing both a value and a target can be subtasks.
        if not (hasattr(field, "value") and hasattr(field, "target")):
            continue
        subConfig = field.value
        if not isinstance(subConfig, pexConfig.Config):
            continue

        if baseName:
            qualifiedName = "%s.%s" % (baseName, fieldName)
        else:
            qualifiedName = fieldName

        try:
            target = field.target
            taskName = "%s.%s" % (target.__module__, target.__name__)
        except Exception:
            # Targets without module/name metadata fall back to their repr.
            taskName = repr(field.target)

        taskDict[qualifiedName] = taskName
        getTaskDict(config=subConfig, taskDict=taskDict, baseName=qualifiedName)

    return taskDict