Coverage for python/lsst/ap/pipe/apPipeParser.py: 9%

197 statements  

« prev     ^ index     » next       coverage.py v6.4.1, created at 2022-06-15 12:00 +0000

1# 

2# This file is part of ap_pipe. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>.

22# 

23 

24__all__ = ["ApPipeParser"] 

25 

26import argparse 

27import fnmatch 

28import os 

29import re 

30import shutil 

31import sys 

32 

33import lsst.log as lsstLog 

34import lsst.pex.config as pexConfig 

35import lsst.daf.persistence as dafPersist 

36import lsst.pipe.base as pipeBase 

37 

# Name of the environment variable that, when set, supplies the default root
# against which input (and template) repository paths are resolved.
DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
# Counterparts for calibration and output repositories. They are not referenced
# in the visible part of this module -- presumably consumed by the base parser;
# TODO confirm.
DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
DEFAULT_OUTPUT_NAME = "PIPE_OUTPUT_ROOT"

41 

42 

class ApPipeParser(pipeBase.ArgumentParser):
    """Custom argument parser to handle multiple input repos.

    In addition to the options provided by `lsst.pipe.base.ArgumentParser`,
    this parser registers ``--id``, ``--template`` and ``--templateId`` and
    builds a single butler that can see both the science and the template
    repositories.
    """

    def __init__(self, *args, **kwargs):
        pipeBase.ArgumentParser.__init__(
            self,
            description="Process raw images through the AP pipeline "
                        "from ISR through source association",
            *args,
            **kwargs)
        inputDataset = "raw"
        self.add_id_argument("--id", inputDataset,
                             help="data IDs, e.g. --id visit=12345 ccd=1,2^0,3")

        self.add_argument("--template", dest="rawTemplate",
                          help="path to input template repository, relative to $%s" % DEFAULT_INPUT_NAME)
        self.add_id_argument("--templateId", inputDataset, doMakeDataRefList=True,
                             help="Optional template data ID (visit only), e.g. --templateId visit=410929")

        self.addReuseOption(["ccdProcessor", "differencer", "diaPipe"])

    # TODO: workaround for lack of support for multi-input butlers; see DM-11865
    # Can't delegate to pipeBase.ArgumentParser.parse_args because creating the
    # Butler more than once causes repo conflicts
    def parse_args(self, config, args=None, log=None, override=None):
        """Parse arguments for a command-line task.

        Parameters
        ----------
        config : `lsst.pex.config.Config`
            Config for the task being run.
        args : `list`, optional
            Argument list; if `None` then ``sys.argv[1:]`` is used.
        log : `lsst.log.Log`, optional
            `~lsst.log.Log` instance; if `None` use the default log.
        override : callable, optional
            A config override function. It must take the root config object as its only argument and must
            modify the config in place. This function is called after camera-specific overrides files are
            applied, and before command-line config overrides are applied (thus allowing the user the final
            word).

        Returns
        -------
        namespace : `argparse.Namespace`
            A `~argparse.Namespace` instance containing fields:

            - ``camera``: camera name.
            - ``config``: the supplied config with all overrides applied, validated and frozen.
            - ``butler``: a `lsst.daf.persistence.Butler` for the data.
            - An entry for each of the data ID arguments registered by `add_id_argument`,
              the value of which is a `~lsst.pipe.base.DataIdArgument` that includes public elements
              ``idList`` and ``refList``.
            - ``log``: a `lsst.log` Log.
            - ``template``: absolute path of the template repository, or `None`.
            - An entry for each command-line argument, with the following exceptions:

              - config is the supplied config, suitably updated.
              - configfile, rawTemplate, loglevel and longlog are all missing.

            - ``obsPkg``: name of the ``obs_`` package for this camera.

        Notes
        -----
        The process exits (via `argparse.ArgumentParser.exit`, `argparse.ArgumentParser.error` or
        `sys.exit`) instead of returning when the command line is invalid or when ``--show`` is given
        without ``run``.
        """
        if args is None:
            args = sys.argv[1:]

        if not args or args[0].startswith(("-", "@")):
            self.print_help()
            if len(args) == 1 and args[0] in ("-h", "--help"):
                self.exit()
            else:
                # ArgumentParser.exit takes (status, message); the message must be passed as the
                # second argument. Exit status 1 matches what passing the bare string used to yield.
                self.exit(1, "%s: error: Must specify input as first argument\n" % self.prog)

        # Note that --rerun may change namespace.input, but if it does we verify that the
        # new input has the same mapper class.
        namespace = argparse.Namespace()
        namespace.input = _fixPath(DEFAULT_INPUT_NAME, args[0])
        if not os.path.isdir(namespace.input):
            self.error("Error: input=%r not found" % (namespace.input,))

        namespace.config = config
        namespace.log = log if log is not None else lsstLog.Log.getDefaultLogger()
        mapperClass = dafPersist.Butler.getMapperClass(namespace.input)
        namespace.camera = mapperClass.getCameraName()
        namespace.obsPkg = mapperClass.getPackageName()

        self.handleCamera(namespace)

        self._applyInitialOverrides(namespace)
        if override is not None:
            override(namespace.config)

        # Add data ID containers to namespace
        for dataIdArgument in self._dataIdArgDict.values():
            setattr(namespace, dataIdArgument.name, dataIdArgument.ContainerClass(level=dataIdArgument.level))

        namespace = argparse.ArgumentParser.parse_args(self, args=args, namespace=namespace)
        del namespace.configfile

        self._parseDirectories(namespace)
        namespace.template = _fixPath(DEFAULT_INPUT_NAME, namespace.rawTemplate)
        del namespace.rawTemplate
        # A missing template would otherwise surface later as an unhandled FileNotFoundError
        # from os.path.samefile in _addRepo; report it like a missing input instead.
        if namespace.template is not None and not os.path.exists(namespace.template):
            self.error("Error: template=%r not found" % (namespace.template,))

        if namespace.clobberOutput:
            if namespace.output is None:
                self.error("--clobber-output is only valid with --output or --rerun")
            elif namespace.output == namespace.input:
                self.error("--clobber-output is not valid when the output and input repos are the same")
            if os.path.exists(namespace.output):
                namespace.log.info("Removing output repo %s for --clobber-output", namespace.output)
                shutil.rmtree(namespace.output)

        namespace.log.debug("input=%s", namespace.input)
        namespace.log.debug("calib=%s", namespace.calib)
        namespace.log.debug("output=%s", namespace.output)
        namespace.log.debug("template=%s", namespace.template)

        # exit=False: the decision to stop after --show is deferred until the data IDs have been shown
        obeyShowArgument(namespace.show, namespace.config, exit=False)

        # No environment variable or --output or --rerun specified.
        if self.requireOutput and namespace.output is None and namespace.rerun is None:
            self.error("no output directory specified.\n"
                       "An output directory must be specified with the --output or --rerun\n"
                       "command-line arguments.\n")

        self._makeButler(namespace)

        # convert data in each of the identifier lists to proper types
        # this is done after constructing the butler, hence after parsing the command line,
        # because it takes a long time to construct a butler
        self._processDataIds(namespace)
        if "data" in namespace.show:
            for dataIdName in self._dataIdArgDict.keys():
                for dataRef in getattr(namespace, dataIdName).refList:
                    print("%s dataRef.dataId = %s" % (dataIdName, dataRef.dataId))

        if namespace.show and "run" not in namespace.show:
            sys.exit(0)

        if namespace.debug:
            try:
                import debug
                assert debug  # silence pyflakes
            except ImportError:
                sys.stderr.write("Warning: no 'debug' module found\n")
                namespace.debug = False

        del namespace.loglevel

        if namespace.longlog:
            lsstLog.configure_prop("""
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.out
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n
""")
        del namespace.longlog

        namespace.config.validate()
        namespace.config.freeze()

        return namespace

    def _makeButler(self, namespace):
        """Create a butler according to parsed command line arguments.

        The butler is stored as ``namespace.butler``.

        Parameters
        ----------
        namespace : `argparse.Namespace`
            a parsed command line containing all information needed to set up a new butler.
            The ``input``, ``output``, ``calib`` and ``template`` fields are read.
        """
        butlerArgs = {}  # common arguments for butler elements
        if namespace.calib:
            butlerArgs = {"mapperArgs": {"calibRoot": namespace.calib}}

        if namespace.output:
            inputs = [{"root": namespace.input}]
            outputs = [{"root": namespace.output, "mode": "rw"}]
        else:
            # Without a separate output repo the input repo is opened read-write
            inputs = [{"root": namespace.input, "mode": "rw"}]
            outputs = []

        if namespace.template:
            ApPipeParser._addRepo(inputs, {"root": namespace.template, "mode": "r"})

        for repoList in inputs, outputs:
            for repo in repoList:
                repo.update(butlerArgs)

        if namespace.output:
            namespace.butler = dafPersist.Butler(inputs=inputs, outputs=outputs)
        else:
            # NOTE(review): a distinct template repo (mode "r") ends up in ``outputs`` here --
            # confirm that the Butler accepts a read-only entry in that list.
            namespace.butler = dafPersist.Butler(outputs=inputs)

    @staticmethod
    def _addRepo(repos, newRepo):
        """Add an extra repository to a collection.

        ``repos`` is modified in place: ``newRepo`` is appended unless an
        entry referring to the same location is already present.

        Parameters
        ----------
        repos : `list` of `dict`
            The collection of repositories to update. Each element must be a
            valid input or output argument to an `lsst.daf.persistence.Butler`
            and have a ``"root"`` key.
        newRepo : `dict`
            The repository to add; must have a ``"root"`` key.

        Raises
        ------
        OSError
            Raised by `os.path.samefile` if one of the compared roots cannot
            be accessed (e.g. does not exist).
        """
        # workaround for DM-13626, blocks DM-11482
        duplicate = any(os.path.samefile(repo["root"], newRepo["root"]) for repo in repos)
        if not duplicate:
            repos.append(newRepo)

258 

259 

260# TODO: duplicated code; can remove once DM-11865 resolved 

def _fixPath(defName, path):
    """Resolve ``path`` against an optional environment-supplied root.

    Parameters
    ----------
    defName : `str`
        Name of the environment variable holding the default root. When the
        variable is unset, ``path`` is interpreted relative to the current
        working directory.
    path : `str` or `None`
        Path relative to the default root.

    Returns
    -------
    abspath : `str` or `None`
        Absolute form of the resolved path. `None` is returned only when
        the environment variable is unset and ``path`` is `None`; if the
        variable is set and ``path`` is `None`, the root itself is returned.
    """
    root = os.environ.get(defName)
    if root is not None:
        # A missing path collapses to the root directory itself
        return os.path.abspath(os.path.join(root, path or ""))
    return None if path is None else os.path.abspath(path)

285 

286 

287# TODO: duplicated code; can remove once DM-11865 resolved 

def obeyShowArgument(showOpts, config=None, exit=False):
    """Process arguments specified with ``--show`` (but ignores ``"data"``).

    Parameters
    ----------
    showOpts : `list` of `str`
        List of options passed to ``--show``.
    config : optional
        The provided config. Required by the ``config``, ``history`` and
        ``tasks`` options.
    exit : bool, optional
        Exit if ``"run"`` isn't included in ``showOpts``.

    Notes
    -----
    Supports the following options in showOpts:

    - ``config[=PAT]``. Dump all the config entries, or just the ones that match the glob pattern.
    - ``history=PAT``. Show where the config entries that match the glob pattern were set.
    - ``tasks``. Show task hierarchy.
    - ``data``. Ignored; to be processed by caller.
    - ``run``. Keep going (the default behaviour is to exit if --show is specified).

    A leading ``config.`` in ``PAT`` is optional and is removed before matching.

    Calls ``sys.exit(1)`` if any other option found.
    """
    if not showOpts:
        return

    for what in showOpts:
        showCommand, showArgs = what.split("=", 1) if "=" in what else (what, "")

        if showCommand == "config":
            pattern = _stripConfigPrefix(showArgs)
            # With no pattern everything goes straight to stdout
            fd = _FilteredStream(pattern) if pattern else sys.stdout
            config.saveToStream(fd, "config")
        elif showCommand == "history":
            pattern = _stripConfigPrefix(showArgs)
            if not pattern:
                print("Please provide a value with --show history (e.g. history=XXX)", file=sys.stderr)
                sys.exit(1)

            pattern = pattern.split(".")
            cpath, cname = pattern[:-1], pattern[-1]
            hconfig = config            # the config that we're interested in
            for i, cpt in enumerate(cpath):
                try:
                    hconfig = getattr(hconfig, cpt)
                except AttributeError:
                    print("Error: configuration %s has no subconfig %s" %
                          (".".join(["config"] + cpath[:i]), cpt), file=sys.stderr)

                    sys.exit(1)

            try:
                print(pexConfig.history.format(hconfig, cname))
            except KeyError:
                print("Error: %s has no field %s" % (".".join(["config"] + cpath), cname), file=sys.stderr)
                sys.exit(1)

        elif showCommand == "data":
            pass
        elif showCommand == "run":
            pass
        elif showCommand == "tasks":
            showTaskHierarchy(config)
        else:
            print(u"Unknown value for show: %s (choose from '%s')" %
                  (what, "', '".join("config[=XXX] data history=XXX tasks run".split())), file=sys.stderr)
            sys.exit(1)

    if exit and "run" not in showOpts:
        sys.exit(0)


def _stripConfigPrefix(showArgs):
    """Return ``showArgs`` without an optional leading ``config.``, or `None` if nothing remains."""
    # The dot must be escaped: an unescaped "." also swallows the first character of any
    # name that merely starts with "config" (e.g. "configurefoo" would become "refoo").
    return re.search(r"^(?:config\.)?(.+)?", showArgs).group(1)


class _FilteredStream(object):
    """A file object that only prints lines that match the glob "pattern"

    N.b. Newlines are silently discarded and reinserted; crude but effective.

    Parameters
    ----------
    pattern : `str`
        Glob pattern. Matching ignores case unless the pattern ends with
        ``:NOIGNORECASE``.
    """

    def __init__(self, pattern):
        # obey case if pattern isn't lowercase or requests NOIGNORECASE
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(u"Matching \"%s\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)" % (pattern,), file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        showStr = showStr.rstrip()
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            print(u"\n" + showStr)

391 

392 

def showTaskHierarchy(config):
    """Print task hierarchy to stdout.

    One ``field: task`` line is written per subtask, ordered by config
    field name, after a ``Subtasks:`` heading.

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    """
    print(u"Subtasks:")
    subtasks = getTaskDict(config=config)
    for name in sorted(subtasks):
        print(u"%s: %s" % (name, subtasks[name]))

408 

409 

def getTaskDict(config, taskDict=None, baseName=""):
    """Get a dictionary of task info for all subtasks in a config

    Parameters
    ----------
    config : `lsst.pex.config.Config`
        Configuration to process.
    taskDict : `dict`, optional
        Users should not specify this argument. Supports recursion: if provided, taskDict is updated in
        place, else a new `dict` is started.
    baseName : `str`, optional
        Users should not specify this argument. It is only used for recursion: if a non-empty string then a
        period is appended and the result is used as a prefix for additional entries in taskDict; otherwise
        no prefix is used.

    Returns
    -------
    taskDict : `dict`
        Keys are config field names, values are task names.

    Notes
    -----
    This function is designed to be called recursively. The user should call with only a config
    (leaving taskDict and baseName at their default values).
    """
    if taskDict is None:
        taskDict = {}
    for name, field in config.items():
        # Only fields exposing both a value and a target are considered
        if not hasattr(field, "value") or not hasattr(field, "target"):
            continue
        child = field.value
        if not isinstance(child, pexConfig.Config):
            continue
        qualified = "%s.%s" % (baseName, name) if baseName else name
        try:
            described = "%s.%s" % (field.target.__module__, field.target.__name__)
        except Exception:
            # Fall back to the repr when the target lacks a module or name
            described = repr(field.target)
        taskDict[qualified] = described
        # Descend so that nested subtasks are recorded under the qualified prefix
        getTaskDict(config=child, taskDict=taskDict, baseName=qualified)
    return taskDict