Coverage for python/lsst/ap/verify/pipeline_driver.py: 17%


113 statements  

#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

"""Interface between `ap_verify` and `ap_pipe`.

This module handles calling `ap_pipe` and converting any information
as needed.
"""

__all__ = ["ApPipeParser", "runApPipeGen2", "runApPipeGen3"]

import argparse
import os
import re
import subprocess
import logging

import lsst.pipe.base as pipeBase
import lsst.ctrl.mpexec.execFixupDataId  # not part of lsst.ctrl.mpexec
import lsst.ctrl.mpexec.cli.pipetask
import lsst.ap.pipe as apPipe
from lsst.ap.pipe.make_apdb import makeApdb

_LOG = logging.getLogger(__name__)

class ApPipeParser(argparse.ArgumentParser):
    """An argument parser for data needed by ``ap_pipe`` activities.

    This parser is not complete, and is designed to be passed to another
    parser using the `parents` parameter.
    """

    def __init__(self):
        # Help and documentation will be handled by main program's parser
        argparse.ArgumentParser.__init__(self, add_help=False)
        # namespace.dataIds will always be a list of 0 or more nonempty strings, regardless of inputs.
        # TODO: in Python 3.8+, action='extend' handles nargs='?' more naturally than 'append'.
        self.add_argument('--id', '-d', '--data-query', dest='dataIds',
                          action=self.AppendOptional, nargs='?', default=[],
                          help='An identifier for the data to process.')
        self.add_argument("-p", "--pipeline", default=None,
                          help="A custom version of the ap_verify pipeline (e.g., with different metrics). "
                               "Defaults to the ApVerify.yaml within --dataset.")
        self.add_argument("--db", "--db_url", default=None,
                          help="A location for the AP database, formatted as if for ApdbConfig.db_url. "
                               "Defaults to an SQLite file in the --output directory.")
        self.add_argument("--skip-pipeline", action="store_true",
                          help="Do not run the AP pipeline itself. This argument is useful "
                               "for testing metrics on a fixed data set.")
        self.add_argument("--clean-run", action="store_true",
                          help="Run the pipeline with a new run collection, "
                               "even if one already exists.")

    class AppendOptional(argparse.Action):
        """A variant of the built-in "append" action that ignores None values
        instead of appending them.
        """
        # This class can't safely inherit from the built-in "append" action
        # because there is no public class that implements it.
        def __call__(self, parser, namespace, values, option_string=None):
            if values is not None:
                try:
                    allValues = getattr(namespace, self.dest)
                    allValues.append(values)
                except AttributeError:
                    setattr(namespace, self.dest, [values])
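
# Illustrative usage (not part of this module): ApPipeParser is intended to be
# attached to a driver's own parser through argparse's ``parents`` mechanism,
# roughly like this:
#
#     mainParser = argparse.ArgumentParser(parents=[ApPipeParser()])
#     args = mainParser.parse_args(["--id", "visit=12345", "--clean-run"])
#     assert args.dataIds == ["visit=12345"]
#     assert args.clean_run and not args.skip_pipeline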

def runApPipeGen2(workspace, parsedCmdLine, processes=1):
    """Run `ap_pipe` on the dataset in a Gen 2 workspace.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen2`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline.

    Returns
    -------
    apPipeReturn : `lsst.pipe.base.Struct`
        The `~lsst.pipe.base.Struct` returned from
        `~lsst.ap.pipe.ApPipeTask.parseAndRun` with
        ``doReturnResults=False``. This object is valid even if
        `~lsst.ap.pipe.ApPipeTask` was never run.
    """
    log = _LOG.getChild('runApPipeGen2')

    makeApdb(_getApdbArguments(workspace, parsedCmdLine))

    pipelineArgs = [workspace.dataRepo,
                    "--output", workspace.outputRepo,
                    "--calib", workspace.calibRepo,
                    "--template", workspace.templateRepo]
    pipelineArgs.extend(_getConfigArguments(workspace, parsedCmdLine))
    if parsedCmdLine.dataIds:
        for singleId in parsedCmdLine.dataIds:
            pipelineArgs.extend(["--id", *singleId.split(" ")])
    else:
        # A bare --id matches all available data IDs.
        pipelineArgs.extend(["--id"])
    pipelineArgs.extend(["--processes", str(processes)])
    pipelineArgs.extend(["--noExit"])

    if not parsedCmdLine.skip_pipeline:
        results = apPipe.ApPipeTask.parseAndRun(pipelineArgs)
        log.info('Pipeline complete')
    else:
        log.info('Skipping AP pipeline entirely.')
        apPipeParser = apPipe.ApPipeTask._makeArgumentParser()
        apPipeParsed = apPipeParser.parse_args(config=apPipe.ApPipeTask.ConfigClass(), args=pipelineArgs)
        results = pipeBase.Struct(
            argumentParser=apPipeParser,
            parsedCmd=apPipeParsed,
            taskRunner=apPipe.ApPipeTask.RunnerClass(TaskClass=apPipe.ApPipeTask, parsedCmd=apPipeParsed),
            resultList=[],
        )

    return results
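
# For illustration only (paths and IDs depend on the workspace and command
# line), the argument list handed to ApPipeTask.parseAndRun resembles
#
#     [<workspace.dataRepo>,
#      "--output", <workspace.outputRepo>,
#      "--calib", <workspace.calibRepo>,
#      "--template", <workspace.templateRepo>,
#      ...the --configfile/--config options from _getConfigArguments...,
#      "--id", "visit=12345", "--processes", "1", "--noExit"]
#
# A worked example of the config options appears after _getConfigArguments
# below.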

def runApPipeGen3(workspace, parsedCmdLine, processes=1):
    """Run `ap_pipe` on the dataset in a Gen 3 workspace.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline.

    Returns
    -------
    code : `int`
        An error code that is zero if the pipeline ran without problems, or
        nonzero if there were errors. The exact meaning of nonzero values
        is an implementation detail.
    """
    log = _LOG.getChild('runApPipeGen3')

    makeApdb(_getApdbArguments(workspace, parsedCmdLine))

    pipelineFile = _getPipelineFile(workspace, parsedCmdLine)
    pipelineArgs = ["pipetask", "run",
                    "--butler-config", workspace.repo,
                    "--pipeline", pipelineFile,
                    ]
    # TODO: workaround for inability to generate crosstalk sources in main
    # processing pipeline (DM-31492).
    instruments = {id["instrument"] for id in workspace.workButler.registry.queryDataIds("instrument")}
    if "DECam" in instruments:
        crosstalkPipeline = "${AP_PIPE_DIR}/pipelines/DarkEnergyCamera/RunIsrForCrosstalkSources.yaml"
        crosstalkArgs = ["pipetask", "run",
                         "--butler-config", workspace.repo,
                         "--pipeline", crosstalkPipeline,
                         ]
        crosstalkArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))
        if parsedCmdLine.dataIds:
            for singleId in parsedCmdLine.dataIds:
                crosstalkArgs.extend(["--data-query", singleId])
        crosstalkArgs.extend(["--processes", str(processes)])
        crosstalkArgs.extend(["--register-dataset-types"])
        subprocess.run(crosstalkArgs, capture_output=False, shell=False, check=False)

        # Force same output run for crosstalk and main processing.
        pipelineArgs.extend(_getCollectionArguments(workspace, reuse=True))
    else:
        # TODO: collections should be determined exclusively by Workspace.workButler,
        # but I can't find a way to hook that up to the graph builder. So use the CLI
        # for now and revisit once DM-26239 is done.
        pipelineArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))

    pipelineArgs.extend(_getConfigArgumentsGen3(workspace, parsedCmdLine))
    if parsedCmdLine.dataIds:
        for singleId in parsedCmdLine.dataIds:
            pipelineArgs.extend(["--data-query", singleId])
    pipelineArgs.extend(["--processes", str(processes)])
    pipelineArgs.extend(["--register-dataset-types"])
    pipelineArgs.extend(["--graph-fixup", "lsst.ap.verify.pipeline_driver._getExecOrder"])

    if not parsedCmdLine.skip_pipeline:
        # subprocess is an unsafe workaround for DM-26239
        # TODO: generalize this code in DM-26028
        # TODO: work off of workspace.workButler after DM-26239
        results = subprocess.run(pipelineArgs, capture_output=False, shell=False, check=False)
        log.info('Pipeline complete.')
        return results.returncode
    else:
        log.info('Skipping AP pipeline entirely.')
        # Return success so the caller always gets an integer code, as documented.
        return 0
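
# For illustration only, the assembled Gen 3 command resembles the following
# (paths, collections, and data queries depend on the workspace and command
# line; this is not an exact transcript):
#
#     pipetask run --butler-config <workspace.repo> \
#         --pipeline ${AP_VERIFY_DIR}/pipelines/ApVerify.yaml \
#         --output <workspace.outputName> --clobber-outputs \
#         --config diaPipe:apdb.db_url=sqlite:///<workspace.dbLocation> \
#         --data-query "visit=12345" --processes 1 \
#         --register-dataset-types \
#         --graph-fixup lsst.ap.verify.pipeline_driver._getExecOrder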

def _getExecOrder():
    """Return any constraints on the Gen 3 execution order.

    The current constraints are that executions of DiaPipelineTask must be
    ordered by visit ID, but this is subject to change.

    Returns
    -------
    order : `lsst.ctrl.mpexec.ExecutionGraphFixup`
        An object encoding the desired execution order as an algorithm for
        modifying inter-quantum dependencies.

    Notes
    -----
    This function must be importable, but need not be public.
    """
    # Source association algorithm is not time-symmetric. Force execution of
    # association (through DiaPipelineTask) in order of ascending visit number.
    return lsst.ctrl.mpexec.execFixupDataId.ExecFixupDataId(
        taskLabel="diaPipe", dimensions=["visit", ], reverse=False)
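
# Usage note: runApPipeGen3 above passes this function's dotted name to
# pipetask via "--graph-fixup lsst.ap.verify.pipeline_driver._getExecOrder",
# which is why it must be importable by that path.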

def _getPipelineFile(workspace, parsed):
    """Return the location of the pipeline file to use for running ap_verify.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.Workspace`
        A Workspace whose pipeline directory may contain an ApVerify pipeline.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    pipeline : `str`
        The location of the pipeline file to use for running ap_verify.
    """
    if parsed.pipeline:
        return parsed.pipeline
    else:
        customPipeline = os.path.join(workspace.pipelineDir, "ApVerify.yaml")
        if os.path.exists(customPipeline):
            return customPipeline
        else:
            return os.path.join("${AP_VERIFY_DIR}", "pipelines", "ApVerify.yaml")
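
# Resolution order, as implemented above: an explicit --pipeline argument wins,
# then a dataset-provided <pipelineDir>/ApVerify.yaml, and finally the default
# pipeline shipped with ap_verify.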

def _getApdbArguments(workspace, parsed):
    """Return the config options for running make_apdb.py on this workspace,
    as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.Workspace`
        A Workspace whose config directory may contain an
        `~lsst.ap.pipe.ApPipeTask` config.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--config-file``,
        following the conventions of `sys.argv`.
    """
    if not parsed.db:
        parsed.db = "sqlite:///" + workspace.dbLocation

    args = ["--config", "db_url=" + parsed.db]
    # Same special-case check as ApdbConfig.validate()
    if parsed.db.startswith("sqlite"):
        args.extend(["--config", "isolation_level=READ_UNCOMMITTED"])

    return args
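
# Worked example (illustrative): with no --db argument and a workspace whose
# dbLocation is "/work/association.db", the returned list is
#     ["--config", "db_url=sqlite:////work/association.db",
#      "--config", "isolation_level=READ_UNCOMMITTED"]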

def _getConfigArguments(workspace, parsed):
    """Return the config options for running ApPipeTask on this workspace, as
    command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen2`
        A Workspace whose config directory may contain an
        `~lsst.ap.pipe.ApPipeTask` config.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--configfile``,
        following the conventions of `sys.argv`.
    """
    overrideFile = apPipe.ApPipeTask._DefaultName + ".py"
    overridePath = os.path.join(workspace.configDir, overrideFile)

    args = ["--configfile", overridePath]
    # Translate APDB-only arguments to work as a sub-config
    args.extend([("diaPipe.apdb." + arg if arg != "--config" else arg)
                 for arg in _getApdbArguments(workspace, parsed)])
    # Put output alerts into the workspace.
    args.extend(["--config", "diaPipe.alertPackager.alertWriteLocation=" + workspace.alertLocation])
    args.extend(["--config", "diaPipe.doPackageAlerts=True"])

    return args
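
# Continuing the worked example above (illustrative), the Gen 2 (command-line
# task) form of these options is
#     ["--configfile", "<workspace.configDir>/<ApPipeTask._DefaultName>.py",
#      "--config", "diaPipe.apdb.db_url=sqlite:////work/association.db",
#      "--config", "diaPipe.apdb.isolation_level=READ_UNCOMMITTED",
#      "--config", "diaPipe.alertPackager.alertWriteLocation=<workspace.alertLocation>",
#      "--config", "diaPipe.doPackageAlerts=True"]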

def _getConfigArgumentsGen3(workspace, parsed):
    """Return the config options for running the Gen 3 AP Pipeline on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace whose config directory may contain various configs.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--config-file``,
        following the conventions of `sys.argv`.
    """
    # Translate APDB-only arguments to work as a sub-config
    args = [("diaPipe:apdb." + arg if arg != "--config" else arg)
            for arg in _getApdbArguments(workspace, parsed)]
    args.extend([
        # Put output alerts into the workspace.
        "--config", "diaPipe:alertPackager.alertWriteLocation=" + workspace.alertLocation,
    ])
    return args
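
# Continuing the worked example above (illustrative), the Gen 3 (pipetask)
# form of these options is
#     ["--config", "diaPipe:apdb.db_url=sqlite:////work/association.db",
#      "--config", "diaPipe:apdb.isolation_level=READ_UNCOMMITTED",
#      "--config", "diaPipe:alertPackager.alertWriteLocation=<workspace.alertLocation>"]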

def _getCollectionArguments(workspace, reuse):
    """Return the collections for running the Gen 3 AP Pipeline on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace with a Gen 3 repository.
    reuse : `bool`
        If true, use the previous run collection if one exists. Otherwise,
        create a new run.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--input`` or ``--output``,
        following the conventions of `sys.argv`.
    """
    # workspace.outputName is a chained collection containing all inputs
    args = ["--output", workspace.outputName,
            "--clobber-outputs",
            ]

    registry = workspace.workButler.registry
    # Should refresh registry to see crosstalk run from DM-31492, but this
    # currently leads to a bug involving --skip-existing. The only downside of
    # the cached registry is that, with two runs for DECam datasets, a rerun of
    # ap_verify will re-run crosstalk sources in the second run. Using
    # skip-existing-in would work around that, but would lead to a worse bug in
    # the case that the user is alternating runs with and without --clean-run.
    # registry.refresh()
    oldRuns = list(registry.queryCollections(re.compile(workspace.outputName + r"/\d+T\d+Z")))
    if reuse and oldRuns:
        args.extend(["--extend-run", "--skip-existing"])
    return args
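
# Worked example (illustrative): if workspace.outputName is "ap_verify-output"
# and the registry already contains a timestamped run such as
# "ap_verify-output/20210101T000000Z", then reuse=True yields
#     ["--output", "ap_verify-output", "--clobber-outputs",
#      "--extend-run", "--skip-existing"]
# while reuse=False (from --clean-run) omits --extend-run and --skip-existing,
# letting pipetask create a fresh run collection.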