Coverage for python/lsst/ap/verify/pipeline_driver.py: 16%

112 statements  

#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

"""Interface between `ap_verify` and `ap_pipe`.

This module handles calling `ap_pipe` and converting any information
as needed.
"""

__all__ = ["ApPipeParser", "runApPipeGen2", "runApPipeGen3"]

import argparse
import os
import re
import subprocess

import lsst.log
import lsst.pipe.base as pipeBase
import lsst.ctrl.mpexec.execFixupDataId  # not part of lsst.ctrl.mpexec
import lsst.ctrl.mpexec.cli.pipetask
import lsst.ap.pipe as apPipe
from lsst.ap.pipe.make_apdb import makeApdb

class ApPipeParser(argparse.ArgumentParser):
    """An argument parser for data needed by ``ap_pipe`` activities.

    This parser is not complete, and is designed to be passed to another
    parser using the `parents` parameter.
    """

    def __init__(self):
        # Help and documentation will be handled by the main program's parser
        argparse.ArgumentParser.__init__(self, add_help=False)
        # namespace.dataIds will always be a list of 0 or more nonempty strings, regardless of inputs.
        # TODO: in Python 3.8+, action='extend' handles nargs='?' more naturally than 'append'.
        self.add_argument('--id', '-d', '--data-query', dest='dataIds',
                          action=self.AppendOptional, nargs='?', default=[],
                          help='An identifier for the data to process.')
        self.add_argument("-p", "--pipeline", default=None,
                          help="A custom version of the ap_verify pipeline (e.g., with different metrics). "
                               "Defaults to the ApVerify.yaml within --dataset.")
        self.add_argument("--db", "--db_url", default=None,
                          help="A location for the AP database, formatted as if for ApdbConfig.db_url. "
                               "Defaults to an SQLite file in the --output directory.")
        self.add_argument("--skip-pipeline", action="store_true",
                          help="Do not run the AP pipeline itself. This argument is useful "
                               "for testing metrics on a fixed data set.")
        self.add_argument("--clean-run", action="store_true",
                          help="Run the pipeline with a new run collection, "
                               "even if one already exists.")

    class AppendOptional(argparse.Action):
        """A variant of the built-in "append" action that ignores None values
        instead of appending them.
        """
        # This class can't safely inherit from the built-in "append" action
        # because there is no public class that implements it.
        def __call__(self, parser, namespace, values, option_string=None):
            if values is not None:
                try:
                    allValues = getattr(namespace, self.dest)
                    allValues.append(values)
                except AttributeError:
                    setattr(namespace, self.dest, [values])

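# A minimal usage sketch for ApPipeParser (not part of ap_verify itself): the parser
# is composed with a program's main parser through argparse's standard `parents`
# mechanism. The "--dataset" argument and the example values are hypothetical.
#
#     mainParser = argparse.ArgumentParser(parents=[ApPipeParser()])
#     mainParser.add_argument("--dataset")
#     args = mainParser.parse_args(["--dataset", "HiTS2015", "--id", "visit=411371"])
#     assert args.dataIds == ["visit=411371"]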
def runApPipeGen2(workspace, parsedCmdLine, processes=1):
    """Run `ap_pipe` on the dataset in the given workspace.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen2`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline.

    Returns
    -------
    apPipeReturn : `lsst.pipe.base.Struct`
        The `~lsst.pipe.base.Struct` returned from
        `~lsst.ap.pipe.ApPipeTask.parseAndRun` with
        ``doReturnResults=False``. This object is valid even if
        `~lsst.ap.pipe.ApPipeTask` was never run.
    """
    log = lsst.log.Log.getLogger('ap.verify.pipeline_driver.runApPipeGen2')

    makeApdb(_getApdbArguments(workspace, parsedCmdLine))

    pipelineArgs = [workspace.dataRepo,
                    "--output", workspace.outputRepo,
                    "--calib", workspace.calibRepo,
                    "--template", workspace.templateRepo]
    pipelineArgs.extend(_getConfigArguments(workspace, parsedCmdLine))
    if parsedCmdLine.dataIds:
        for singleId in parsedCmdLine.dataIds:
            pipelineArgs.extend(["--id", *singleId.split(" ")])
    else:
        pipelineArgs.extend(["--id"])
    pipelineArgs.extend(["--processes", str(processes)])
    pipelineArgs.extend(["--noExit"])

    if not parsedCmdLine.skip_pipeline:
        results = apPipe.ApPipeTask.parseAndRun(pipelineArgs)
        log.info('Pipeline complete.')
    else:
        log.info('Skipping AP pipeline entirely.')
        apPipeParser = apPipe.ApPipeTask._makeArgumentParser()
        apPipeParsed = apPipeParser.parse_args(config=apPipe.ApPipeTask.ConfigClass(), args=pipelineArgs)
        results = pipeBase.Struct(
            argumentParser=apPipeParser,
            parsedCmd=apPipeParsed,
            taskRunner=apPipe.ApPipeTask.RunnerClass(TaskClass=apPipe.ApPipeTask, parsedCmd=apPipeParsed),
            resultList=[],
        )

    return results

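# A usage sketch for runApPipeGen2. The workspace path is hypothetical, and this
# assumes WorkspaceGen2 can be constructed directly from a workspace directory:
#
#     from lsst.ap.verify.workspace import WorkspaceGen2
#     workspace = WorkspaceGen2("/project/workspaces/hits_gen2")
#     cmdLine = ApPipeParser().parse_args(["--id", "visit=411371 ccdnum=25"])
#     apPipeReturn = runApPipeGen2(workspace, cmdLine, processes=4)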
def runApPipeGen3(workspace, parsedCmdLine, processes=1):
    """Run `ap_pipe` on the dataset in the given workspace.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline.

    Returns
    -------
    code : `int`
        An error code that is zero if the pipeline ran without problems, or
        nonzero if there were errors. The exact meaning of nonzero values
        is an implementation detail.
    """
    log = lsst.log.Log.getLogger('ap.verify.pipeline_driver.runApPipeGen3')

    makeApdb(_getApdbArguments(workspace, parsedCmdLine))

    pipelineFile = _getPipelineFile(workspace, parsedCmdLine)
    pipelineArgs = ["pipetask", "run",
                    "--butler-config", workspace.repo,
                    "--pipeline", pipelineFile,
                    ]
    # TODO: workaround for inability to generate crosstalk sources in main
    # processing pipeline (DM-31492).
    instruments = {id["instrument"] for id in workspace.workButler.registry.queryDataIds("instrument")}
    if "DECam" in instruments:
        crosstalkPipeline = "${AP_PIPE_DIR}/pipelines/DarkEnergyCamera/RunIsrForCrosstalkSources.yaml"
        crosstalkArgs = ["pipetask", "run",
                         "--butler-config", workspace.repo,
                         "--pipeline", crosstalkPipeline,
                         ]
        crosstalkArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))
        if parsedCmdLine.dataIds:
            for singleId in parsedCmdLine.dataIds:
                crosstalkArgs.extend(["--data-query", singleId])
        crosstalkArgs.extend(["--processes", str(processes)])
        crosstalkArgs.extend(["--register-dataset-types"])
        subprocess.run(crosstalkArgs, capture_output=False, shell=False, check=False)

        # Force same output run for crosstalk and main processing.
        pipelineArgs.extend(_getCollectionArguments(workspace, reuse=True))
    else:
        # TODO: collections should be determined exclusively by Workspace.workButler,
        # but I can't find a way to hook that up to the graph builder. So use the CLI
        # for now and revisit once DM-26239 is done.
        pipelineArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))

    pipelineArgs.extend(_getConfigArgumentsGen3(workspace, parsedCmdLine))
    if parsedCmdLine.dataIds:
        for singleId in parsedCmdLine.dataIds:
            pipelineArgs.extend(["--data-query", singleId])
    pipelineArgs.extend(["--processes", str(processes)])
    pipelineArgs.extend(["--register-dataset-types"])
    pipelineArgs.extend(["--graph-fixup", "lsst.ap.verify.pipeline_driver._getExecOrder"])

    if not parsedCmdLine.skip_pipeline:
        # subprocess is an unsafe workaround for DM-26239
        # TODO: generalize this code in DM-26028
        # TODO: work off of workspace.workButler after DM-26239
        results = subprocess.run(pipelineArgs, capture_output=False, shell=False, check=False)
        log.info('Pipeline complete.')
        return results.returncode
    else:
        log.info('Skipping AP pipeline entirely.')

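# A usage sketch for runApPipeGen3. The workspace path is hypothetical, and this
# assumes WorkspaceGen3 can be constructed directly from a workspace directory:
#
#     from lsst.ap.verify.workspace import WorkspaceGen3
#     workspace = WorkspaceGen3("/project/workspaces/hits_gen3")
#     cmdLine = ApPipeParser().parse_args(["--data-query", "visit=411371 AND detector=25"])
#     if runApPipeGen3(workspace, cmdLine, processes=4) != 0:
#         raise RuntimeError("AP pipeline failed; see the pipetask output for details.")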
def _getExecOrder():
    """Return any constraints on the Gen 3 execution order.

    The current constraints are that executions of DiaPipelineTask must be
    ordered by visit ID, but this is subject to change.

    Returns
    -------
    order : `lsst.ctrl.mpexec.ExecutionGraphFixup`
        An object encoding the desired execution order as an algorithm for
        modifying inter-quantum dependencies.

    Notes
    -----
    This function must be importable, but need not be public.
    """
    # Source association algorithm is not time-symmetric. Force execution of
    # association (through DiaPipelineTask) in order of ascending visit number.
    return lsst.ctrl.mpexec.execFixupDataId.ExecFixupDataId(
        taskLabel="diaPipe", dimensions=["visit", ], reverse=False)

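# _getExecOrder is not called directly by ap_verify; pipetask imports it by the
# dotted name passed through --graph-fixup in runApPipeGen3 above, roughly
# equivalent to this command line (a sketch, other required options omitted):
#
#     pipetask run --graph-fixup lsst.ap.verify.pipeline_driver._getExecOrder ...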
def _getPipelineFile(workspace, parsed):
    """Return the location of the pipeline file to use for running ap_verify
    on this workspace.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.Workspace`
        A Workspace whose pipeline directory may contain an ApVerify pipeline.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    pipeline : `str`
        The location of the pipeline file to use for running ap_verify.
    """
    if parsed.pipeline:
        return parsed.pipeline
    else:
        customPipeline = os.path.join(workspace.pipelineDir, "ApVerify.yaml")
        if os.path.exists(customPipeline):
            return customPipeline
        else:
            return os.path.join("${AP_VERIFY_DIR}", "pipelines", "ApVerify.yaml")

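# The resolution order above, illustrated (a sketch):
#
#     parsed = ApPipeParser().parse_args([])        # no --pipeline given
#     _getPipelineFile(workspace, parsed)
#     # -> os.path.join(workspace.pipelineDir, "ApVerify.yaml") if that file exists,
#     #    otherwise "${AP_VERIFY_DIR}/pipelines/ApVerify.yaml"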
def _getApdbArguments(workspace, parsed):
    """Return the config options for running make_apdb.py on this workspace,
    as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.Workspace`
        A Workspace whose config directory may contain an
        `~lsst.ap.pipe.ApPipeTask` config.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--config-file``,
        following the conventions of `sys.argv`.
    """
    if not parsed.db:
        parsed.db = "sqlite:///" + workspace.dbLocation

    args = ["--config", "db_url=" + parsed.db]
    # Same special-case check as ApdbConfig.validate()
    if parsed.db.startswith("sqlite"):
        args.extend(["--config", "isolation_level=READ_UNCOMMITTED"])

    return args

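# Example of the arguments produced by _getApdbArguments (a sketch; the dbLocation
# value is hypothetical):
#
#     parsed = ApPipeParser().parse_args([])   # no --db, so the workspace default is used
#     _getApdbArguments(workspace, parsed)
#     # -> ["--config", "db_url=sqlite:////wksp/association.db",
#     #     "--config", "isolation_level=READ_UNCOMMITTED"]
#     # assuming workspace.dbLocation == "/wksp/association.db"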
def _getConfigArguments(workspace, parsed):
    """Return the config options for running ApPipeTask on this workspace, as
    command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen2`
        A Workspace whose config directory may contain an
        `~lsst.ap.pipe.ApPipeTask` config.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--configfile``,
        following the conventions of `sys.argv`.
    """
    overrideFile = apPipe.ApPipeTask._DefaultName + ".py"
    overridePath = os.path.join(workspace.configDir, overrideFile)

    args = ["--configfile", overridePath]
    # Translate APDB-only arguments to work as a sub-config
    args.extend([("diaPipe.apdb." + arg if arg != "--config" else arg)
                 for arg in _getApdbArguments(workspace, parsed)])
    # Put output alerts into the workspace.
    args.extend(["--config", "diaPipe.alertPackager.alertWriteLocation=" + workspace.alertLocation])
    args.extend(["--config", "diaPipe.doPackageAlerts=True"])

    return args

def _getConfigArgumentsGen3(workspace, parsed):
    """Return the config options for running the Gen 3 AP Pipeline on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace whose config directory may contain various configs.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--config-file``,
        following the conventions of `sys.argv`.
    """
    # Translate APDB-only arguments to work as a sub-config
    args = [("diaPipe:apdb." + arg if arg != "--config" else arg)
            for arg in _getApdbArguments(workspace, parsed)]
    args.extend([
        # Put output alerts into the workspace.
        "--config", "diaPipe:alertPackager.alertWriteLocation=" + workspace.alertLocation,
    ])
    return args

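# Example of the translation performed by _getConfigArgumentsGen3 (a sketch,
# continuing the _getApdbArguments example above; <workspace.alertLocation> is a
# placeholder for the actual value):
#
#     ["--config", "diaPipe:apdb.db_url=sqlite:////wksp/association.db",
#      "--config", "diaPipe:apdb.isolation_level=READ_UNCOMMITTED",
#      "--config", "diaPipe:alertPackager.alertWriteLocation=<workspace.alertLocation>"]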
def _getCollectionArguments(workspace, reuse):
    """Return the collections for running the Gen 3 AP Pipeline on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace with a Gen 3 repository.
    reuse : `bool`
        If true, use the previous run collection if one exists. Otherwise,
        create a new run.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--input`` or ``--output``,
        following the conventions of `sys.argv`.
    """
    # workspace.outputName is a chained collection containing all inputs
    args = ["--output", workspace.outputName,
            "--clobber-outputs",
            ]

    registry = workspace.workButler.registry
    # Should refresh the registry to see the crosstalk run from DM-31492, but this
    # currently leads to a bug involving --skip-existing. The only downside of
    # the cached registry is that, with two runs for DECam datasets, a rerun of
    # ap_verify will re-run crosstalk sources in the second run. Using
    # --skip-existing-in would work around that, but would lead to a worse bug in
    # the case that the user is alternating runs with and without --clean-run.
    # registry.refresh()
    oldRuns = list(registry.queryCollections(re.compile(workspace.outputName + r"/\d+T\d+Z")))
    if reuse and oldRuns:
        args.extend(["--extend-run", "--skip-existing"])
    return args
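# Example of the arguments produced by _getCollectionArguments (a sketch;
# "ap_verify-output" is a hypothetical value of workspace.outputName):
#
#     _getCollectionArguments(workspace, reuse=True)
#     # -> ["--output", "ap_verify-output", "--clobber-outputs",
#     #     "--extend-run", "--skip-existing"]
#     # when a timestamped run such as "ap_verify-output/20211102T030405Z" already
#     # exists; without such a run (or with reuse=False) the last two flags are omitted.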