Coverage for python/lsst/ap/verify/pipeline_driver.py: 21%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

99 statements  

1# 

2# This file is part of ap_verify. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>. 

22# 

23 

24"""Interface between `ap_verify` and `ap_pipe`. 

25 

26This module handles calling `ap_pipe` and converting any information 

27as needed. 

28""" 

29 

30__all__ = ["ApPipeParser", "runApPipeGen2", "runApPipeGen3"] 

31 

32import argparse 

33import os 

34import re 

35import subprocess 

36 

37import lsst.log 

38from lsst.utils import getPackageDir 

39import lsst.pipe.base as pipeBase 

40import lsst.ctrl.mpexec.execFixupDataId # not part of lsst.ctrl.mpexec 

41import lsst.ctrl.mpexec.cli.pipetask 

42import lsst.ap.pipe as apPipe 

43from lsst.ap.pipe.make_apdb import makeApdb 

44 

45 

class ApPipeParser(argparse.ArgumentParser):
    """An argument parser for data needed by ``ap_pipe`` activities.

    This parser is not complete, and is designed to be passed to another parser
    using the ``parents`` parameter of `argparse.ArgumentParser`.
    """

    def __init__(self):
        defaultPipeline = os.path.join(getPackageDir("ap_verify"), "pipelines", "ApVerify.yaml")

        # Help and documentation will be handled by main program's parser
        super().__init__(add_help=False)
        # namespace.dataIds will always be a list of 0 or more nonempty strings, regardless of inputs.
        # TODO: in Python 3.8+, action='extend' handles nargs='?' more naturally than 'append'.
        self.add_argument('--id', '-d', '--data-query', dest='dataIds',
                          action=self.AppendOptional, nargs='?', default=[],
                          help='An identifier for the data to process.')
        self.add_argument("-p", "--pipeline", default=defaultPipeline,
                          help="A custom version of the ap_verify pipeline (e.g., with different metrics).")
        self.add_argument("--db", "--db_url", default=None,
                          help="A location for the AP database, formatted as if for ApdbConfig.db_url. "
                               "Defaults to an SQLite file in the --output directory.")
        self.add_argument("--skip-pipeline", action="store_true",
                          help="Do not run the AP pipeline itself. This argument is useful "
                               "for testing metrics on a fixed data set.")
        self.add_argument("--clean-run", action="store_true",
                          help="Run the pipeline with a new run collection, "
                               "even if one already exists.")

    class AppendOptional(argparse.Action):
        """A variant of the built-in "append" action that ignores None values
        instead of appending them.
        """
        # This class can't safely inherit from the built-in "append" action
        # because there is no public class that implements it.
        def __call__(self, parser, namespace, values, option_string=None):
            """Append ``values`` to the destination list unless it is `None`.

            Parameters
            ----------
            parser : `argparse.ArgumentParser`
                The parser invoking this action (unused).
            namespace : `argparse.Namespace`
                The namespace whose ``self.dest`` attribute is updated.
            values : `str` or `None`
                The value given on the command line; `None` if the option
                was passed without an argument.
            option_string : `str`, optional
                The option string that triggered this action (unused).
            """
            if values is None:
                return

            # Copy instead of appending in place: the current value may be the
            # very list object passed as ``default``, which argparse hands to
            # every namespace it creates. Mutating it would leak values from
            # one parse_args() call into the next.
            previous = getattr(namespace, self.dest, None)
            allValues = list(previous) if previous is not None else []
            allValues.append(values)
            setattr(namespace, self.dest, allValues)

88 

89 

def runApPipeGen2(workspace, parsedCmdLine, processes=1):
    """Run `ap_pipe` on this object's dataset.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen2`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline

    Returns
    -------
    apPipeReturn : `lsst.pipe.base.Struct`
        The `~lsst.pipe.base.Struct` returned from
        `~lsst.ap.pipe.ApPipeTask.parseAndRun` with
        ``doReturnResults=False``. This object is valid even if
        `~lsst.ap.pipe.ApPipeTask` was never run.
    """
    log = lsst.log.Log.getLogger('ap.verify.pipeline_driver.runApPipeGen2')

    # The APDB is created even when the pipeline itself is skipped.
    makeApdb(_getApdbArguments(workspace, parsedCmdLine))

    pipelineArgs = [workspace.dataRepo,
                    "--output", workspace.outputRepo,
                    "--calib", workspace.calibRepo,
                    "--template", workspace.templateRepo]
    pipelineArgs.extend(_getConfigArguments(workspace, parsedCmdLine))
    if parsedCmdLine.dataIds:
        for singleId in parsedCmdLine.dataIds:
            # Each ID is one string of space-separated key=value pairs.
            # split() without an argument collapses repeated, leading, and
            # trailing whitespace, so no empty strings reach the parser.
            pipelineArgs.extend(["--id", *singleId.split()])
    else:
        # Pass a bare --id when the user gave no IDs.
        # NOTE(review): presumably this selects all available data; confirm
        # against the ApPipeTask argument parser.
        pipelineArgs.append("--id")
    pipelineArgs.extend(["--processes", str(processes)])
    pipelineArgs.append("--noExit")

    if not parsedCmdLine.skip_pipeline:
        results = apPipe.ApPipeTask.parseAndRun(pipelineArgs)
        log.info('Pipeline complete')
    else:
        log.info('Skipping AP pipeline entirely.')
        # Build the same Struct fields by hand, with an empty result list,
        # so callers get a usable object without the task having run.
        apPipeParser = apPipe.ApPipeTask._makeArgumentParser()
        apPipeParsed = apPipeParser.parse_args(config=apPipe.ApPipeTask.ConfigClass(), args=pipelineArgs)
        results = pipeBase.Struct(
            argumentParser=apPipeParser,
            parsedCmd=apPipeParsed,
            taskRunner=apPipe.ApPipeTask.RunnerClass(TaskClass=apPipe.ApPipeTask, parsedCmd=apPipeParsed),
            resultList=[],
        )

    return results

142 

143 

def runApPipeGen3(workspace, parsedCmdLine, processes=1):
    """Run `ap_pipe` on this object's dataset.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline

    Returns
    -------
    code : `int`
        An error code that is zero if the pipeline ran without problems
        (or was skipped on request), or nonzero if there were errors. The
        exact meaning of nonzero values is an implementation detail.
    """
    log = lsst.log.Log.getLogger('ap.verify.pipeline_driver.runApPipeGen3')

    # The APDB is created even when the pipeline itself is skipped.
    makeApdb(_getApdbArguments(workspace, parsedCmdLine))

    pipelineArgs = ["pipetask", "run",
                    "--butler-config", workspace.repo,
                    "--pipeline", parsedCmdLine.pipeline,
                    ]
    # TODO: collections should be determined exclusively by Workspace.workButler,
    # but I can't find a way to hook that up to the graph builder. So use the CLI
    # for now and revisit once DM-26239 is done.
    pipelineArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))
    pipelineArgs.extend(_getConfigArgumentsGen3(workspace, parsedCmdLine))
    if parsedCmdLine.dataIds:
        for singleId in parsedCmdLine.dataIds:
            pipelineArgs.extend(["--data-query", singleId])
    pipelineArgs.extend(["--processes", str(processes)])
    pipelineArgs.extend(["--register-dataset-types"])
    # The fixup is passed by fully qualified name, so _getExecOrder must stay
    # importable from this module.
    pipelineArgs.extend(["--graph-fixup", "lsst.ap.verify.pipeline_driver._getExecOrder"])

    if parsedCmdLine.skip_pipeline:
        log.info('Skipping AP pipeline entirely.')
        # Nothing ran, so nothing failed: return the documented success code
        # rather than falling off the end and returning None.
        return 0

    # subprocess is an unsafe workaround for DM-26239
    # TODO: generalize this code in DM-26028
    # TODO: work off of workspace.workButler after DM-26239
    # check=False: a failing pipeline is reported through the return code
    # instead of raising CalledProcessError.
    results = subprocess.run(pipelineArgs, capture_output=False, shell=False, check=False)
    log.info('Pipeline complete.')
    return results.returncode

192 

193 

def _getExecOrder():
    """Build the Gen 3 execution-order constraints for the AP pipeline.

    At present the only constraint is that quanta of the task labeled
    ``diaPipe`` run in ascending ``visit`` order; this may change.

    Returns
    -------
    order : `lsst.ctrl.mpexec.ExecutionGraphFixup`
        An object encoding the desired execution order as an algorithm for
        modifying inter-quantum dependencies.

    Notes
    -----
    This function must be importable, but need not be public.
    """
    # Source association is not symmetric in time, so association (run via
    # DiaPipelineTask) is forced to go in order of increasing visit number.
    fixupOptions = {
        "taskLabel": "diaPipe",
        "dimensions": ["visit", ],
        "reverse": False,
    }
    return lsst.ctrl.mpexec.execFixupDataId.ExecFixupDataId(**fixupOptions)

214 

215 

def _getApdbArguments(workspace, parsed):
    """Build the ``--config`` command-line arguments that point make_apdb.py
    at this workspace's database.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.Workspace`
        A Workspace whose ``dbLocation`` is used when no database was
        requested explicitly.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
        If ``parsed.db`` is empty, it is overwritten in place with the
        default SQLite URL.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config``,
        following the conventions of `sys.argv`.
    """
    dbUrl = parsed.db
    if not dbUrl:
        # No database requested: fall back to the workspace's SQLite file
        # and record the choice on the namespace.
        dbUrl = "sqlite:///" + workspace.dbLocation
        parsed.db = dbUrl

    apdbArgs = ["--config", "db_url=" + dbUrl]
    # Same special-case check as ApdbConfig.validate()
    if dbUrl.startswith("sqlite"):
        apdbArgs += ["--config", "isolation_level=READ_UNCOMMITTED"]
    return apdbArgs

243 

244 

def _getConfigArguments(workspace, parsed):
    """Build the config-related command-line arguments for running
    ApPipeTask on this workspace.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen2`
        A Workspace whose config directory may contain an
        `~lsst.ap.pipe.ApPipeTask` config.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--configfile``,
        following the conventions of `sys.argv`.
    """
    overridePath = os.path.join(workspace.configDir, apPipe.ApPipeTask._DefaultName + ".py")
    configArgs = ["--configfile", overridePath]

    # The APDB settings live in a sub-config of the task, so every value
    # (everything except the "--config" flag itself) gets a prefix.
    for apdbArg in _getApdbArguments(workspace, parsed):
        if apdbArg == "--config":
            configArgs.append(apdbArg)
        else:
            configArgs.append("diaPipe.apdb." + apdbArg)

    # Put output alerts into the workspace.
    configArgs += [
        "--config", "diaPipe.alertPackager.alertWriteLocation=" + workspace.alertLocation,
        "--config", "diaPipe.doPackageAlerts=True",
    ]
    return configArgs

275 

276 

def _getConfigArgumentsGen3(workspace, parsed):
    """Build the config-related command-line arguments for running the
    Gen 3 AP Pipeline on this workspace.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace whose config directory may contain various configs.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config``, ``--config-file``, or
        ``--instrument``, following the conventions of `sys.argv`.
    """
    configArgs = []
    # The APDB settings live in a sub-config of the diaPipe task, so every
    # value (everything except the "--config" flag itself) gets a prefix.
    for apdbArg in _getApdbArguments(workspace, parsed):
        if apdbArg == "--config":
            configArgs.append(apdbArg)
        else:
            configArgs.append("diaPipe:apdb." + apdbArg)

    # Put output alerts into the workspace.
    configArgs += [
        "--config", "diaPipe:alertPackager.alertWriteLocation=" + workspace.alertLocation,
        "--config", "diaPipe:doPackageAlerts=True",
    ]

    # TODO: the configs below should not be needed after DM-26140
    for taskLabel in ("calibrate", "imageDifference"):
        taskConfig = os.path.join(workspace.configDir, taskLabel + ".py")
        configArgs += ["--config-file", taskLabel + ":" + taskConfig]

    # TODO: this config should not be needed either after DM-26140
    # Unlike the two above, the ISR override is only passed if the file exists.
    isrConfig = os.path.join(workspace.configDir, "isr.py")
    if os.path.exists(isrConfig):
        configArgs += ["--config-file", "isr:" + isrConfig]

    # TODO: reverse-engineering the instrument should not be needed after DM-26140
    # pipetask will crash if there is more than one instrument
    instrumentIds = workspace.workButler.registry.queryDataIds("instrument").expanded()
    for idRecord in instrumentIds:
        configArgs += ["--instrument", idRecord.records["instrument"].class_name]

    return configArgs

315 

316 

def _getCollectionArguments(workspace, reuse):
    """Return the collections for running the Gen 3 AP Pipeline on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace with a Gen 3 repository.
    reuse : `bool`
        If true, use the previous run collection if one exists. Otherwise,
        create a new run.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--output`` and ``--clobber-outputs``,
        plus ``--extend-run`` and ``--skip-existing`` when an old run is
        reused, following the conventions of `sys.argv`.
    """
    # workspace.outputName is a chained collection containing all inputs
    args = ["--output", workspace.outputName,
            "--clobber-outputs",
            ]

    registry = workspace.workButler.registry
    # Look for existing collections named "<outputName>/<digits>T<digits>Z".
    # The collection name is escaped so that characters such as "." or "+"
    # are matched literally instead of being read as regex syntax.
    runPattern = re.compile(re.escape(workspace.outputName) + r"/\d+T\d+Z")
    oldRuns = list(registry.queryCollections(runPattern))
    if reuse and oldRuns:
        args.extend(["--extend-run", "--skip-existing"])
    return args