Coverage for python/lsst/ap/verify/pipeline_driver.py: 19%

76 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-29 04:45 -0700

1# 

2# This file is part of ap_verify. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>. 

22# 

23 

24"""Interface between `ap_verify` and `ap_pipe`. 

25 

26This module handles calling `ap_pipe` and converting any information 

27as needed. 

28""" 

29 

30__all__ = ["ApPipeParser", "runApPipeGen3"] 

31 

32import argparse 

33import os 

34import re 

35import subprocess 

36import logging 

37 

38import lsst.ctrl.mpexec.execFixupDataId # not part of lsst.ctrl.mpexec 

39import lsst.ctrl.mpexec.cli.pipetask 

40from lsst.ap.pipe.make_apdb import makeApdb 

41 

42_LOG = logging.getLogger(__name__) 

43 

44 

class ApPipeParser(argparse.ArgumentParser):
    """An argument parser for data needed by ``ap_pipe`` activities.

    This parser is not complete, and is designed to be passed to another parser
    using the `parent` parameter.
    """

    def __init__(self):
        # Help and documentation will be handled by main program's parser.
        # Use super() rather than calling ArgumentParser.__init__ directly.
        super().__init__(add_help=False)
        # namespace.dataIds will always be a list of 0 or more nonempty strings, regardless of inputs.
        # TODO: in Python 3.8+, action='extend' handles nargs='?' more naturally than 'append'.
        self.add_argument('-d', '--data-query', dest='dataIds', action='append', default=[],
                          help='An identifier for the data to process.')
        self.add_argument("-p", "--pipeline", default=None,
                          help="A custom version of the ap_verify pipeline (e.g., with different metrics). "
                               "Defaults to the ApVerify.yaml within --dataset.")
        self.add_argument("--db", "--db_url", default=None,
                          help="A location for the AP database, formatted as if for ApdbConfig.db_url. "
                               "Defaults to an SQLite file in the --output directory.")
        self.add_argument("--skip-pipeline", action="store_true",
                          help="Do not run the AP pipeline itself. This argument is useful "
                               "for testing metrics on a fixed data set.")
        self.add_argument("--clean-run", action="store_true",
                          help="Run the pipeline with a new run collection, "
                               "even if one already exists.")

71 

72 

def runApPipeGen3(workspace, parsedCmdLine, processes=1):
    """Run `ap_pipe` on this object's dataset.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline.

    Returns
    -------
    code : `int`
        An error code that is zero if the pipeline ran without problems, or
        nonzero if there were errors. The exact meaning of nonzero values
        is an implementation detail.
    """
    log = _LOG.getChild('runApPipeGen3')

    makeApdb(_getApdbArguments(workspace, parsedCmdLine))

    def appendCommonRunArgs(args):
        # Append the data-query, process-count, and dataset-type-registration
        # arguments shared by every pipetask invocation (modifies args in place).
        for singleId in parsedCmdLine.dataIds:
            args.extend(["--data-query", singleId])
        args.extend(["--processes", str(processes)])
        args.extend(["--register-dataset-types"])

    pipelineFile = _getPipelineFile(workspace, parsedCmdLine)
    pipelineArgs = ["pipetask", "--long-log", "run",
                    # fail-fast to ensure processing errors are obvious, and
                    # to compensate for the extra interconnections added by
                    # --graph-fixup (further down).
                    "--fail-fast",
                    "--butler-config", workspace.repo,
                    "--pipeline", pipelineFile,
                    ]
    # TODO: workaround for inability to generate crosstalk sources in main
    # processing pipeline (DM-31492).
    # NOTE: renamed loop variable; the original shadowed the builtin `id`.
    instruments = {dataId["instrument"]
                   for dataId in workspace.workButler.registry.queryDataIds("instrument")}
    if "DECam" in instruments:
        crosstalkPipeline = "${AP_PIPE_DIR}/pipelines/DECam/RunIsrForCrosstalkSources.yaml"
        crosstalkArgs = ["pipetask", "run",
                         "--butler-config", workspace.repo,
                         "--pipeline", crosstalkPipeline,
                         ]
        crosstalkArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))
        appendCommonRunArgs(crosstalkArgs)
        subprocess.run(crosstalkArgs, capture_output=False, shell=False, check=False)

        # Force same output run for crosstalk and main processing.
        pipelineArgs.extend(_getCollectionArguments(workspace, reuse=True))
    else:
        # TODO: collections should be determined exclusively by Workspace.workButler,
        # but I can't find a way to hook that up to the graph builder. So use the CLI
        # for now and revisit once DM-26239 is done.
        pipelineArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))

    pipelineArgs.extend(_getConfigArgumentsGen3(workspace, parsedCmdLine))
    appendCommonRunArgs(pipelineArgs)
    pipelineArgs.extend(["--graph-fixup", "lsst.ap.verify.pipeline_driver._getExecOrder"])

    if not parsedCmdLine.skip_pipeline:
        # subprocess is an unsafe workaround for DM-26239
        # TODO: generalize this code in DM-26028
        # TODO: work off of workspace.workButler after DM-26239
        log.info("Running pipeline:")
        log.info(' '.join(pipelineArgs))
        results = subprocess.run(pipelineArgs, capture_output=False, shell=False, check=False)
        log.info('Pipeline complete.')
        return results.returncode
    else:
        log.info('Skipping AP pipeline entirely.')
        # NOTE(review): falls through and returns None here, although the
        # docstring promises an int -- confirm callers tolerate None before
        # changing this to `return 0`.

149 

150 

def _getExecOrder():
    """Return any constraints on the Gen 3 execution order.

    The current constraints are that executions of DiaPipelineTask must be
    ordered by visit ID, but this is subject to change.

    Returns
    -------
    order : `lsst.ctrl.mpexec.ExecutionGraphFixup`
        An object encoding the desired execution order as an algorithm for
        modifying inter-quantum dependencies.

    Notes
    -----
    This function must be importable, but need not be public.
    """
    # The source association algorithm is not time-symmetric, so every
    # execution of association (via DiaPipelineTask) must happen in order
    # of ascending visit number.
    fixup = lsst.ctrl.mpexec.execFixupDataId.ExecFixupDataId(
        taskLabel="diaPipe",
        dimensions=["visit", ],
        reverse=False,
    )
    return fixup

171 

172 

def _getPipelineFile(workspace, parsed):
    """Return the location of the ap_verify pipeline file to run on
    this workspace.

    (The original docstring was copy-pasted from ``_getApdbArguments`` and
    incorrectly described make_apdb.py config options.)

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.Workspace`
        A Workspace whose pipeline directory may contain an ApVerify pipeline.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    pipeline : `str`
        The location of the pipeline file to use for running ap_verify.
    """
    # A pipeline given explicitly on the command line always wins.
    if parsed.pipeline:
        return parsed.pipeline
    # Otherwise prefer a dataset-specific pipeline, falling back to the
    # default pipeline shipped with ap_verify.
    customPipeline = os.path.join(workspace.pipelineDir, "ApVerify.yaml")
    if os.path.exists(customPipeline):
        return customPipeline
    return os.path.join("${AP_VERIFY_DIR}", "pipelines", "ApVerify.yaml")

197 

198 

def _getApdbArguments(workspace, parsed):
    """Return the config options for running make_apdb.py on this workspace,
    as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.Workspace`
        A Workspace whose config directory may contain an
        `~lsst.ap.pipe.ApPipeTask` config.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--config-file``,
        following the conventions of `sys.argv`.
    """
    if not parsed.db:
        # No database given on the command line: default to an SQLite file
        # in the workspace. The default is deliberately written back into
        # the parsed namespace so later callers see the same value.
        parsed.db = "sqlite:///" + workspace.dbLocation

    return ["--config", "db_url=" + parsed.db]

223 

224 

def _getConfigArgumentsGen3(workspace, parsed):
    """Return the config options for running the Gen 3 AP Pipeline on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace whose config directory may contain various configs.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--config-file``,
        following the conventions of `sys.argv`.
    """
    # Translate APDB-only arguments to work as a sub-config: leave the
    # "--config" flags untouched, but prefix each key=value payload so it
    # lands in the diaPipe task's nested apdb config.
    args = []
    for arg in _getApdbArguments(workspace, parsed):
        if arg == "--config":
            args.append(arg)
        else:
            args.append("diaPipe:apdb." + arg)
    # Put output alerts into the workspace.
    args.append("--config")
    args.append("diaPipe:alertPackager.alertWriteLocation=" + workspace.alertLocation)
    return args

250 

251 

def _getCollectionArguments(workspace, reuse):
    """Return the collections for running the Gen 3 AP Pipeline on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace with a Gen 3 repository.
    reuse : `bool`
        If true, use the previous run collection if one exists. Otherwise,
        create a new run.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--input`` or ``--output``,
        following the conventions of `sys.argv`.
    """
    # workspace.outputName is a chained collection containing all inputs
    args = ["--output", workspace.outputName,
            "--clobber-outputs",
            ]

    registry = workspace.workButler.registry
    # Should refresh registry to see crosstalk run from DM-31492, but this
    # currently leads to a bug involving --skip-existing. The only downside of
    # the cached registry is that, with two runs for DECam datasets, a rerun of
    # ap_verify will re-run crosstalk sources in the second run. Using
    # skip-existing-in would work around that, but would lead to a worse bug in
    # the case that the user is alternating runs with and without --clean-run.
    # registry.refresh()
    # Escape the collection name so that any regex metacharacters in it
    # (e.g., "+") cannot change which run collections match; only the
    # trailing "/<timestamp>Z" part is a real pattern.
    runPattern = re.compile(re.escape(workspace.outputName) + r"/\d+T\d+Z")
    oldRuns = list(registry.queryCollections(runPattern))
    if reuse and oldRuns:
        args.extend(["--extend-run", "--skip-existing"])
    return args