Coverage for python/lsst/ap/verify/pipeline_driver.py: 19%

76 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-01 19:44 -0800

1# 

2# This file is part of ap_verify. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>. 

22# 

23 

24"""Interface between `ap_verify` and `ap_pipe`. 

25 

26This module handles calling `ap_pipe` and converting any information 

27as needed. 

28""" 

29 

30__all__ = ["ApPipeParser", "runApPipeGen3"] 

31 

32import argparse 

33import os 

34import re 

35import subprocess 

36import logging 

37 

38import lsst.ctrl.mpexec.execFixupDataId # not part of lsst.ctrl.mpexec 

39import lsst.ctrl.mpexec.cli.pipetask 

40from lsst.ap.pipe.make_apdb import makeApdb 

41 

42_LOG = logging.getLogger(__name__) 

43 

44 

class ApPipeParser(argparse.ArgumentParser):
    """An argument parser for data needed by ``ap_pipe`` activities.

    This parser is not complete, and is designed to be passed to another parser
    using the `parent` parameter.
    """

    def __init__(self):
        # Help and documentation will be handled by main program's parser
        super().__init__(add_help=False)
        # namespace.dataIds will always be a list of 0 or more nonempty strings,
        # regardless of inputs.
        # NOTE: action='extend' is NOT a drop-in replacement here: with the
        # default nargs it would extend the list with the *characters* of a
        # single string argument, so 'append' is the correct action.
        self.add_argument('-d', '--data-query', dest='dataIds', action='append', default=[],
                          help='An identifier for the data to process.')
        self.add_argument("-p", "--pipeline", default=None,
                          help="A custom version of the ap_verify pipeline (e.g., with different metrics). "
                               "Defaults to the ApVerify.yaml within --dataset.")
        self.add_argument("--db", "--db_url", default=None,
                          help="A location for the AP database, formatted as if for ApdbConfig.db_url. "
                               "Defaults to an SQLite file in the --output directory.")
        self.add_argument("--skip-pipeline", action="store_true",
                          help="Do not run the AP pipeline itself. This argument is useful "
                               "for testing metrics on a fixed data set.")
        self.add_argument("--clean-run", action="store_true",
                          help="Run the pipeline with a new run collection, "
                               "even if one already exists.")

71 

72 

def runApPipeGen3(workspace, parsedCmdLine, processes=1):
    """Run `ap_pipe` on this object's dataset.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline

    Returns
    -------
    code : `int`
        An error code that is zero if the pipeline ran without problems, or
        nonzero if there were errors. The exact meaning of nonzero values
        is an implementation detail.
    """
    log = _LOG.getChild('runApPipeGen3')

    makeApdb(_getApdbArguments(workspace, parsedCmdLine))

    # Arguments shared by the crosstalk precursor run and the main pipeline
    # run (previously duplicated inline for both invocations).
    commonArgs = []
    for singleId in parsedCmdLine.dataIds:
        commonArgs.extend(["--data-query", singleId])
    commonArgs.extend(["--processes", str(processes)])
    commonArgs.append("--register-dataset-types")

    pipelineFile = _getPipelineFile(workspace, parsedCmdLine)
    pipelineArgs = ["pipetask", "run",
                    "--butler-config", workspace.repo,
                    "--pipeline", pipelineFile,
                    ]
    # TODO: workaround for inability to generate crosstalk sources in main
    # processing pipeline (DM-31492).
    instruments = {dataId["instrument"]
                   for dataId in workspace.workButler.registry.queryDataIds("instrument")}
    if "DECam" in instruments:
        crosstalkPipeline = "${AP_PIPE_DIR}/pipelines/DarkEnergyCamera/RunIsrForCrosstalkSources.yaml"
        crosstalkArgs = ["pipetask", "run",
                         "--butler-config", workspace.repo,
                         "--pipeline", crosstalkPipeline,
                         ]
        crosstalkArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))
        crosstalkArgs.extend(commonArgs)
        crosstalkResults = subprocess.run(crosstalkArgs, capture_output=False, shell=False, check=False)
        # Main processing may still be able to run without crosstalk sources,
        # so warn and continue instead of aborting.
        if crosstalkResults.returncode != 0:
            log.warning('Crosstalk pipeline exited with code %d; continuing anyway.',
                        crosstalkResults.returncode)

        # Force same output run for crosstalk and main processing.
        pipelineArgs.extend(_getCollectionArguments(workspace, reuse=True))
    else:
        # TODO: collections should be determined exclusively by Workspace.workButler,
        # but I can't find a way to hook that up to the graph builder. So use the CLI
        # for now and revisit once DM-26239 is done.
        pipelineArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))

    pipelineArgs.extend(_getConfigArgumentsGen3(workspace, parsedCmdLine))
    pipelineArgs.extend(commonArgs)
    pipelineArgs.extend(["--graph-fixup", "lsst.ap.verify.pipeline_driver._getExecOrder"])

    if not parsedCmdLine.skip_pipeline:
        # subprocess is an unsafe workaround for DM-26239
        # TODO: generalize this code in DM-26028
        # TODO: work off of workspace.workButler after DM-26239
        results = subprocess.run(pipelineArgs, capture_output=False, shell=False, check=False)
        log.info('Pipeline complete.')
        return results.returncode
    else:
        log.info('Skipping AP pipeline entirely.')
        # Fix: always return an int as documented (previously returned None here).
        return 0

143 

144 

def _getExecOrder():
    """Return any constraints on the Gen 3 execution order.

    The current constraints are that executions of DiaPipelineTask must be
    ordered by visit ID, but this is subject to change.

    Returns
    -------
    order : `lsst.ctrl.mpexec.ExecutionGraphFixup`
        An object encoding the desired execution order as an algorithm for
        modifying inter-quantum dependencies.

    Notes
    -----
    This function must be importable, but need not be public.
    """
    # The source association algorithm is not time-symmetric, so quanta of
    # DiaPipelineTask ("diaPipe") must run in ascending visit order.
    fixup = lsst.ctrl.mpexec.execFixupDataId.ExecFixupDataId(
        taskLabel="diaPipe", dimensions=["visit", ], reverse=False)
    return fixup

165 

166 

167def _getPipelineFile(workspace, parsed): 

168 """Return the config options for running make_apdb.py on this workspace, 

169 as command-line arguments. 

170 

171 Parameters 

172 ---------- 

173 workspace : `lsst.ap.verify.workspace.Workspace` 

174 A Workspace whose pipeline directory may contain an ApVerify pipeline. 

175 parsed : `argparse.Namespace` 

176 Command-line arguments, including all arguments supported by `ApPipeParser`. 

177 

178 Returns 

179 ------- 

180 pipeline : `str` 

181 The location of the pipeline file to use for running ap_verify. 

182 """ 

183 if parsed.pipeline: 

184 return parsed.pipeline 

185 else: 

186 customPipeline = os.path.join(workspace.pipelineDir, "ApVerify.yaml") 

187 if os.path.exists(customPipeline): 

188 return customPipeline 

189 else: 

190 return os.path.join("${AP_VERIFY_DIR}", "pipelines", "ApVerify.yaml") 

191 

192 

193def _getApdbArguments(workspace, parsed): 

194 """Return the config options for running make_apdb.py on this workspace, 

195 as command-line arguments. 

196 

197 Parameters 

198 ---------- 

199 workspace : `lsst.ap.verify.workspace.Workspace` 

200 A Workspace whose config directory may contain an 

201 `~lsst.ap.pipe.ApPipeTask` config. 

202 parsed : `argparse.Namespace` 

203 Command-line arguments, including all arguments supported by `ApPipeParser`. 

204 

205 Returns 

206 ------- 

207 args : `list` of `str` 

208 Command-line arguments calling ``--config`` or ``--config-file``, 

209 following the conventions of `sys.argv`. 

210 """ 

211 if not parsed.db: 

212 parsed.db = "sqlite:///" + workspace.dbLocation 

213 

214 args = ["--config", "db_url=" + parsed.db] 

215 # Same special-case check as ApdbConfig.validate() 

216 if parsed.db.startswith("sqlite"): 

217 args.extend(["--config", "isolation_level=READ_UNCOMMITTED"]) 

218 

219 return args 

220 

221 

def _getConfigArgumentsGen3(workspace, parsed):
    """Return the config options for running the Gen 3 AP Pipeline on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace whose config directory may contain various configs.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--config-file``,
        following the conventions of `sys.argv`.
    """
    # The APDB options target the standalone make_apdb.py interface; rewrite
    # each config value (but not the "--config" flags themselves) as an
    # override of the diaPipe task's apdb sub-config.
    args = []
    for arg in _getApdbArguments(workspace, parsed):
        if arg == "--config":
            args.append(arg)
        else:
            args.append("diaPipe:apdb." + arg)
    # Put output alerts into the workspace.
    args.append("--config")
    args.append("diaPipe:alertPackager.alertWriteLocation=" + workspace.alertLocation)
    return args

247 

248 

249def _getCollectionArguments(workspace, reuse): 

250 """Return the collections for running the Gen 3 AP Pipeline on this 

251 workspace, as command-line arguments. 

252 

253 Parameters 

254 ---------- 

255 workspace : `lsst.ap.verify.workspace.WorkspaceGen3` 

256 A Workspace with a Gen 3 repository. 

257 reuse : `bool` 

258 If true, use the previous run collection if one exists. Otherwise, 

259 create a new run. 

260 

261 Returns 

262 ------- 

263 args : `list` of `str` 

264 Command-line arguments calling ``--input`` or ``--output``, 

265 following the conventions of `sys.argv`. 

266 """ 

267 # workspace.outputName is a chained collection containing all inputs 

268 args = ["--output", workspace.outputName, 

269 "--clobber-outputs", 

270 ] 

271 

272 registry = workspace.workButler.registry 

273 # Should refresh registry to see crosstalk run from DM-31492, but this 

274 # currently leads to a bug involving --skip-existing. The only downside of 

275 # the cached registry is that, with two runs for DECam datasets, a rerun of 

276 # ap_verify will re-run crosstalk sources in the second run. Using 

277 # skip-existing-in would work around that, but would lead to a worse bug in 

278 # the case that the user is alternating runs with and without --clean-run. 

279 # registry.refresh() 

280 oldRuns = list(registry.queryCollections(re.compile(workspace.outputName + r"/\d+T\d+Z"))) 

281 if reuse and oldRuns: 

282 args.extend(["--extend-run", "--skip-existing"]) 

283 return args