Coverage for python/lsst/ap/verify/pipeline_driver.py: 19%

76 statements  

#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

"""Interface between `ap_verify` and `ap_pipe`.

This module handles calling `ap_pipe` and converting any information
as needed.
"""

__all__ = ["ApPipeParser", "runApPipeGen3"]

import argparse
import os
import re
import subprocess
import logging

import lsst.ctrl.mpexec.execFixupDataId  # not part of lsst.ctrl.mpexec
import lsst.dax.apdb as daxApdb

_LOG = logging.getLogger(__name__)


class ApPipeParser(argparse.ArgumentParser):
    """An argument parser for data needed by ``ap_pipe`` activities.

    This parser is not complete, and is designed to be passed to another
    parser using the ``parents`` parameter.
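
    Examples
    --------
    A minimal sketch of composing these arguments into a program's own
    parser (the surrounding parser and argument values are illustrative):

    >>> import argparse
    >>> parser = argparse.ArgumentParser(parents=[ApPipeParser()])
    >>> args = parser.parse_args(["--data-query", "visit = 12345", "--clean-run"])
    >>> args.dataIds
    ['visit = 12345']
    >>> args.clean_run
    True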

    """

    def __init__(self):
        # Help and documentation will be handled by main program's parser
        argparse.ArgumentParser.__init__(self, add_help=False)
        # namespace.dataIds will always be a list of 0 or more nonempty strings, regardless of inputs.
        # TODO: in Python 3.8+, action='extend' handles nargs='?' more naturally than 'append'.
        self.add_argument('-d', '--data-query', dest='dataIds', action='append', default=[],
                          help='An identifier for the data to process.')
        self.add_argument("-p", "--pipeline", default=None,
                          help="A custom version of the ap_verify pipeline (e.g., with different metrics). "
                               "Defaults to the ApVerify.yaml within --dataset.")
        self.add_argument("--db", "--db_url", default=None,
                          help="A location for the AP database, formatted as if for apdb-cli create-sql. "
                               "Defaults to an SQLite file in the --output directory.")
        self.add_argument("--skip-pipeline", action="store_true",
                          help="Do not run the AP pipeline itself. This argument is useful "
                               "for testing metrics on a fixed data set.")
        self.add_argument("--clean-run", action="store_true",
                          help="Run the pipeline with a new run collection, "
                               "even if one already exists.")


def runApPipeGen3(workspace, parsedCmdLine, processes=1):
    """Run `ap_pipe` on the workspace's dataset.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline.

    Returns
    -------
    code : `int`
        An error code that is zero if the pipeline ran without problems, or
        nonzero if there were errors. The exact meaning of nonzero values
        is an implementation detail.
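
    Examples
    --------
    A sketch of typical use from a driver program (the workspace location
    and data query are illustrative)::

        parser = argparse.ArgumentParser(parents=[ApPipeParser()])
        args = parser.parse_args(["--data-query", "visit = 12345"])
        workspace = WorkspaceGen3("/path/to/workspace")
        status = runApPipeGen3(workspace, args, processes=4)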

    """

    log = _LOG.getChild('runApPipeGen3')

    _makeApdb(workspace, _getApdbArguments(workspace, parsedCmdLine))

    pipelineFile = _getPipelineFile(workspace, parsedCmdLine)
    pipelineArgs = ["pipetask", "--long-log", "run",
                    # fail-fast to ensure processing errors are obvious, and
                    # to compensate for the extra interconnections added by
                    # --graph-fixup (further down).
                    "--fail-fast",
                    "--butler-config", workspace.repo,
                    "--pipeline", pipelineFile,
                    ]
    # TODO: workaround for inability to generate crosstalk sources in main
    # processing pipeline (DM-31492).
    instruments = {dataId["instrument"]
                   for dataId in workspace.workButler.registry.queryDataIds("instrument")}
    if "DECam" in instruments:
        crosstalkPipeline = "${AP_PIPE_DIR}/pipelines/DECam/RunIsrForCrosstalkSources.yaml"
        crosstalkArgs = ["pipetask", "run",
                         "--butler-config", workspace.repo,
                         "--pipeline", crosstalkPipeline,
                         ]
        crosstalkArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))
        if parsedCmdLine.dataIds:
            for singleId in parsedCmdLine.dataIds:
                crosstalkArgs.extend(["--data-query", singleId])
        crosstalkArgs.extend(["--processes", str(processes)])
        crosstalkArgs.extend(["--register-dataset-types"])
        subprocess.run(crosstalkArgs, capture_output=False, shell=False, check=False)

        # Force same output run for crosstalk and main processing.
        pipelineArgs.extend(_getCollectionArguments(workspace, reuse=True))
    else:
        # TODO: collections should be determined exclusively by Workspace.workButler,
        # but I can't find a way to hook that up to the graph builder. So use the CLI
        # for now and revisit once DM-26239 is done.
        pipelineArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))

    pipelineArgs.extend(_getConfigArgumentsGen3(workspace, parsedCmdLine))
    if parsedCmdLine.dataIds:
        for singleId in parsedCmdLine.dataIds:
            pipelineArgs.extend(["--data-query", singleId])
    pipelineArgs.extend(["--processes", str(processes)])
    pipelineArgs.extend(["--register-dataset-types"])
    pipelineArgs.extend(["--graph-fixup", "lsst.ap.verify.pipeline_driver._getExecOrder"])
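
    # For illustration only: the assembled command resembles
    #   pipetask --long-log run --fail-fast --butler-config <repo>
    #       --pipeline <pipeline.yaml> --output <outputName> --clobber-outputs
    #       --config parameters:apdb_config=<dbConfigLocation>
    #       --config diaPipe:alertPackager.alertWriteLocation=<alertLocation>
    #       [--data-query <query> ...] --processes <N> --register-dataset-types
    #       --graph-fixup lsst.ap.verify.pipeline_driver._getExecOrder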

    if not parsedCmdLine.skip_pipeline:
        # subprocess is an unsafe workaround for DM-26239
        # TODO: generalize this code in DM-26028
        # TODO: work off of workspace.workButler after DM-26239
        log.info("Running pipeline:")
        log.info(' '.join(pipelineArgs))
        results = subprocess.run(pipelineArgs, capture_output=False, shell=False, check=False)
        log.info('Pipeline complete.')
        return results.returncode
    else:
        log.info('Skipping AP pipeline entirely.')


def _getExecOrder():
    """Return any constraints on the Gen 3 execution order.

    The current constraints are that executions of DiaPipelineTask must be
    ordered by visit ID, but this is subject to change.

    Returns
    -------
    order : `lsst.ctrl.mpexec.ExecutionGraphFixup`
        An object encoding the desired execution order as an algorithm for
        modifying inter-quantum dependencies.

    Notes
    -----
    This function must be importable, but need not be public.
    """
    # Source association algorithm is not time-symmetric. Force execution of
    # association (through DiaPipelineTask) in order of ascending visit number.
    return lsst.ctrl.mpexec.execFixupDataId.ExecFixupDataId(
        taskLabel="diaPipe", dimensions=["visit"], reverse=False)


def _getPipelineFile(workspace, parsed):
    """Return the location of the pipeline file to use for running
    ap_verify on this workspace.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.Workspace`
        A Workspace whose pipeline directory may contain an ApVerify pipeline.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    pipeline : `str`
        The location of the pipeline file to use for running ap_verify.
    """
    if parsed.pipeline:
        return parsed.pipeline
    else:
        customPipeline = os.path.join(workspace.pipelineDir, "ApVerify.yaml")
        if os.path.exists(customPipeline):
            return customPipeline
        else:
            return os.path.join("${AP_VERIFY_DIR}", "pipelines", "ApVerify.yaml")


def _getApdbArguments(workspace, parsed):
    """Return the arguments for running apdb-cli create-sql on this
    workspace, as key-value pairs.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.Workspace`
        A Workspace whose config directory may contain an
        `~lsst.ap.pipe.ApPipeTask` config.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : mapping [`str`]
        Arguments to `lsst.dax.apdb.sql.Apdb.init_database`.
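
    Examples
    --------
    With no ``--db`` argument, the mapping points at the default SQLite file
    inside the workspace (the path shown is illustrative)::

        {"db_url": "sqlite:////path/to/workspace/apdb.db"}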

    """
    if not parsed.db:
        parsed.db = "sqlite:///" + workspace.dbLocation

    args = {"db_url": parsed.db,
            }

    return args


def _getConfigArgumentsGen3(workspace, parsed):
    """Return the config options for running the Gen 3 AP Pipeline on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace whose config directory may contain various configs.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--config-file``,
        following the conventions of `sys.argv`.
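
    Examples
    --------
    The returned list expands on the ``pipetask`` command line to::

        --config parameters:apdb_config=<workspace.dbConfigLocation>
        --config diaPipe:alertPackager.alertWriteLocation=<workspace.alertLocation>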

    """
    return [
        # APDB config should have been stored in the workspace.
        "--config", "parameters:apdb_config=" + workspace.dbConfigLocation,
        # Put output alerts into the workspace.
        "--config", "diaPipe:alertPackager.alertWriteLocation=" + workspace.alertLocation,
    ]


def _getCollectionArguments(workspace, reuse):
    """Return the collections for running the Gen 3 AP Pipeline on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace with a Gen 3 repository.
    reuse : `bool`
        If true, use the previous run collection if one exists. Otherwise,
        create a new run.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--input`` or ``--output``,
        following the conventions of `sys.argv`.
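
    Examples
    --------
    On a fresh workspace the returned arguments are simply (names
    illustrative)::

        --output <outputName> --clobber-outputs

    If ``reuse`` is true and a timestamped run such as
    ``<outputName>/20240511T051800Z`` already exists, ``--extend-run
    --skip-existing`` are appended.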

    """
    # workspace.outputName is a chained collection containing all inputs
    args = ["--output", workspace.outputName,
            "--clobber-outputs",
            ]

    registry = workspace.workButler.registry
    # Should refresh registry to see crosstalk run from DM-31492, but this
    # currently leads to a bug involving --skip-existing. The only downside of
    # the cached registry is that, with two runs for DECam datasets, a rerun of
    # ap_verify will re-run crosstalk sources in the second run. Using
    # skip-existing-in would work around that, but would lead to a worse bug in
    # the case that the user is alternating runs with and without --clean-run.
    # registry.refresh()
    oldRuns = list(registry.queryCollections(re.compile(workspace.outputName + r"/\d+T\d+Z")))
    if reuse and oldRuns:
        args.extend(["--extend-run", "--skip-existing"])
    return args


def _makeApdb(workspace, args):
    """Create an APDB and store its config for future use.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.Workspace`
        A Workspace in which to store the database config.
    args : mapping [`str`]
        Arguments to `lsst.dax.apdb.sql.Apdb.init_database`.
    """
    config = daxApdb.ApdbSql.init_database(**args)
    config.save(workspace.dbConfigLocation)