Coverage for python/lsst/ap/verify/pipeline_driver.py : 18%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# This file is part of ap_verify.
3#
4# Developed for the LSST Data Management System.
5# This product includes software developed by the LSST Project
6# (http://www.lsst.org).
7# See the COPYRIGHT file at the top-level directory of this distribution
8# for details of code ownership.
9#
10# This program is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
24"""Interface between `ap_verify` and `ap_pipe`.
26This module handles calling `ap_pipe` and converting any information
27as needed.
28"""
30__all__ = ["ApPipeParser", "runApPipeGen2", "runApPipeGen3"]
32import argparse
33import os
34import re
36import click.testing
38import lsst.log
39from lsst.utils import getPackageDir
40import lsst.pipe.base as pipeBase
41import lsst.ctrl.mpexec.execFixupDataId # not part of lsst.ctrl.mpexec
42import lsst.ctrl.mpexec.cli.pipetask
43import lsst.ap.pipe as apPipe
44from lsst.ap.pipe.make_apdb import makeApdb
class ApPipeParser(argparse.ArgumentParser):
    """An argument parser for data needed by ``ap_pipe`` activities.

    This parser is not complete, and is designed to be passed to another parser
    using the `parents` parameter.
    """

    def __init__(self):
        defaultPipeline = os.path.join(getPackageDir("ap_verify"), "pipelines", "ApVerify.yaml")

        # Help and documentation will be handled by main program's parser
        super().__init__(add_help=False)
        # namespace.dataIds will always be a list of 0 or more nonempty strings, regardless of inputs.
        # TODO: in Python 3.8+, action='extend' handles nargs='?' more naturally than 'append'.
        self.add_argument('--id', '-d', '--data-query', dest='dataIds',
                          action=self.AppendOptional, nargs='?', default=[],
                          help='An identifier for the data to process.')
        self.add_argument("-p", "--pipeline", default=defaultPipeline,
                          help="A custom version of the ap_verify pipeline (e.g., with different metrics).")
        self.add_argument("--db", "--db_url", default=None,
                          help="A location for the AP database, formatted as if for ApdbConfig.db_url. "
                               "Defaults to an SQLite file in the --output directory.")
        self.add_argument("--skip-pipeline", action="store_true",
                          help="Do not run the AP pipeline itself. This argument is useful "
                               "for testing metrics on a fixed data set.")
        self.add_argument("--clean-run", action="store_true",
                          help="Run the pipeline with a new run collection, "
                               "even if one already exists.")

    class AppendOptional(argparse.Action):
        """A variant of the built-in "append" action that ignores None values
        instead of appending them.
        """
        # This class can't safely inherit from the built-in "append" action
        # because there is no public class that implements it.

        def __call__(self, parser, namespace, values, option_string=None):
            """Append ``values`` to the destination list unless it is `None`.

            Parameters
            ----------
            parser : `argparse.ArgumentParser`
                The parser invoking this action; unused.
            namespace : `argparse.Namespace`
                The namespace whose ``self.dest`` attribute is updated.
            values : `str` or `None`
                The value given on the command line, or `None` if the option
                was passed without a value.
            option_string : `str`, optional
                The option string that triggered this action; unused.
            """
            if values is None:
                return

            # The list already on the namespace may be the argument's
            # ``default`` object, which is shared by every parse. Appending in
            # place would leak values from one parse_args call into the next,
            # so build a fresh list instead.
            previous = getattr(namespace, self.dest, None)
            allValues = [] if previous is None else list(previous)
            allValues.append(values)
            setattr(namespace, self.dest, allValues)
def runApPipeGen2(workspace, parsedCmdLine, processes=1):
    """Run `ap_pipe` on this object's dataset.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen2`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline

    Returns
    -------
    apPipeReturn : `lsst.pipe.base.Struct`
        The `~lsst.pipe.base.Struct` returned from
        `~lsst.ap.pipe.ApPipeTask.parseAndRun` with
        ``doReturnResults=False``. This object is valid even if
        `~lsst.ap.pipe.ApPipeTask` was never run.
    """
    log = lsst.log.Log.getLogger('ap.verify.pipeline_driver.runApPipeGen2')

    # makeApdb is called whether or not the pipeline itself is skipped.
    makeApdb(_getApdbArguments(workspace, parsedCmdLine))

    pipelineArgs = [workspace.dataRepo,
                    "--output", workspace.outputRepo,
                    "--calib", workspace.calibRepo,
                    "--template", workspace.templateRepo]
    pipelineArgs.extend(_getConfigArguments(workspace, parsedCmdLine))
    if parsedCmdLine.dataIds:
        for singleId in parsedCmdLine.dataIds:
            # Split on any run of whitespace: splitting on a single " " would
            # turn repeated, leading, or trailing spaces into empty-string
            # arguments following --id.
            pipelineArgs.extend(["--id", *singleId.split()])
    else:
        # A bare --id carries no key=value restrictions.
        pipelineArgs.extend(["--id"])
    pipelineArgs.extend(["--processes", str(processes)])
    pipelineArgs.extend(["--noExit"])

    if not parsedCmdLine.skip_pipeline:
        results = apPipe.ApPipeTask.parseAndRun(pipelineArgs)
        log.info('Pipeline complete')
    else:
        log.info('Skipping AP pipeline entirely.')
        # Build the same Struct members by hand, with an empty result list,
        # so that callers get a usable object even though nothing was run.
        apPipeParser = apPipe.ApPipeTask._makeArgumentParser()
        apPipeParsed = apPipeParser.parse_args(config=apPipe.ApPipeTask.ConfigClass(), args=pipelineArgs)
        results = pipeBase.Struct(
            argumentParser=apPipeParser,
            parsedCmd=apPipeParsed,
            taskRunner=apPipe.ApPipeTask.RunnerClass(TaskClass=apPipe.ApPipeTask, parsedCmd=apPipeParsed),
            resultList=[],
        )

    return results
def runApPipeGen3(workspace, parsedCmdLine, processes=1):
    """Run the Gen 3 `ap_pipe` pipeline on this workspace's dataset.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline

    Returns
    -------
    exitCode : `int` or `None`
        The exit code reported by the ``pipetask`` invocation, or `None` if
        ``parsedCmdLine.skip_pipeline`` is set and nothing was run.

    Raises
    ------
    RuntimeError
        Raised if the ``pipetask`` invocation reports an exception; the
        original exception is chained as the cause.
    """
    log = lsst.log.Log.getLogger('ap.verify.pipeline_driver.runApPipeGen3')

    # makeApdb is called, and the arguments below are assembled, whether or
    # not the pipeline itself is skipped.
    makeApdb(_getApdbArguments(workspace, parsedCmdLine))

    cliArgs = ["run", "--butler-config", workspace.repo, "--pipeline", parsedCmdLine.pipeline]
    # TODO: collections should be determined exclusively by Workspace.workButler,
    # but I can't find a way to hook that up to the graph builder. So use the CLI
    # for now and revisit once DM-26239 is done.
    cliArgs += _getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run))
    cliArgs += _getConfigArgumentsGen3(workspace, parsedCmdLine)
    for query in parsedCmdLine.dataIds or ():
        cliArgs += ["--data-query", query]
    cliArgs += [
        "--processes", str(processes),
        "--register-dataset-types",
        "--graph-fixup", "lsst.ap.verify.pipeline_driver._getExecOrder",
    ]

    if parsedCmdLine.skip_pipeline:
        log.info('Skipping AP pipeline entirely.')
        return None

    # CliRunner is an unsafe workaround for DM-26239
    # TODO: generalize this code in DM-26028
    # TODO: work off of workspace.workButler after DM-26239
    outcome = click.testing.CliRunner().invoke(lsst.ctrl.mpexec.cli.pipetask.cli, cliArgs)
    if outcome.exception:
        raise RuntimeError("Pipeline failed.") from outcome.exception

    log.info('Pipeline complete.')
    return outcome.exit_code
def _getExecOrder():
    """Return any constraints on the Gen 3 execution order.

    The current constraints are that executions of DiaPipelineTask must be
    ordered by visit ID, but this is subject to change.

    Returns
    -------
    order : `lsst.ctrl.mpexec.ExecutionGraphFixup`
        An object encoding the desired execution order as an algorithm for
        modifying inter-quantum dependencies.

    Notes
    -----
    This function must be importable, but need not be public: it is passed
    by its fully qualified name to the ``--graph-fixup`` option.
    """
    # Source association algorithm is not time-symmetric. Force execution of
    # association (through DiaPipelineTask) in order of ascending visit number.
    fixupClass = lsst.ctrl.mpexec.execFixupDataId.ExecFixupDataId
    order = fixupClass(taskLabel="diaPipe", dimensions=["visit"], reverse=False)
    return order
def _getApdbArguments(workspace, parsed):
    """Build the command-line config overrides for running make_apdb.py on
    this workspace.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.Workspace`
        A Workspace whose ``dbLocation`` is used when no database was
        requested on the command line.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
        If ``parsed.db`` is unset, it is filled in with the default
        SQLite URL as a side effect.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config``,
        following the conventions of `sys.argv`.
    """
    # Fall back to an SQLite file in the workspace, recording the choice on
    # the namespace itself.
    if not parsed.db:
        parsed.db = "sqlite:///" + workspace.dbLocation
    dbUrl = parsed.db

    overrides = ["db_url=" + dbUrl]
    # Same special-case check as ApdbConfig.validate()
    if dbUrl.startswith("sqlite"):
        overrides.append("isolation_level=READ_UNCOMMITTED")

    # Each override becomes its own "--config <key=value>" pair.
    return [token for override in overrides for token in ("--config", override)]
def _getConfigArguments(workspace, parsed):
    """Build the command-line config options for running ApPipeTask on this
    workspace.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen2`
        A Workspace whose config directory may contain an
        `~lsst.ap.pipe.ApPipeTask` config.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--configfile``,
        following the conventions of `sys.argv`.
    """
    configName = apPipe.ApPipeTask._DefaultName + ".py"
    result = ["--configfile", os.path.join(workspace.configDir, configName)]

    # Translate APDB-only arguments to work as a sub-config: the "--config"
    # flags pass through, everything else gets the sub-config prefix.
    for token in _getApdbArguments(workspace, parsed):
        if token == "--config":
            result.append(token)
        else:
            result.append("diaPipe.apdb." + token)

    # Put output alerts into the workspace.
    result += [
        "--config", "diaPipe.alertPackager.alertWriteLocation=" + workspace.alertLocation,
        "--config", "diaPipe.doPackageAlerts=True",
    ]
    return result
def _getConfigArgumentsGen3(workspace, parsed):
    """Build the command-line config options for running the Gen 3 AP
    Pipeline on this workspace.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace whose config directory may contain various configs.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config``, ``--config-file``, or
        ``--instrument``, following the conventions of `sys.argv`.
    """
    result = []
    # Translate APDB-only arguments to work as a sub-config
    for token in _getApdbArguments(workspace, parsed):
        result.append(token if token == "--config" else "diaPipe:apdb." + token)

    # Put output alerts into the workspace.
    result += ["--config", "diaPipe:alertPackager.alertWriteLocation=" + workspace.alertLocation]
    result += ["--config", "diaPipe:doPackageAlerts=True"]
    # TODO: the configs below should not be needed after DM-26140
    result += ["--config-file", "calibrate:" + os.path.join(workspace.configDir, "calibrate.py")]
    result += ["--config-file", "imageDifference:" + os.path.join(workspace.configDir, "imageDifference.py")]

    # TODO: this config should not be needed either after DM-26140
    # Unlike the two configs above, the ISR override is only passed if the file exists.
    isrConfig = os.path.join(workspace.configDir, "isr.py")
    if os.path.exists(isrConfig):
        result += ["--config-file", "isr:" + os.path.join(workspace.configDir, "isr.py")]

    # TODO: reverse-engineering the instrument should not be needed after DM-26140
    # pipetask will crash if there is more than one instrument
    instrumentIds = workspace.workButler.registry.queryDataIds("instrument").expanded()
    for dataId in instrumentIds:
        result += ["--instrument", dataId.records["instrument"].class_name]

    return result
def _getCollectionArguments(workspace, reuse):
    """Return the collections for running the Gen 3 AP Pipeline on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace with a Gen 3 repository.
    reuse : `bool`
        If true, use the previous run collection if one exists. Otherwise,
        create a new run.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--output`` and, when an existing run
        is reused, ``--extend-run`` and ``--skip-existing``, following the
        conventions of `sys.argv`.
    """
    # workspace.outputName is a chained collection containing all inputs
    args = ["--output", workspace.outputName,
            "--clobber-partial-outputs",
            ]

    registry = workspace.workButler.registry
    # The collection name is literal text, so escape it before embedding it
    # in the pattern; otherwise characters such as "." or "+" in the name
    # would be read as regex syntax and could match unrelated collections.
    runPattern = re.compile(re.escape(workspace.outputName) + r"/\d+T\d+Z")
    oldRuns = list(registry.queryCollections(runPattern))
    if reuse and oldRuns:
        args.extend(["--extend-run", "--skip-existing"])
    return args