Coverage for python/lsst/ap/verify/pipeline_driver.py : 20%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# This file is part of ap_verify.
3#
4# Developed for the LSST Data Management System.
5# This product includes software developed by the LSST Project
6# (http://www.lsst.org).
7# See the COPYRIGHT file at the top-level directory of this distribution
8# for details of code ownership.
9#
10# This program is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
24"""Interface between `ap_verify` and `ap_pipe`.
26This module handles calling `ap_pipe` and converting any information
27as needed.
28"""
30__all__ = ["ApPipeParser", "runApPipeGen2", "runApPipeGen3"]
32import argparse
33import os
34import re
36import click.testing
38import lsst.log
39from lsst.utils import getPackageDir
40import lsst.pipe.base as pipeBase
41import lsst.obs.base as obsBase
42import lsst.ctrl.mpexec.cli.pipetask
43import lsst.ap.pipe as apPipe
44from lsst.ap.pipe.make_apdb import makeApdb
class ApPipeParser(argparse.ArgumentParser):
    """An argument parser for data needed by ``ap_pipe`` activities.

    This parser is not complete, and is designed to be passed to another parser
    using the ``parents`` parameter of `argparse.ArgumentParser`.
    """

    def __init__(self):
        defaultPipeline = os.path.join(getPackageDir("ap_verify"), "pipelines", "ApVerify.yaml")

        # Help and documentation will be handled by main program's parser
        super().__init__(add_help=False)
        # namespace.dataIds will always be a list of 0 or more nonempty strings, regardless of inputs.
        # TODO: in Python 3.8+, action='extend' handles nargs='?' more naturally than 'append'.
        self.add_argument('--id', '-d', '--data-query', dest='dataIds',
                          action=self.AppendOptional, nargs='?', default=[],
                          help='An identifier for the data to process.')
        self.add_argument("-p", "--pipeline", default=defaultPipeline,
                          help="A custom version of the ap_verify pipeline (e.g., with different metrics).")
        self.add_argument("--skip-pipeline", action="store_true",
                          help="Do not run the AP pipeline itself. This argument is useful "
                               "for testing metrics on a fixed data set.")

    class AppendOptional(argparse.Action):
        """A variant of the built-in "append" action that ignores None values
        instead of appending them.
        """
        # This class can't safely inherit from the built-in "append" action
        # because there is no public class that implements it.

        def __call__(self, parser, namespace, values, option_string=None):
            """Append ``values`` to the destination list unless it is `None`.

            Parameters
            ----------
            parser : `argparse.ArgumentParser`
                The parser invoking this action (unused).
            namespace : `argparse.Namespace`
                The namespace whose ``self.dest`` attribute is updated.
            values : `str` or `None`
                The value given on the command line; `None` if the option
                was passed without a value.
            option_string : `str`, optional
                The option string that triggered this action (unused).
            """
            if values is None:
                return
            current = getattr(namespace, self.dest, None)
            # Copy before appending: the list found on the namespace may be
            # the ``default`` object registered with the parser, and mutating
            # it in place would leak values into later parse_args() calls.
            updated = [] if current is None else list(current)
            updated.append(values)
            setattr(namespace, self.dest, updated)
def runApPipeGen2(workspace, parsedCmdLine, processes=1):
    """Create the APDB and run the Gen 2 `ap_pipe` task on a workspace.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen2`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline.

    Returns
    -------
    apPipeReturn : `Struct`
        The `Struct` returned from `~lsst.ap.pipe.ApPipeTask.parseAndRun`.
        If ``parsedCmdLine.skip_pipeline`` is set, the task is not run and a
        `Struct` with the fields ``argumentParser``, ``parsedCmd``,
        ``taskRunner``, and an empty ``resultList`` is returned instead.
    """
    log = lsst.log.Log.getLogger('ap.verify.pipeline_driver.runApPipeGen2')

    makeApdb(_getApdbArguments(workspace))

    pipelineArgs = [workspace.dataRepo]
    pipelineArgs += ["--output", workspace.outputRepo]
    pipelineArgs += ["--calib", workspace.calibRepo]
    pipelineArgs += ["--template", workspace.templateRepo]
    pipelineArgs += _getConfigArguments(workspace)

    requestedIds = parsedCmdLine.dataIds
    if requestedIds:
        for singleId in requestedIds:
            # Each ID may hold several space-separated key=value pairs.
            pipelineArgs.append("--id")
            pipelineArgs.extend(singleId.split(" "))
    else:
        # A bare --id is passed when no data IDs were requested.
        pipelineArgs.append("--id")
    pipelineArgs += ["--processes", str(processes)]
    pipelineArgs.append("--noExit")

    if parsedCmdLine.skip_pipeline:
        log.info('Skipping AP pipeline entirely.')
        # Parse the arguments anyway so that callers get a populated Struct.
        taskClass = apPipe.ApPipeTask
        parser = taskClass._makeArgumentParser()
        parsed = parser.parse_args(config=taskClass.ConfigClass(), args=pipelineArgs)
        return pipeBase.Struct(
            argumentParser=parser,
            parsedCmd=parsed,
            taskRunner=taskClass.RunnerClass(TaskClass=taskClass, parsedCmd=parsed),
            resultList=[],
        )

    results = apPipe.ApPipeTask.parseAndRun(pipelineArgs)
    log.info('Pipeline complete')
    return results
def runApPipeGen3(workspace, parsedCmdLine, processes=1):
    """Create the APDB and run the Gen 3 AP pipeline on a workspace.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline.

    Returns
    -------
    exitCode : `int` or `None`
        The exit code reported by the ``pipetask`` command-line invocation,
        or `None` if ``parsedCmdLine.skip_pipeline`` is set and nothing ran.

    Raises
    ------
    RuntimeError
        Raised if the ``pipetask`` invocation reports an exception; the
        original exception is attached as the cause.
    """
    log = lsst.log.Log.getLogger('ap.verify.pipeline_driver.runApPipeGen3')

    # Currently makeApdb has different argument conventions from Gen 3; see DM-22663
    makeApdb(_getApdbArguments(workspace))

    pipelineArgs = ["run", "--butler-config", workspace.repo, "--pipeline", parsedCmdLine.pipeline]
    # TODO: collections should be determined exclusively by Workspace.workButler,
    # but I can't find a way to hook that up to the graph builder. So use the CLI
    # for now and revisit once DM-26239 is done.
    pipelineArgs += _getCollectionArguments(workspace)
    pipelineArgs += _getConfigArgumentsGen3(workspace)
    for singleId in parsedCmdLine.dataIds or ():
        pipelineArgs += ["--data-query", singleId]
    pipelineArgs += ["--processes", str(processes)]
    pipelineArgs.append("--register-dataset-types")

    if parsedCmdLine.skip_pipeline:
        log.info('Skipping AP pipeline entirely.')
        return None

    # CliRunner is an unsafe workaround for DM-26239
    # TODO: generalize this code in DM-26028
    # TODO: work off of workspace.workButler after DM-26239
    cliRunner = click.testing.CliRunner()
    results = cliRunner.invoke(lsst.ctrl.mpexec.cli.pipetask.cli, pipelineArgs)
    if results.exception:
        raise RuntimeError("Pipeline failed.") from results.exception

    log.info('Pipeline complete.')
    return results.exit_code
def _getApdbArguments(workspace):
    """Return the config options for running make_apdb.py on this workspace,
    as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen2` or \
            `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace whose ``dbLocation`` gives the path of the APDB file.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments in the form of repeated ``--config`` options,
        following the conventions of `sys.argv`.
    """
    # ApVerify will use the sqlite hooks for the Apdb.
    overrides = (
        "db_url=sqlite:///" + workspace.dbLocation,
        "isolation_level=READ_UNCOMMITTED",
    )
    args = []
    for override in overrides:
        args += ["--config", override]
    return args
def _getConfigArguments(workspace):
    """Return the config options for running the Gen 2 ApPipeTask on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen2`
        A Workspace whose config directory may contain an
        `~lsst.ap.pipe.ApPipeTask` config.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--configfile``,
        following the conventions of `sys.argv`.
    """
    overrideFile = apPipe.ApPipeTask._DefaultName + ".py"
    args = ["--configfile", os.path.join(workspace.configDir, overrideFile)]

    # Translate APDB-only arguments to work as a sub-config: option values
    # get the sub-config prefix, the ``--config`` flags pass through as-is.
    for apdbArg in _getApdbArguments(workspace):
        if apdbArg == "--config":
            args.append(apdbArg)
        else:
            args.append("diaPipe.apdb." + apdbArg)

    # Put output alerts into the workspace.
    args += ["--config", "diaPipe.alertPackager.alertWriteLocation=" + workspace.alertLocation]
    args += ["--config", "diaPipe.doPackageAlerts=True"]

    return args
def _getConfigArgumentsGen3(workspace):
    """Return the config options for running the Gen 3 AP Pipeline on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace whose config directory may contain various configs.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config``, ``--config-file``, or
        ``--instrument``, following the conventions of `sys.argv`.
    """
    # Translate APDB-only arguments to work as a sub-config: option values
    # get the task-label prefix, the ``--config`` flags pass through as-is.
    args = []
    for apdbArg in _getApdbArguments(workspace):
        args.append(apdbArg if apdbArg == "--config" else "diaPipe:apdb." + apdbArg)

    # Put output alerts into the workspace.
    args += ["--config", "diaPipe:alertPackager.alertWriteLocation=" + workspace.alertLocation]
    args += ["--config", "diaPipe:doPackageAlerts=True"]

    # TODO: the configs below should not be needed after DM-26140
    perTaskFiles = (
        ("calibrate:", "calibrate.py"),
        ("imageDifference:", "imageDifference.py"),
    )
    for labelPrefix, fileName in perTaskFiles:
        args += ["--config-file", labelPrefix + os.path.join(workspace.configDir, fileName)]

    # TODO: reverse-engineering the instrument should not be needed after DM-26140
    # pipetask will crash if there is more than one instrument
    instrumentIds = workspace.workButler.registry.queryDataIds("instrument").expanded()
    for idRecord in instrumentIds:
        args += ["--instrument", idRecord.records["instrument"].class_name]

    return args
def _getCollectionArguments(workspace):
    """Return the collections for running the Gen 3 AP Pipeline on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace with a Gen 3 repository.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--input`` and ``--output-run``,
        following the conventions of `sys.argv`. The input collections are
        listed without duplicates, in a fixed order: the hard-coded
        collections, then each instrument's raw and calibration collections,
        then any template collections.
    """
    butler = workspace.workButler
    # Hard-code the collection names because it's hard to infer the inputs from the Butler
    inputs = ["skymaps", "refcats"]
    for dataId in butler.registry.queryDataIds('instrument'):
        instrument = obsBase.Instrument.fromName(dataId["instrument"], butler.registry)
        inputs.append(instrument.makeDefaultRawIngestRunName())
        inputs.append(instrument.makeCalibrationCollectionName())
    inputs.extend(butler.registry.queryCollections(re.compile(r"templates/\w+")))

    # De-duplicate while keeping insertion order. Joining a set would make
    # the order of the --input list depend on string hash randomization, so
    # the generated command line could differ from run to run.
    uniqueInputs = list(dict.fromkeys(inputs))

    return ["--input", ",".join(uniqueInputs),
            "--output-run", workspace.runName,
            ]