Coverage for python/lsst/ap/verify/pipeline_driver.py : 16%

#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
24"""Interface between `ap_verify` and `ap_pipe`.
26This module handles calling `ap_pipe` and converting any information
27as needed.
28"""
30__all__ = ["ApPipeParser", "runApPipeGen2", "runApPipeGen3"]
32import argparse
33import os
34import re
35import subprocess
37import lsst.log
38import lsst.pipe.base as pipeBase
39import lsst.ctrl.mpexec.execFixupDataId # not part of lsst.ctrl.mpexec
40import lsst.ctrl.mpexec.cli.pipetask
41import lsst.ap.pipe as apPipe
42from lsst.ap.pipe.make_apdb import makeApdb


class ApPipeParser(argparse.ArgumentParser):
    """An argument parser for data needed by ``ap_pipe`` activities.

    This parser is not complete, and is designed to be passed to another
    parser using the `parents` parameter.
    """

    def __init__(self):
        # Help and documentation will be handled by main program's parser
        argparse.ArgumentParser.__init__(self, add_help=False)
        # namespace.dataIds will always be a list of 0 or more nonempty strings, regardless of inputs.
        # TODO: in Python 3.8+, action='extend' handles nargs='?' more naturally than 'append'.
        self.add_argument('--id', '-d', '--data-query', dest='dataIds',
                          action=self.AppendOptional, nargs='?', default=[],
                          help='An identifier for the data to process.')
        self.add_argument("-p", "--pipeline", default=None,
                          help="A custom version of the ap_verify pipeline (e.g., with different metrics). "
                               "Defaults to the ApVerify.yaml within --dataset.")
        self.add_argument("--db", "--db_url", default=None,
                          help="A location for the AP database, formatted as if for ApdbConfig.db_url. "
                               "Defaults to an SQLite file in the --output directory.")
        self.add_argument("--skip-pipeline", action="store_true",
                          help="Do not run the AP pipeline itself. This argument is useful "
                               "for testing metrics on a fixed data set.")
        self.add_argument("--clean-run", action="store_true",
                          help="Run the pipeline with a new run collection, "
                               "even if one already exists.")

    class AppendOptional(argparse.Action):
        """A variant of the built-in "append" action that ignores None values
        instead of appending them.
        """
        # This class can't safely inherit from the built-in "append" action
        # because there is no public class that implements it.
        def __call__(self, parser, namespace, values, option_string=None):
            if values is not None:
                try:
                    allValues = getattr(namespace, self.dest)
                    allValues.append(values)
                except AttributeError:
                    setattr(namespace, self.dest, [values])
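

# Illustrative sketch, not part of the original module: one way a top-level
# ap_verify argument parser might reuse ApPipeParser through argparse's
# ``parents`` mechanism (ApPipeParser sets ``add_help=False``, which is what
# makes this composition possible). The parser description and the --dataset
# argument below are hypothetical placeholders.
def _exampleComposedParser():
    mainParser = argparse.ArgumentParser(
        description="hypothetical ap_verify driver",
        parents=[ApPipeParser()],
    )
    mainParser.add_argument("--dataset", help="Hypothetical dataset argument.")
    return mainParser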


def runApPipeGen2(workspace, parsedCmdLine, processes=1):
    """Run `ap_pipe` on the given workspace's dataset.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen2`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline.

    Returns
    -------
    apPipeReturn : `lsst.pipe.base.Struct`
        The `~lsst.pipe.base.Struct` returned from
        `~lsst.ap.pipe.ApPipeTask.parseAndRun` with
        ``doReturnResults=False``. This object is valid even if
        `~lsst.ap.pipe.ApPipeTask` was never run.
    """
    log = lsst.log.Log.getLogger('ap.verify.pipeline_driver.runApPipeGen2')

    makeApdb(_getApdbArguments(workspace, parsedCmdLine))

    pipelineArgs = [workspace.dataRepo,
                    "--output", workspace.outputRepo,
                    "--calib", workspace.calibRepo,
                    "--template", workspace.templateRepo]
    pipelineArgs.extend(_getConfigArguments(workspace, parsedCmdLine))
    if parsedCmdLine.dataIds:
        for singleId in parsedCmdLine.dataIds:
            pipelineArgs.extend(["--id", *singleId.split(" ")])
    else:
        pipelineArgs.extend(["--id"])
    pipelineArgs.extend(["--processes", str(processes)])
    pipelineArgs.extend(["--noExit"])

    if not parsedCmdLine.skip_pipeline:
        results = apPipe.ApPipeTask.parseAndRun(pipelineArgs)
        log.info('Pipeline complete')
    else:
        log.info('Skipping AP pipeline entirely.')
        apPipeParser = apPipe.ApPipeTask._makeArgumentParser()
        apPipeParsed = apPipeParser.parse_args(config=apPipe.ApPipeTask.ConfigClass(), args=pipelineArgs)
        results = pipeBase.Struct(
            argumentParser=apPipeParser,
            parsedCmd=apPipeParsed,
            taskRunner=apPipe.ApPipeTask.RunnerClass(TaskClass=apPipe.ApPipeTask, parsedCmd=apPipeParsed),
            resultList=[],
        )

    return results


def runApPipeGen3(workspace, parsedCmdLine, processes=1):
    """Run `ap_pipe` on the given workspace's dataset.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline.

    Returns
    -------
    code : `int`
        An error code that is zero if the pipeline ran without problems, or
        nonzero if there were errors. The exact meaning of nonzero values
        is an implementation detail.
    """
    log = lsst.log.Log.getLogger('ap.verify.pipeline_driver.runApPipeGen3')

    makeApdb(_getApdbArguments(workspace, parsedCmdLine))

    pipelineFile = _getPipelineFile(workspace, parsedCmdLine)
    pipelineArgs = ["pipetask", "run",
                    "--butler-config", workspace.repo,
                    "--pipeline", pipelineFile,
                    ]
    # TODO: workaround for inability to generate crosstalk sources in main
    # processing pipeline (DM-31492).
    instruments = {id["instrument"] for id in workspace.workButler.registry.queryDataIds("instrument")}
    if "DECam" in instruments:
        crosstalkPipeline = "${AP_PIPE_DIR}/pipelines/DarkEnergyCamera/RunIsrForCrosstalkSources.yaml"
        crosstalkArgs = ["pipetask", "run",
                         "--butler-config", workspace.repo,
                         "--pipeline", crosstalkPipeline,
                         ]
        crosstalkArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))
        if parsedCmdLine.dataIds:
            for singleId in parsedCmdLine.dataIds:
                crosstalkArgs.extend(["--data-query", singleId])
        crosstalkArgs.extend(["--processes", str(processes)])
        crosstalkArgs.extend(["--register-dataset-types"])
        subprocess.run(crosstalkArgs, capture_output=False, shell=False, check=False)

        # Force same output run for crosstalk and main processing.
        pipelineArgs.extend(_getCollectionArguments(workspace, reuse=True))
    else:
        # TODO: collections should be determined exclusively by Workspace.workButler,
        # but I can't find a way to hook that up to the graph builder. So use the CLI
        # for now and revisit once DM-26239 is done.
        pipelineArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))

    pipelineArgs.extend(_getConfigArgumentsGen3(workspace, parsedCmdLine))
    if parsedCmdLine.dataIds:
        for singleId in parsedCmdLine.dataIds:
            pipelineArgs.extend(["--data-query", singleId])
    pipelineArgs.extend(["--processes", str(processes)])
    pipelineArgs.extend(["--register-dataset-types"])
    pipelineArgs.extend(["--graph-fixup", "lsst.ap.verify.pipeline_driver._getExecOrder"])

    if not parsedCmdLine.skip_pipeline:
        # subprocess is an unsafe workaround for DM-26239
        # TODO: generalize this code in DM-26028
        # TODO: work off of workspace.workButler after DM-26239
        results = subprocess.run(pipelineArgs, capture_output=False, shell=False, check=False)
        log.info('Pipeline complete.')
        return results.returncode
    else:
        log.info('Skipping AP pipeline entirely.')
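

# Illustrative sketch, not part of the original module: the argument list that
# runApPipeGen3 hands to subprocess.run corresponds roughly to a command line
# like the one assembled below (config overrides omitted). The repository
# path, output collection, and data query are hypothetical placeholders.
def _examplePipetaskCommand():
    return " ".join([
        "pipetask", "run",
        "--butler-config", "workspace/repo",
        "--pipeline", "${AP_VERIFY_DIR}/pipelines/ApVerify.yaml",
        "--output", "ap_verify-output",
        "--clobber-outputs",
        "--data-query", "visit = 123456 and detector = 42",
        "--processes", "1",
        "--register-dataset-types",
        "--graph-fixup", "lsst.ap.verify.pipeline_driver._getExecOrder",
    ])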


def _getExecOrder():
    """Return any constraints on the Gen 3 execution order.

    The current constraints are that executions of DiaPipelineTask must be
    ordered by visit ID, but this is subject to change.

    Returns
    -------
    order : `lsst.ctrl.mpexec.ExecutionGraphFixup`
        An object encoding the desired execution order as an algorithm for
        modifying inter-quantum dependencies.

    Notes
    -----
    This function must be importable, but need not be public.
    """
    # Source association algorithm is not time-symmetric. Force execution of
    # association (through DiaPipelineTask) in order of ascending visit number.
    return lsst.ctrl.mpexec.execFixupDataId.ExecFixupDataId(
        taskLabel="diaPipe", dimensions=["visit", ], reverse=False)


def _getPipelineFile(workspace, parsed):
    """Return the location of the pipeline file to use for running ap_verify
    on this workspace.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.Workspace`
        A Workspace whose pipeline directory may contain an ApVerify pipeline.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    pipeline : `str`
        The location of the pipeline file to use for running ap_verify.
    """
    if parsed.pipeline:
        return parsed.pipeline
    else:
        customPipeline = os.path.join(workspace.pipelineDir, "ApVerify.yaml")
        if os.path.exists(customPipeline):
            return customPipeline
        else:
            return os.path.join("${AP_VERIFY_DIR}", "pipelines", "ApVerify.yaml")


def _getApdbArguments(workspace, parsed):
    """Return the config options for running make_apdb.py on this workspace,
    as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.Workspace`
        A Workspace whose config directory may contain an
        `~lsst.ap.pipe.ApPipeTask` config.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--config-file``,
        following the conventions of `sys.argv`.
    """
    if not parsed.db:
        parsed.db = "sqlite:///" + workspace.dbLocation

    args = ["--config", "db_url=" + parsed.db]
    # Same special-case check as ApdbConfig.validate()
    if parsed.db.startswith("sqlite"):
        args.extend(["--config", "isolation_level=READ_UNCOMMITTED"])

    return args
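

# Illustrative sketch, not part of the original module: the arguments that
# _getApdbArguments produces when no --db argument was given and the database
# defaults to SQLite. The minimal workspace stand-in and its dbLocation value
# are hypothetical.
def _exampleApdbArguments():
    class _FakeWorkspace:
        dbLocation = "workspace/apdb.db"

    parsed = argparse.Namespace(db=None)
    args = _getApdbArguments(_FakeWorkspace(), parsed)
    # args == ["--config", "db_url=sqlite:///workspace/apdb.db",
    #          "--config", "isolation_level=READ_UNCOMMITTED"]
    return args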


def _getConfigArguments(workspace, parsed):
    """Return the config options for running ApPipeTask on this workspace, as
    command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen2`
        A Workspace whose config directory may contain an
        `~lsst.ap.pipe.ApPipeTask` config.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--configfile``,
        following the conventions of `sys.argv`.
    """
    overrideFile = apPipe.ApPipeTask._DefaultName + ".py"
    overridePath = os.path.join(workspace.configDir, overrideFile)

    args = ["--configfile", overridePath]
    # Translate APDB-only arguments to work as a sub-config
    args.extend([("diaPipe.apdb." + arg if arg != "--config" else arg)
                 for arg in _getApdbArguments(workspace, parsed)])
    # Put output alerts into the workspace.
    args.extend(["--config", "diaPipe.alertPackager.alertWriteLocation=" + workspace.alertLocation])
    args.extend(["--config", "diaPipe.doPackageAlerts=True"])

    return args
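

# Illustrative sketch, not part of the original module: how the flat APDB
# arguments are rewritten into ApPipeTask sub-config overrides by the list
# comprehension above. The db_url value is a hypothetical placeholder.
def _exampleApdbToSubConfig():
    flat = ["--config", "db_url=sqlite:///workspace/apdb.db"]
    nested = [("diaPipe.apdb." + arg if arg != "--config" else arg) for arg in flat]
    # nested == ["--config", "diaPipe.apdb.db_url=sqlite:///workspace/apdb.db"]
    return nested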


def _getConfigArgumentsGen3(workspace, parsed):
    """Return the config options for running the Gen 3 AP Pipeline on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace whose config directory may contain various configs.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--config-file``,
        following the conventions of `sys.argv`.
    """
    # Translate APDB-only arguments to work as a sub-config
    args = [("diaPipe:apdb." + arg if arg != "--config" else arg)
            for arg in _getApdbArguments(workspace, parsed)]
    args.extend([
        # Put output alerts into the workspace.
        "--config", "diaPipe:alertPackager.alertWriteLocation=" + workspace.alertLocation,
    ])
    return args


def _getCollectionArguments(workspace, reuse):
    """Return the collections for running the Gen 3 AP Pipeline on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace with a Gen 3 repository.
    reuse : `bool`
        If true, use the previous run collection if one exists. Otherwise,
        create a new run.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--input`` or ``--output``,
        following the conventions of `sys.argv`.
    """
    # workspace.outputName is a chained collection containing all inputs
    args = ["--output", workspace.outputName,
            "--clobber-outputs",
            ]

    registry = workspace.workButler.registry
    # Should refresh registry to see crosstalk run from DM-31492, but this
    # currently leads to a bug involving --skip-existing. The only downside of
    # the cached registry is that, with two runs for DECam datasets, a rerun of
    # ap_verify will re-run crosstalk sources in the second run. Using
    # skip-existing-in would work around that, but would lead to a worse bug in
    # the case that the user is alternating runs with and without --clean-run.
    # registry.refresh()
    oldRuns = list(registry.queryCollections(re.compile(workspace.outputName + r"/\d+T\d+Z")))
    if reuse and oldRuns:
        args.extend(["--extend-run", "--skip-existing"])
    return args
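

# Illustrative sketch, not part of the original module: the two shapes of
# argument list that _getCollectionArguments can return. The output collection
# name is a hypothetical placeholder.
def _exampleCollectionArguments(extendExistingRun):
    args = ["--output", "ap_verify-output",
            "--clobber-outputs",
            ]
    if extendExistingRun:
        # reuse was requested and a previous timestamped run collection exists
        args.extend(["--extend-run", "--skip-existing"])
    return args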