Coverage for python/lsst/ap/verify/pipeline_driver.py: 19%
76 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-12 12:25 +0000
1#
2# This file is part of ap_verify.
3#
4# Developed for the LSST Data Management System.
5# This product includes software developed by the LSST Project
6# (http://www.lsst.org).
7# See the COPYRIGHT file at the top-level directory of this distribution
8# for details of code ownership.
9#
10# This program is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
24"""Interface between `ap_verify` and `ap_pipe`.
26This module handles calling `ap_pipe` and converting any information
27as needed.
28"""
30__all__ = ["ApPipeParser", "runApPipeGen3"]
32import argparse
33import os
34import re
35import subprocess
36import logging
38import lsst.ctrl.mpexec.execFixupDataId # not part of lsst.ctrl.mpexec
39import lsst.ctrl.mpexec.cli.pipetask
40from lsst.ap.pipe.make_apdb import makeApdb
42_LOG = logging.getLogger(__name__)
class ApPipeParser(argparse.ArgumentParser):
    """An argument parser for data needed by ``ap_pipe`` activities.

    This parser is not complete, and is designed to be passed to another parser
    using the `parent` parameter.
    """

    def __init__(self):
        # Help and documentation will be handled by main program's parser.
        # Use super() rather than naming the base class directly (idiomatic,
        # and robust if the inheritance hierarchy ever changes).
        super().__init__(add_help=False)
        # namespace.dataIds will always be a list of 0 or more nonempty strings, regardless of inputs.
        # TODO: in Python 3.8+, action='extend' handles nargs='?' more naturally than 'append'.
        self.add_argument('-d', '--data-query', dest='dataIds', action='append', default=[],
                          help='An identifier for the data to process.')
        self.add_argument("-p", "--pipeline", default=None,
                          help="A custom version of the ap_verify pipeline (e.g., with different metrics). "
                               "Defaults to the ApVerify.yaml within --dataset.")
        self.add_argument("--db", "--db_url", default=None,
                          help="A location for the AP database, formatted as if for ApdbConfig.db_url. "
                               "Defaults to an SQLite file in the --output directory.")
        self.add_argument("--skip-pipeline", action="store_true",
                          help="Do not run the AP pipeline itself. This argument is useful "
                               "for testing metrics on a fixed data set.")
        self.add_argument("--clean-run", action="store_true",
                          help="Run the pipeline with a new run collection, "
                               "even if one already exists.")
def runApPipeGen3(workspace, parsedCmdLine, processes=1):
    """Run `ap_pipe` on this object's dataset.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline.

    Returns
    -------
    code : `int`
        An error code that is zero if the pipeline ran without problems, or
        nonzero if there were errors. The exact meaning of nonzero values
        is an implementation detail.
    """
    log = _LOG.getChild('runApPipeGen3')

    makeApdb(_getApdbArguments(workspace, parsedCmdLine))

    pipelineFile = _getPipelineFile(workspace, parsedCmdLine)
    pipelineArgs = ["pipetask", "run",
                    "--fail-fast",
                    "--butler-config", workspace.repo,
                    "--pipeline", pipelineFile,
                    ]
    # The same --data-query arguments apply to every pipetask invocation;
    # build them once (empty list of data IDs yields no arguments).
    queryArgs = [arg for dataId in parsedCmdLine.dataIds
                 for arg in ("--data-query", dataId)]

    # TODO: workaround for inability to generate crosstalk sources in main
    # processing pipeline (DM-31492).
    # Loop variable is named ``dataId`` to avoid shadowing the ``id`` builtin.
    instruments = {dataId["instrument"]
                   for dataId in workspace.workButler.registry.queryDataIds("instrument")}
    if "DECam" in instruments:
        crosstalkPipeline = "${AP_PIPE_DIR}/pipelines/DarkEnergyCamera/RunIsrForCrosstalkSources.yaml"
        crosstalkArgs = ["pipetask", "run",
                         "--butler-config", workspace.repo,
                         "--pipeline", crosstalkPipeline,
                         ]
        crosstalkArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))
        crosstalkArgs.extend(queryArgs)
        crosstalkArgs.extend(["--processes", str(processes)])
        crosstalkArgs.extend(["--register-dataset-types"])
        subprocess.run(crosstalkArgs, capture_output=False, shell=False, check=False)

        # Force same output run for crosstalk and main processing.
        pipelineArgs.extend(_getCollectionArguments(workspace, reuse=True))
    else:
        # TODO: collections should be determined exclusively by Workspace.workButler,
        # but I can't find a way to hook that up to the graph builder. So use the CLI
        # for now and revisit once DM-26239 is done.
        pipelineArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))

    pipelineArgs.extend(_getConfigArgumentsGen3(workspace, parsedCmdLine))
    pipelineArgs.extend(queryArgs)
    pipelineArgs.extend(["--processes", str(processes)])
    pipelineArgs.extend(["--register-dataset-types"])
    pipelineArgs.extend(["--graph-fixup", "lsst.ap.verify.pipeline_driver._getExecOrder"])

    if parsedCmdLine.skip_pipeline:
        log.info('Skipping AP pipeline entirely.')
        # Nothing was run, so there is nothing to fail; return 0 to honor the
        # documented int contract (the original implicitly returned None here).
        return 0

    # subprocess is an unsafe workaround for DM-26239
    # TODO: generalize this code in DM-26028
    # TODO: work off of workspace.workButler after DM-26239
    log.info("Running pipeline:")
    # Lazy %-formatting: the join is only evaluated if INFO is enabled.
    log.info("%s", ' '.join(pipelineArgs))
    results = subprocess.run(pipelineArgs, capture_output=False, shell=False, check=False)
    log.info('Pipeline complete.')
    return results.returncode
def _getExecOrder():
    """Return any constraints on the Gen 3 execution order.

    The current constraints are that executions of DiaPipelineTask must be
    ordered by visit ID, but this is subject to change.

    Returns
    -------
    order : `lsst.ctrl.mpexec.ExecutionGraphFixup`
        An object encoding the desired execution order as an algorithm for
        modifying inter-quantum dependencies.

    Notes
    -----
    This function must be importable, but need not be public.
    """
    # Source association algorithm is not time-symmetric. Force execution of
    # association (through DiaPipelineTask) in order of ascending visit number.
    fixup = lsst.ctrl.mpexec.execFixupDataId.ExecFixupDataId(
        taskLabel="diaPipe",
        dimensions=["visit"],
        reverse=False,
    )
    return fixup
170def _getPipelineFile(workspace, parsed):
171 """Return the config options for running make_apdb.py on this workspace,
172 as command-line arguments.
174 Parameters
175 ----------
176 workspace : `lsst.ap.verify.workspace.Workspace`
177 A Workspace whose pipeline directory may contain an ApVerify pipeline.
178 parsed : `argparse.Namespace`
179 Command-line arguments, including all arguments supported by `ApPipeParser`.
181 Returns
182 -------
183 pipeline : `str`
184 The location of the pipeline file to use for running ap_verify.
185 """
186 if parsed.pipeline:
187 return parsed.pipeline
188 else:
189 customPipeline = os.path.join(workspace.pipelineDir, "ApVerify.yaml")
190 if os.path.exists(customPipeline):
191 return customPipeline
192 else:
193 return os.path.join("${AP_VERIFY_DIR}", "pipelines", "ApVerify.yaml")
196def _getApdbArguments(workspace, parsed):
197 """Return the config options for running make_apdb.py on this workspace,
198 as command-line arguments.
200 Parameters
201 ----------
202 workspace : `lsst.ap.verify.workspace.Workspace`
203 A Workspace whose config directory may contain an
204 `~lsst.ap.pipe.ApPipeTask` config.
205 parsed : `argparse.Namespace`
206 Command-line arguments, including all arguments supported by `ApPipeParser`.
208 Returns
209 -------
210 args : `list` of `str`
211 Command-line arguments calling ``--config`` or ``--config-file``,
212 following the conventions of `sys.argv`.
213 """
214 if not parsed.db:
215 parsed.db = "sqlite:///" + workspace.dbLocation
217 args = ["--config", "db_url=" + parsed.db]
219 return args
def _getConfigArgumentsGen3(workspace, parsed):
    """Return the config options for running the Gen 3 AP Pipeline on this
    workspace, as command-line arguments.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        A Workspace whose config directory may contain various configs.
    parsed : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.

    Returns
    -------
    args : `list` of `str`
        Command-line arguments calling ``--config`` or ``--config-file``,
        following the conventions of `sys.argv`.
    """
    args = []
    # Translate APDB-only arguments to work as a sub-config: the "--config"
    # flags pass through unchanged, while each config value is prefixed so it
    # targets the diaPipe task's apdb sub-config.
    for arg in _getApdbArguments(workspace, parsed):
        if arg == "--config":
            args.append(arg)
        else:
            args.append("diaPipe:apdb." + arg)
    # Put output alerts into the workspace.
    args += ["--config",
             "diaPipe:alertPackager.alertWriteLocation=" + workspace.alertLocation]
    return args
249def _getCollectionArguments(workspace, reuse):
250 """Return the collections for running the Gen 3 AP Pipeline on this
251 workspace, as command-line arguments.
253 Parameters
254 ----------
255 workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
256 A Workspace with a Gen 3 repository.
257 reuse : `bool`
258 If true, use the previous run collection if one exists. Otherwise,
259 create a new run.
261 Returns
262 -------
263 args : `list` of `str`
264 Command-line arguments calling ``--input`` or ``--output``,
265 following the conventions of `sys.argv`.
266 """
267 # workspace.outputName is a chained collection containing all inputs
268 args = ["--output", workspace.outputName,
269 "--clobber-outputs",
270 ]
272 registry = workspace.workButler.registry
273 # Should refresh registry to see crosstalk run from DM-31492, but this
274 # currently leads to a bug involving --skip-existing. The only downside of
275 # the cached registry is that, with two runs for DECam datasets, a rerun of
276 # ap_verify will re-run crosstalk sources in the second run. Using
277 # skip-existing-in would work around that, but would lead to a worse bug in
278 # the case that the user is alternating runs with and without --clean-run.
279 # registry.refresh()
280 oldRuns = list(registry.queryCollections(re.compile(workspace.outputName + r"/\d+T\d+Z")))
281 if reuse and oldRuns:
282 args.extend(["--extend-run", "--skip-existing"])
283 return args