Coverage for python/lsst/ap/verify/pipeline_driver.py: 19%
76 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-18 12:20 +0000
1#
2# This file is part of ap_verify.
3#
4# Developed for the LSST Data Management System.
5# This product includes software developed by the LSST Project
6# (http://www.lsst.org).
7# See the COPYRIGHT file at the top-level directory of this distribution
8# for details of code ownership.
9#
10# This program is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
"""Interface between `ap_verify` and `ap_pipe`.

This module handles calling `ap_pipe` and converting any information
as needed.
"""

# Only the argument parser and the Gen 3 driver are public API.
__all__ = ["ApPipeParser", "runApPipeGen3"]
32import argparse
33import os
34import re
35import subprocess
36import logging
38import lsst.ctrl.mpexec.execFixupDataId # not part of lsst.ctrl.mpexec
39import lsst.dax.apdb as daxApdb
# Module-level logger; entry points derive child loggers from it.
_LOG = logging.getLogger(__name__)
class ApPipeParser(argparse.ArgumentParser):
    """An argument parser for data needed by ``ap_pipe`` activities.

    This parser is not complete, and is designed to be passed to another parser
    using the `parent` parameter.
    """

    def __init__(self):
        # The adopting parser is responsible for -h/--help and documentation.
        super().__init__(add_help=False)
        # namespace.dataIds will always be a list of 0 or more nonempty strings, regardless of inputs.
        # TODO: in Python 3.8+, action='extend' handles nargs='?' more naturally than 'append'.
        argumentSpecs = [
            (('-d', '--data-query'),
             dict(dest='dataIds', action='append', default=[],
                  help='An identifier for the data to process.')),
            (("-p", "--pipeline"),
             dict(default=None,
                  help="A custom version of the ap_verify pipeline (e.g., with different metrics). "
                       "Defaults to the ApVerify.yaml within --dataset.")),
            (("--db", "--db_url"),
             dict(default=None,
                  help="A location for the AP database, formatted as if for apdb-cli create-sql. "
                       "Defaults to an SQLite file in the --output directory.")),
            (("--skip-pipeline",),
             dict(action="store_true",
                  help="Do not run the AP pipeline itself. This argument is useful "
                       "for testing metrics on a fixed data set.")),
            (("--clean-run",),
             dict(action="store_true",
                  help="Run the pipeline with a new run collection, "
                       "even if one already exists.")),
        ]
        for flags, options in argumentSpecs:
            self.add_argument(*flags, **options)
def runApPipeGen3(workspace, parsedCmdLine, processes=1):
    """Run `ap_pipe` on this object's dataset.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
        The abstract location containing input and output repositories.
    parsedCmdLine : `argparse.Namespace`
        Command-line arguments, including all arguments supported by `ApPipeParser`.
    processes : `int`
        The number of processes with which to call the AP pipeline

    Returns
    -------
    code : `int`
        An error code that is zero if the pipeline ran without problems (or
        was skipped with ``--skip-pipeline``), or nonzero if there were
        errors. The exact meaning of nonzero values is an implementation
        detail.
    """
    log = _LOG.getChild('runApPipeGen3')

    _makeApdb(workspace, _getApdbArguments(workspace, parsedCmdLine))

    pipelineFile = _getPipelineFile(workspace, parsedCmdLine)
    pipelineArgs = ["pipetask", "--long-log", "run",
                    # fail-fast to ensure processing errors are obvious, and
                    # to compensate for the extra interconnections added by
                    # --graph-fixup (further down).
                    "--fail-fast",
                    "--butler-config", workspace.repo,
                    "--pipeline", pipelineFile,
                    ]

    # TODO: workaround for inability to generate crosstalk sources in main
    # processing pipeline (DM-31492).
    # NOTE: renamed loop variable from `id`, which shadowed the builtin.
    instruments = {dataId["instrument"]
                   for dataId in workspace.workButler.registry.queryDataIds("instrument")}
    if "DECam" in instruments:
        crosstalkPipeline = "${AP_PIPE_DIR}/pipelines/DECam/RunIsrForCrosstalkSources.yaml"
        crosstalkArgs = ["pipetask", "run",
                         "--butler-config", workspace.repo,
                         "--pipeline", crosstalkPipeline,
                         ]
        crosstalkArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))
        if parsedCmdLine.dataIds:
            for singleId in parsedCmdLine.dataIds:
                crosstalkArgs.extend(["--data-query", singleId])
        crosstalkArgs.extend(["--processes", str(processes)])
        crosstalkArgs.extend(["--register-dataset-types"])
        crosstalkResults = subprocess.run(crosstalkArgs, capture_output=False, shell=False, check=False)
        if crosstalkResults.returncode != 0:
            # Don't abort here: the main run below is --fail-fast and will
            # surface any real problem caused by missing crosstalk sources.
            log.warning("Crosstalk sources pipeline exited with code %d.",
                        crosstalkResults.returncode)

        # Force same output run for crosstalk and main processing.
        pipelineArgs.extend(_getCollectionArguments(workspace, reuse=True))
    else:
        # TODO: collections should be determined exclusively by Workspace.workButler,
        # but I can't find a way to hook that up to the graph builder. So use the CLI
        # for now and revisit once DM-26239 is done.
        pipelineArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))

    pipelineArgs.extend(_getConfigArgumentsGen3(workspace, parsedCmdLine))
    if parsedCmdLine.dataIds:
        for singleId in parsedCmdLine.dataIds:
            pipelineArgs.extend(["--data-query", singleId])
    pipelineArgs.extend(["--processes", str(processes)])
    pipelineArgs.extend(["--register-dataset-types"])
    pipelineArgs.extend(["--graph-fixup", "lsst.ap.verify.pipeline_driver._getExecOrder"])

    if not parsedCmdLine.skip_pipeline:
        # subprocess is an unsafe workaround for DM-26239
        # TODO: generalize this code in DM-26028
        # TODO: work off of workspace.workButler after DM-26239
        log.info("Running pipeline:")
        log.info(' '.join(pipelineArgs))
        results = subprocess.run(pipelineArgs, capture_output=False, shell=False, check=False)
        log.info('Pipeline complete.')
        return results.returncode
    else:
        log.info('Skipping AP pipeline entirely.')
        # Bug fix: this branch previously fell through and returned None,
        # contradicting the documented `int` return code.
        return 0
def _getExecOrder():
    """Return any constraints on the Gen 3 execution order.

    The current constraints are that executions of DiaPipelineTask must be
    ordered by visit ID, but this is subject to change.

    Returns
    -------
    order : `lsst.ctrl.mpexec.ExecutionGraphFixup`
        An object encoding the desired execution order as an algorithm for
        modifying inter-quantum dependencies.

    Notes
    -----
    This function must be importable, but need not be public.
    """
    # The source association algorithm is not time-symmetric, so force
    # association (through DiaPipelineTask) to run in ascending visit order.
    fixup = lsst.ctrl.mpexec.execFixupDataId.ExecFixupDataId(
        taskLabel="diaPipe",
        dimensions=["visit"],
        reverse=False,
    )
    return fixup
172def _getPipelineFile(workspace, parsed):
173 """Return the config options for running make_apdb.py on this workspace,
174 as command-line arguments.
176 Parameters
177 ----------
178 workspace : `lsst.ap.verify.workspace.Workspace`
179 A Workspace whose pipeline directory may contain an ApVerify pipeline.
180 parsed : `argparse.Namespace`
181 Command-line arguments, including all arguments supported by `ApPipeParser`.
183 Returns
184 -------
185 pipeline : `str`
186 The location of the pipeline file to use for running ap_verify.
187 """
188 if parsed.pipeline:
189 return parsed.pipeline
190 else:
191 customPipeline = os.path.join(workspace.pipelineDir, "ApVerify.yaml")
192 if os.path.exists(customPipeline):
193 return customPipeline
194 else:
195 return os.path.join("${AP_VERIFY_DIR}", "pipelines", "ApVerify.yaml")
198def _getApdbArguments(workspace, parsed):
199 """Return the arguments for running apdb-cli create-sql on this workspace,
200 as key-value pairs.
202 Parameters
203 ----------
204 workspace : `lsst.ap.verify.workspace.Workspace`
205 A Workspace whose config directory may contain an
206 `~lsst.ap.pipe.ApPipeTask` config.
207 parsed : `argparse.Namespace`
208 Command-line arguments, including all arguments supported by `ApPipeParser`.
210 Returns
211 -------
212 args : mapping [`str`]
213 Arguments to `lsst.dax.apdb.sql.Apdb.init_database`.
214 """
215 if not parsed.db:
216 parsed.db = "sqlite:///" + workspace.dbLocation
218 args = {"db_url": parsed.db,
219 }
221 return args
224def _getConfigArgumentsGen3(workspace, parsed):
225 """Return the config options for running the Gen 3 AP Pipeline on this
226 workspace, as command-line arguments.
228 Parameters
229 ----------
230 workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
231 A Workspace whose config directory may contain various configs.
232 parsed : `argparse.Namespace`
233 Command-line arguments, including all arguments supported by `ApPipeParser`.
235 Returns
236 -------
237 args : `list` of `str`
238 Command-line arguments calling ``--config`` or ``--config-file``,
239 following the conventions of `sys.argv`.
240 """
241 return [
242 # APDB config should have been stored in the workspace.
243 "--config", "parameters:apdb_config=" + workspace.dbConfigLocation,
244 # Put output alerts into the workspace.
245 "--config", "diaPipe:alertPackager.alertWriteLocation=" + workspace.alertLocation,
246 ]
249def _getCollectionArguments(workspace, reuse):
250 """Return the collections for running the Gen 3 AP Pipeline on this
251 workspace, as command-line arguments.
253 Parameters
254 ----------
255 workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
256 A Workspace with a Gen 3 repository.
257 reuse : `bool`
258 If true, use the previous run collection if one exists. Otherwise,
259 create a new run.
261 Returns
262 -------
263 args : `list` of `str`
264 Command-line arguments calling ``--input`` or ``--output``,
265 following the conventions of `sys.argv`.
266 """
267 # workspace.outputName is a chained collection containing all inputs
268 args = ["--output", workspace.outputName,
269 "--clobber-outputs",
270 ]
272 registry = workspace.workButler.registry
273 # Should refresh registry to see crosstalk run from DM-31492, but this
274 # currently leads to a bug involving --skip-existing. The only downside of
275 # the cached registry is that, with two runs for DECam datasets, a rerun of
276 # ap_verify will re-run crosstalk sources in the second run. Using
277 # skip-existing-in would work around that, but would lead to a worse bug in
278 # the case that the user is alternating runs with and without --clean-run.
279 # registry.refresh()
280 oldRuns = list(registry.queryCollections(re.compile(workspace.outputName + r"/\d+T\d+Z")))
281 if reuse and oldRuns:
282 args.extend(["--extend-run", "--skip-existing"])
283 return args
def _makeApdb(workspace, args):
    """Create an APDB and store its config for future use.

    Parameters
    ----------
    workspace : `lsst.ap.verify.workspace.Workspace`
        A Workspace in which to store the database config.
    args : mapping [`str`]
        Arguments to `lsst.dax.apdb.sql.Apdb.init_database`.
    """
    # Persist the resolved config so later pipeline steps can refer to it.
    daxApdb.ApdbSql.init_database(**args).save(workspace.dbConfigLocation)