Coverage for python/lsst/ctrl/mpexec/separablePipelineExecutor.py: 43%
50 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-20 04:15 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-20 04:15 -0700
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
29from __future__ import annotations
31__all__ = [
32 "SeparablePipelineExecutor",
33]
36import datetime
37import getpass
38import logging
39from collections.abc import Iterable, Mapping
40from typing import Any, Protocol
42import lsst.pipe.base
43import lsst.resources
44from lsst.daf.butler import Butler
46from .mpGraphExecutor import MPGraphExecutor
47from .preExecInit import PreExecInit
48from .quantumGraphExecutor import QuantumGraphExecutor
49from .singleQuantumExecutor import SingleQuantumExecutor
50from .taskFactory import TaskFactory
52_LOG = logging.getLogger(__name__)
55# Only way to keep black, flake8, and mypy all happy
56_dqc = lsst.pipe.base._datasetQueryConstraints
class _GraphBuilderLike(Protocol):
    """Structural type for objects usable as a quantum graph builder.

    Any object exposing a ``makeGraph`` method with a compatible signature
    satisfies this protocol.
    """

    def makeGraph(
        self,
        pipeline: lsst.pipe.base.Pipeline | Iterable[lsst.pipe.base.pipeline.TaskDef],
        collections: Any,
        run: str,
        userQuery: str | None,
        datasetQueryConstraint: _dqc.DatasetQueryConstraintVariant = _dqc._ALL,
        metadata: Mapping[str, Any] | None = None,
        bind: Mapping[str, Any] | None = None,
    ) -> lsst.pipe.base.QuantumGraph:
        """Build a quantum graph; the protocol itself provides no behavior."""
        ...
class SeparablePipelineExecutor:
    """An executor that allows each step of pipeline execution to be
    run independently.

    The executor can run any or all of the following steps:

    * pre-execution initialization
    * pipeline building
    * quantum graph generation
    * quantum graph execution

    Any of these steps can also be handed off to external code without
    compromising the remaining ones.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        A Butler whose ``collections`` and ``run`` attributes contain the input
        and output collections to use for processing.
    clobber_output : `bool`, optional
        If set, the pipeline execution overwrites existing output files.
        Otherwise, any conflict between existing and new outputs is an error.
    skip_existing_in : iterable [`str`], optional
        If not empty, the pipeline execution searches the listed collections
        for existing outputs, and skips any quanta that have run to completion
        (or have no work to do). Otherwise, all tasks are attempted (subject
        to ``clobber_output``).
    task_factory : `lsst.pipe.base.TaskFactory`, optional
        A custom task factory for use in pre-execution and execution. By
        default, a new instance of `lsst.ctrl.mpexec.TaskFactory` is used.
    resources : `~lsst.pipe.base.ExecutionResources`, optional
        The resources available to each quantum being executed.

    Raises
    ------
    ValueError
        Raised if ``butler`` does not define input collections or an
        output run.
    """

    def __init__(
        self,
        butler: Butler,
        clobber_output: bool = False,
        skip_existing_in: Iterable[str] | None = None,
        task_factory: lsst.pipe.base.TaskFactory | None = None,
        resources: lsst.pipe.base.ExecutionResources | None = None,
    ):
        self._butler = Butler.from_config(butler=butler, collections=butler.collections, run=butler.run)
        if not self._butler.collections:
            raise ValueError("Butler must specify input collections for pipeline.")
        if not self._butler.run:
            raise ValueError("Butler must specify output run for pipeline.")

        self._clobber_output = clobber_output
        self._skip_existing_in = list(skip_existing_in) if skip_existing_in else []

        # Compare against None explicitly so that a caller-supplied factory
        # is never discarded merely because it evaluates as falsy.
        self._task_factory = TaskFactory() if task_factory is None else task_factory
        self.resources = resources

    def pre_execute_qgraph(
        self,
        graph: lsst.pipe.base.QuantumGraph,
        register_dataset_types: bool = False,
        save_init_outputs: bool = True,
        save_versions: bool = True,
    ) -> None:
        """Run pre-execution initialization.

        This method will be deprecated after DM-38041, to be replaced with a
        method that takes either a `~lsst.pipe.base.Pipeline` or a
        ``ResolvedPipelineGraph`` instead of a `~lsst.pipe.base.QuantumGraph`.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            The quantum graph defining the pipeline and datasets to
            be initialized.
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types from the pipeline
            represented by ``graph``.
        save_init_outputs : `bool`, optional
            If `True`, create init-output datasets in this object's output run.
        save_versions : `bool`, optional
            If `True`, save a package versions dataset.
        """
        pre_exec_init = PreExecInit(self._butler, self._task_factory, extendRun=self._clobber_output)
        pre_exec_init.initialize(
            graph=graph,
            saveInitOutputs=save_init_outputs,
            registerDatasetTypes=register_dataset_types,
            saveVersions=save_versions,
        )

    def make_pipeline(self, pipeline_uri: str | lsst.resources.ResourcePath) -> lsst.pipe.base.Pipeline:
        """Build a pipeline from pipeline and configuration information.

        Parameters
        ----------
        pipeline_uri : `str` or `lsst.resources.ResourcePath`
            URI to a file containing a pipeline definition. A URI fragment may
            be used to specify a subset of the pipeline, as described in
            :ref:`pipeline-running-intro`.

        Returns
        -------
        pipeline : `lsst.pipe.base.Pipeline`
            The fully-built pipeline.
        """
        return lsst.pipe.base.Pipeline.from_uri(pipeline_uri)

    def make_quantum_graph(
        self, pipeline: lsst.pipe.base.Pipeline, where: str = "", builder: _GraphBuilderLike | None = None
    ) -> lsst.pipe.base.QuantumGraph:
        """Build a quantum graph from a pipeline and input datasets.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            The pipeline for which to generate a quantum graph.
        where : `str`, optional
            A data ID query that constrains the quanta generated.
        builder : `lsst.pipe.base.GraphBuilder`-like, optional
            A graph builder that implements a
            `~lsst.pipe.base.GraphBuilder.makeGraph` method. By default, a new
            instance of `lsst.pipe.base.GraphBuilder` is used.

        Returns
        -------
        graph : `lsst.pipe.base.QuantumGraph`
            The quantum graph for ``pipeline`` as run on the datasets
            identified by ``where``.

        Notes
        -----
        This method does no special handling of empty quantum graphs. If
        needed, clients can use `len` to test if the returned graph is empty.
        """
        # Only fall back to the default builder when none was supplied; a
        # custom builder is honored even if it evaluates as falsy.
        if builder is None:
            builder = lsst.pipe.base.GraphBuilder(
                self._butler.registry,
                skipExistingIn=self._skip_existing_in,
                clobberOutputs=self._clobber_output,
            )

        # Provenance information passed to the builder alongside the graph
        # request.
        metadata = {
            "input": self._butler.collections,
            "output_run": self._butler.run,
            "skip_existing_in": self._skip_existing_in,
            "skip_existing": bool(self._skip_existing_in),
            "data_query": where,
            "user": getpass.getuser(),
            "time": str(datetime.datetime.now()),
        }
        # The constructor already rejects a missing run; this assertion
        # narrows the optional type for static checkers.
        assert self._butler.run is not None, "Butler output run collection must be defined"
        graph = builder.makeGraph(
            pipeline,
            self._butler.collections,
            self._butler.run,
            userQuery=where,
            metadata=metadata,
        )
        _LOG.info(
            "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
            len(graph),
            len(graph.taskGraph),
            graph.graphID,
        )
        return graph

    def run_pipeline(
        self,
        graph: lsst.pipe.base.QuantumGraph,
        fail_fast: bool = False,
        graph_executor: QuantumGraphExecutor | None = None,
        num_proc: int = 1,
    ) -> None:
        """Run a pipeline in the form of a prepared quantum graph.

        Pre-execution initialization must have already been run;
        see `pre_execute_qgraph`.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            The pipeline and datasets to execute.
        fail_fast : `bool`, optional
            If `True`, abort all execution if any task fails when
            running with multiple processes. Only used with the default graph
            executor.
        graph_executor : `lsst.ctrl.mpexec.QuantumGraphExecutor`, optional
            A custom graph executor. By default, a new instance of
            `lsst.ctrl.mpexec.MPGraphExecutor` is used.
        num_proc : `int`, optional
            The number of processes that can be used to run the pipeline. The
            default value ensures that no subprocess is created. Only used with
            the default graph executor.
        """
        # Only build the default executor when none was supplied; a custom
        # executor is honored even if it evaluates as falsy.
        if graph_executor is None:
            quantum_executor = SingleQuantumExecutor(
                self._butler,
                self._task_factory,
                skipExistingIn=self._skip_existing_in,
                clobberOutputs=self._clobber_output,
                resources=self.resources,
            )
            graph_executor = MPGraphExecutor(
                numProc=num_proc,
                timeout=2_592_000.0,  # In practice, timeout is never helpful; set to 30 days.
                quantumExecutor=quantum_executor,
                failFast=fail_fast,
            )
            # Have to reset connection pool to avoid sharing connections with
            # forked processes.
            self._butler.registry.resetConnectionPool()

        graph_executor.execute(graph)