Coverage for python/lsst/ctrl/mpexec/separablePipelineExecutor.py: 46%
49 statements
« prev ^ index » next coverage.py v7.2.3, created at 2023-04-20 10:51 +0000
« prev ^ index » next coverage.py v7.2.3, created at 2023-04-20 10:51 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
23from __future__ import annotations
25__all__ = [
26 "SeparablePipelineExecutor",
27]
30import datetime
31import getpass
32import logging
33import math
34import multiprocessing
35from typing import Any, Iterable, Mapping, Protocol
37import lsst.pipe.base
38import lsst.resources
39from lsst.daf.butler import Butler
41from .mpGraphExecutor import MPGraphExecutor
42from .preExecInit import PreExecInit
43from .quantumGraphExecutor import QuantumGraphExecutor
44from .singleQuantumExecutor import SingleQuantumExecutor
45from .taskFactory import TaskFactory
47_LOG = logging.getLogger(__name__)
50# Only way to keep black, flake8, and mypy all happy
51_dqc = lsst.pipe.base._datasetQueryConstraints
class _GraphBuilderLike(Protocol):
    """Structural type for objects able to build a quantum graph.

    Any object exposing a ``makeGraph`` method with a compatible signature
    satisfies this protocol; no inheritance is required. It is used to
    annotate the optional ``builder`` argument of
    `SeparablePipelineExecutor.make_quantum_graph`, whose documentation
    names `lsst.pipe.base.GraphBuilder` as the default implementation.
    """

    def makeGraph(
        self,
        pipeline: lsst.pipe.base.Pipeline | Iterable[lsst.pipe.base.pipeline.TaskDef],
        collections: Any,
        run: str | None,
        userQuery: str | None,
        datasetQueryConstraint: _dqc.DatasetQueryConstraintVariant = _dqc._ALL,
        metadata: Mapping[str, Any] | None = None,
        resolveRefs: bool = False,
        bind: Mapping[str, Any] | None = None,
    ) -> lsst.pipe.base.QuantumGraph:
        """Build a quantum graph for ``pipeline``.

        This is a signature-only stub; implementations supply the behavior.
        """
        ...
class SeparablePipelineExecutor:
    """An executor that allows each step of pipeline execution to be
    run independently.

    The executor can run any or all of the following steps:

    * pre-execution initialization
    * pipeline building
    * quantum graph generation
    * quantum graph execution

    Any of these steps can also be handed off to external code without
    compromising the remaining ones.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        A Butler whose ``collections`` and ``run`` attributes contain the input
        and output collections to use for processing.
    clobber_output : `bool`, optional
        If set, the pipeline execution overwrites existing output files.
        Otherwise, any conflict between existing and new outputs is an error.
    skip_existing_in : iterable [`str`], optional
        If not empty, the pipeline execution searches the listed collections
        for existing outputs, and skips any quanta that have run to completion
        (or have no work to do). Otherwise, all tasks are attempted (subject
        to ``clobber_output``).
    task_factory : `lsst.pipe.base.TaskFactory`, optional
        A custom task factory for use in pre-execution and execution. By
        default, a new instance of `lsst.ctrl.mpexec.TaskFactory` is used.

    Raises
    ------
    ValueError
        Raised if ``butler`` does not specify input collections or an
        output run.

    Notes
    -----
    Optional collaborators (``task_factory``, ``builder``,
    ``graph_executor``) are replaced by their defaults only when they are
    `None`. A caller-supplied object is always honored, even if it happens
    to evaluate as false.
    """

    def __init__(
        self,
        butler: Butler,
        clobber_output: bool = False,
        skip_existing_in: Iterable[str] | None = None,
        task_factory: lsst.pipe.base.TaskFactory | None = None,
    ):
        # Construct this object's own Butler from the caller's one, carrying
        # over the same input collections and output run.
        self._butler = Butler(butler=butler, collections=butler.collections, run=butler.run)
        if not self._butler.collections:
            raise ValueError("Butler must specify input collections for pipeline.")
        if not self._butler.run:
            raise ValueError("Butler must specify output run for pipeline.")

        self._clobber_output = clobber_output
        # Materialize the iterable so it can be reused by several steps; an
        # omitted argument and an empty iterable both yield an empty list.
        self._skip_existing_in = [] if skip_existing_in is None else list(skip_existing_in)

        # Compare against None explicitly so that a custom factory is never
        # discarded just because it is falsy.
        self._task_factory = TaskFactory() if task_factory is None else task_factory

    def pre_execute_qgraph(
        self,
        graph: lsst.pipe.base.QuantumGraph,
        register_dataset_types: bool = False,
        save_init_outputs: bool = True,
        save_versions: bool = True,
    ) -> None:
        """Run pre-execution initialization.

        This method will be deprecated after DM-38041, to be replaced with a
        method that takes either a `~lsst.pipe.base.Pipeline` or a
        ``ResolvedPipelineGraph`` instead of a `~lsst.pipe.base.QuantumGraph`.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            The quantum graph defining the pipeline and datasets to
            be initialized.
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types from the pipeline
            represented by ``graph``.
        save_init_outputs : `bool`, optional
            If `True`, create init-output datasets in this object's output run.
        save_versions : `bool`, optional
            If `True`, save a package versions dataset.
        """
        # The clobber setting chosen at construction is forwarded as the
        # initializer's ``extendRun`` option.
        pre_exec_init = PreExecInit(self._butler, self._task_factory, extendRun=self._clobber_output)
        pre_exec_init.initialize(
            graph=graph,
            saveInitOutputs=save_init_outputs,
            registerDatasetTypes=register_dataset_types,
            saveVersions=save_versions,
        )

    def make_pipeline(self, pipeline_uri: str | lsst.resources.ResourcePath) -> lsst.pipe.base.Pipeline:
        """Build a pipeline from pipeline and configuration information.

        Parameters
        ----------
        pipeline_uri : `str` or `lsst.resources.ResourcePath`
            URI to a file containing a pipeline definition. A URI fragment may
            be used to specify a subset of the pipeline, as described in
            :ref:`pipeline-running-intro`.

        Returns
        -------
        pipeline : `lsst.pipe.base.Pipeline`
            The fully-built pipeline.
        """
        return lsst.pipe.base.Pipeline.from_uri(pipeline_uri)

    def make_quantum_graph(
        self, pipeline: lsst.pipe.base.Pipeline, where: str = "", builder: _GraphBuilderLike | None = None
    ) -> lsst.pipe.base.QuantumGraph:
        """Build a quantum graph from a pipeline and input datasets.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            The pipeline for which to generate a quantum graph.
        where : `str`, optional
            A data ID query that constrains the quanta generated.
        builder : `lsst.pipe.base.GraphBuilder`-like, optional
            A graph builder that implements a
            `~lsst.pipe.base.GraphBuilder.makeGraph` method. By default, a new
            instance of `lsst.pipe.base.GraphBuilder` is used.

        Returns
        -------
        graph : `lsst.pipe.base.QuantumGraph`
            The quantum graph for ``pipeline`` as run on the datasets
            identified by ``where``.

        Notes
        -----
        This method does no special handling of empty quantum graphs. If
        needed, clients can use `len` to test if the returned graph is empty.
        """
        # Only fall back to the default builder when none was supplied; a
        # truthiness test would silently replace a falsy custom builder.
        if builder is None:
            builder = lsst.pipe.base.GraphBuilder(
                self._butler.registry,
                skipExistingIn=self._skip_existing_in,
                clobberOutputs=self._clobber_output,
            )

        # Provenance information handed to the builder along with the graph
        # request.
        metadata = {
            "input": self._butler.collections,
            "output_run": self._butler.run,
            "skip_existing_in": self._skip_existing_in,
            "skip_existing": bool(self._skip_existing_in),
            "data_query": where,
            "user": getpass.getuser(),
            "time": str(datetime.datetime.now()),
        }
        graph = builder.makeGraph(
            pipeline,
            self._butler.collections,
            self._butler.run,
            userQuery=where,
            metadata=metadata,
            resolveRefs=True,
        )
        _LOG.info(
            "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
            len(graph),
            len(graph.taskGraph),
            graph.graphID,
        )
        return graph

    def run_pipeline(
        self,
        graph: lsst.pipe.base.QuantumGraph,
        fail_fast: bool = False,
        graph_executor: QuantumGraphExecutor | None = None,
    ) -> None:
        """Run a pipeline in the form of a prepared quantum graph.

        Pre-execution initialization must have already been run;
        see `pre_execute_qgraph`.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            The pipeline and datasets to execute.
        fail_fast : `bool`, optional
            If `True`, abort all (parallel) execution if any task fails (only
            used with the default graph executor).
        graph_executor : `lsst.ctrl.mpexec.QuantumGraphExecutor`, optional
            A custom graph executor. By default, a new instance of
            `lsst.ctrl.mpexec.MPGraphExecutor` is used.
        """
        # Only build the default executor when none was supplied; a
        # truthiness test would silently replace a falsy custom executor.
        if graph_executor is None:
            quantum_executor = SingleQuantumExecutor(
                self._butler,
                self._task_factory,
                skipExistingIn=self._skip_existing_in,
                clobberOutputs=self._clobber_output,
            )
            graph_executor = MPGraphExecutor(
                # Use roughly 80% of the available CPUs, rounded up.
                numProc=math.ceil(0.8 * multiprocessing.cpu_count()),
                timeout=2_592_000.0,  # In practice, timeout is never helpful; set to 30 days.
                quantumExecutor=quantum_executor,
                failFast=fail_fast,
            )
            # Have to reset connection pool to avoid sharing connections with
            # forked processes.
            self._butler.registry.resetConnectionPool()

        graph_executor.execute(graph)