Coverage for python/lsst/ctrl/mpexec/separablePipelineExecutor.py: 45%
52 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-14 19:56 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-14 19:56 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
23from __future__ import annotations
25__all__ = [
26 "SeparablePipelineExecutor",
27]
30import datetime
31import getpass
32import logging
33import math
34import multiprocessing
35from collections.abc import Iterable, Mapping
36from typing import Any, Protocol
38import lsst.pipe.base
39import lsst.resources
40from lsst.daf.butler import Butler
42from .mpGraphExecutor import MPGraphExecutor
43from .preExecInit import PreExecInit
44from .quantumGraphExecutor import QuantumGraphExecutor
45from .singleQuantumExecutor import SingleQuantumExecutor
46from .taskFactory import TaskFactory
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)


# Short alias for ``lsst.pipe.base._datasetQueryConstraints``, used in the
# ``_GraphBuilderLike.makeGraph`` signature.
# Only way to keep black, flake8, and mypy all happy
_dqc = lsst.pipe.base._datasetQueryConstraints
class _GraphBuilderLike(Protocol):
    """Structural type for objects that can build a quantum graph.

    Any object that provides a ``makeGraph`` method compatible with the
    signature below satisfies this protocol; no inheritance is required.
    """

    def makeGraph(
        self,
        pipeline: lsst.pipe.base.Pipeline | Iterable[lsst.pipe.base.pipeline.TaskDef],
        collections: Any,
        run: str,
        userQuery: str | None,
        datasetQueryConstraint: _dqc.DatasetQueryConstraintVariant = _dqc._ALL,
        metadata: Mapping[str, Any] | None = None,
        bind: Mapping[str, Any] | None = None,
    ) -> lsst.pipe.base.QuantumGraph:
        """Build a quantum graph for a pipeline.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline` or iterable of `TaskDef`
            The pipeline to build a graph for.
        collections
            The input collections.
        run : `str`
            The output run collection.
        userQuery : `str` or `None`
            A data ID query constraining the generated quanta.
        datasetQueryConstraint : `DatasetQueryConstraintVariant`, optional
            Never passed by this module; its meaning is defined by the
            implementing builder.
        metadata : mapping [`str`, `Any`], optional
            Extra metadata to associate with the graph.
        bind : mapping [`str`, `Any`], optional
            Never passed by this module; its meaning is defined by the
            implementing builder.

        Returns
        -------
        graph : `lsst.pipe.base.QuantumGraph`
            The generated quantum graph.
        """
        ...
class SeparablePipelineExecutor:
    """An executor that allows each step of pipeline execution to be
    run independently.

    The executor can run any or all of the following steps:

    * pre-execution initialization
    * pipeline building
    * quantum graph generation
    * quantum graph execution

    Any of these steps can also be handed off to external code without
    compromising the remaining ones.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        A Butler whose ``collections`` and ``run`` attributes contain the input
        and output collections to use for processing.
    clobber_output : `bool`, optional
        If set, the pipeline execution overwrites existing output files.
        Otherwise, any conflict between existing and new outputs is an error.
    skip_existing_in : iterable [`str`], optional
        If not empty, the pipeline execution searches the listed collections
        for existing outputs, and skips any quanta that have run to completion
        (or have no work to do). Otherwise, all tasks are attempted (subject
        to ``clobber_output``).
    task_factory : `lsst.pipe.base.TaskFactory`, optional
        A custom task factory for use in pre-execution and execution. If
        `None` (the default), a new instance of `lsst.ctrl.mpexec.TaskFactory`
        is used.
    resources : `~lsst.pipe.base.ExecutionResources`, optional
        The resources available to each quantum being executed.

    Raises
    ------
    ValueError
        Raised if ``butler`` does not specify input collections or an
        output run.
    """

    def __init__(
        self,
        butler: Butler,
        clobber_output: bool = False,
        skip_existing_in: Iterable[str] | None = None,
        task_factory: lsst.pipe.base.TaskFactory | None = None,
        resources: lsst.pipe.base.ExecutionResources | None = None,
    ):
        # Build a new Butler from the caller's one, with the same input
        # collections and output run.
        self._butler = Butler(butler=butler, collections=butler.collections, run=butler.run)
        if not self._butler.collections:
            raise ValueError("Butler must specify input collections for pipeline.")
        if not self._butler.run:
            raise ValueError("Butler must specify output run for pipeline.")

        self._clobber_output = clobber_output
        # Materialize the iterable so it can be reused by several steps.
        self._skip_existing_in = list(skip_existing_in) if skip_existing_in else []

        # Compare against None explicitly: a caller-supplied factory must be
        # honored even if the object happens to evaluate as false.
        self._task_factory = task_factory if task_factory is not None else TaskFactory()
        self.resources = resources

    def pre_execute_qgraph(
        self,
        graph: lsst.pipe.base.QuantumGraph,
        register_dataset_types: bool = False,
        save_init_outputs: bool = True,
        save_versions: bool = True,
    ) -> None:
        """Run pre-execution initialization.

        This method will be deprecated after DM-38041, to be replaced with a
        method that takes either a `~lsst.pipe.base.Pipeline` or a
        ``ResolvedPipelineGraph`` instead of a `~lsst.pipe.base.QuantumGraph`.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            The quantum graph defining the pipeline and datasets to
            be initialized.
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types from the pipeline
            represented by ``graph``.
        save_init_outputs : `bool`, optional
            If `True`, create init-output datasets in this object's output run.
        save_versions : `bool`, optional
            If `True`, save a package versions dataset.
        """
        # The executor's ``clobber_output`` setting is forwarded as
        # PreExecInit's ``extendRun`` flag.
        pre_exec_init = PreExecInit(self._butler, self._task_factory, extendRun=self._clobber_output)
        pre_exec_init.initialize(
            graph=graph,
            saveInitOutputs=save_init_outputs,
            registerDatasetTypes=register_dataset_types,
            saveVersions=save_versions,
        )

    def make_pipeline(self, pipeline_uri: str | lsst.resources.ResourcePath) -> lsst.pipe.base.Pipeline:
        """Build a pipeline from pipeline and configuration information.

        Parameters
        ----------
        pipeline_uri : `str` or `lsst.resources.ResourcePath`
            URI to a file containing a pipeline definition. A URI fragment may
            be used to specify a subset of the pipeline, as described in
            :ref:`pipeline-running-intro`.

        Returns
        -------
        pipeline : `lsst.pipe.base.Pipeline`
            The fully-built pipeline.
        """
        return lsst.pipe.base.Pipeline.from_uri(pipeline_uri)

    def make_quantum_graph(
        self, pipeline: lsst.pipe.base.Pipeline, where: str = "", builder: _GraphBuilderLike | None = None
    ) -> lsst.pipe.base.QuantumGraph:
        """Build a quantum graph from a pipeline and input datasets.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            The pipeline for which to generate a quantum graph.
        where : `str`, optional
            A data ID query that constrains the quanta generated.
        builder : `lsst.pipe.base.GraphBuilder`-like, optional
            A graph builder that implements a
            `~lsst.pipe.base.GraphBuilder.makeGraph` method. If `None` (the
            default), a new instance of `lsst.pipe.base.GraphBuilder` is used.

        Returns
        -------
        graph : `lsst.pipe.base.QuantumGraph`
            The quantum graph for ``pipeline`` as run on the datasets
            identified by ``where``.

        Notes
        -----
        This method does no special handling of empty quantum graphs. If
        needed, clients can use `len` to test if the returned graph is empty.
        """
        # Only fall back to the default when no builder was given at all; a
        # custom builder is used even if it evaluates as false.
        if builder is None:
            builder = lsst.pipe.base.GraphBuilder(
                self._butler.registry,
                skipExistingIn=self._skip_existing_in,
                clobberOutputs=self._clobber_output,
            )

        # Provenance information passed to the builder along with the graph
        # request.
        metadata = {
            "input": self._butler.collections,
            "output_run": self._butler.run,
            "skip_existing_in": self._skip_existing_in,
            "skip_existing": bool(self._skip_existing_in),
            "data_query": where,
            "user": getpass.getuser(),
            "time": str(datetime.datetime.now()),
        }
        # The constructor already rejects a missing run; this narrows the
        # type of ``run`` from ``str | None`` to ``str`` for the call below.
        assert self._butler.run is not None, "Butler output run collection must be defined"
        graph = builder.makeGraph(
            pipeline,
            self._butler.collections,
            self._butler.run,
            userQuery=where,
            metadata=metadata,
        )
        _LOG.info(
            "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
            len(graph),
            len(graph.taskGraph),
            graph.graphID,
        )
        return graph

    def run_pipeline(
        self,
        graph: lsst.pipe.base.QuantumGraph,
        fail_fast: bool = False,
        graph_executor: QuantumGraphExecutor | None = None,
    ) -> None:
        """Run a pipeline in the form of a prepared quantum graph.

        Pre-execution initialization must have already been run;
        see `pre_execute_qgraph`.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            The pipeline and datasets to execute.
        fail_fast : `bool`, optional
            If `True`, abort all (parallel) execution if any task fails (only
            used with the default graph executor).
        graph_executor : `lsst.ctrl.mpexec.QuantumGraphExecutor`, optional
            A custom graph executor. If `None` (the default), a new instance
            of `lsst.ctrl.mpexec.MPGraphExecutor` is used.
        """
        # Only fall back to the default when no executor was given at all; a
        # custom executor is used even if it evaluates as false.
        if graph_executor is None:
            quantum_executor = SingleQuantumExecutor(
                self._butler,
                self._task_factory,
                skipExistingIn=self._skip_existing_in,
                clobberOutputs=self._clobber_output,
                resources=self.resources,
            )
            graph_executor = MPGraphExecutor(
                # Use 80% of the reported CPU count, rounded up.
                numProc=math.ceil(0.8 * multiprocessing.cpu_count()),
                timeout=2_592_000.0,  # In practice, timeout is never helpful; set to 30 days.
                quantumExecutor=quantum_executor,
                failFast=fail_fast,
            )
            # Have to reset connection pool to avoid sharing connections with
            # forked processes.
            self._butler.registry.resetConnectionPool()

        graph_executor.execute(graph)