Coverage for python/lsst/ctrl/mpexec/separablePipelineExecutor.py: 45%
50 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-11 10:25 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-11 10:25 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
23from __future__ import annotations
25__all__ = [
26 "SeparablePipelineExecutor",
27]
30import datetime
31import getpass
32import logging
33import math
34import multiprocessing
35from typing import Any, Iterable, Mapping, Protocol
37import lsst.pipe.base
38import lsst.resources
39from lsst.daf.butler import Butler
41from .mpGraphExecutor import MPGraphExecutor
42from .preExecInit import PreExecInit
43from .quantumGraphExecutor import QuantumGraphExecutor
44from .singleQuantumExecutor import SingleQuantumExecutor
45from .taskFactory import TaskFactory
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)
# Only way to keep black, flake8, and mypy all happy
# (short alias for a private lsst.pipe.base module whose names are used in
# the ``makeGraph`` signature of the builder protocol in this file).
_dqc = lsst.pipe.base._datasetQueryConstraints
class _GraphBuilderLike(Protocol):
    """Structural type for objects that can stand in for a graph builder.

    Any object exposing a ``makeGraph`` method with a compatible signature
    satisfies this protocol; no inheritance is required.
    """

    def makeGraph(
        self,
        pipeline: lsst.pipe.base.Pipeline | Iterable[lsst.pipe.base.pipeline.TaskDef],
        collections: Any,
        run: str,
        userQuery: str | None,
        datasetQueryConstraint: _dqc.DatasetQueryConstraintVariant = _dqc._ALL,
        metadata: Mapping[str, Any] | None = None,
        bind: Mapping[str, Any] | None = None,
    ) -> lsst.pipe.base.QuantumGraph:
        """Create a quantum graph for a pipeline.

        This is a signature-only declaration; implementations supply the
        actual behavior.
        """
        ...
class SeparablePipelineExecutor:
    """An executor that allows each step of pipeline execution to be
    run independently.

    The executor can run any or all of the following steps:

    * pre-execution initialization
    * pipeline building
    * quantum graph generation
    * quantum graph execution

    Any of these steps can also be handed off to external code without
    compromising the remaining ones.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        A Butler whose ``collections`` and ``run`` attributes contain the input
        and output collections to use for processing.
    clobber_output : `bool`, optional
        If set, the pipeline execution overwrites existing output files.
        Otherwise, any conflict between existing and new outputs is an error.
    skip_existing_in : `str` or iterable [`str`], optional
        If not empty, the pipeline execution searches the listed collections
        for existing outputs, and skips any quanta that have run to completion
        (or have no work to do). Otherwise, all tasks are attempted (subject
        to ``clobber_output``). A single `str` is treated as one collection
        name rather than being split into characters.
    task_factory : `lsst.pipe.base.TaskFactory`, optional
        A custom task factory for use in pre-execution and execution. By
        default, a new instance of `lsst.ctrl.mpexec.TaskFactory` is used.

    Raises
    ------
    ValueError
        Raised if ``butler`` does not specify input collections or an
        output run.
    """

    def __init__(
        self,
        butler: Butler,
        clobber_output: bool = False,
        skip_existing_in: Iterable[str] | None = None,
        task_factory: lsst.pipe.base.TaskFactory | None = None,
    ):
        # Build this object's own Butler from the caller's one, carrying over
        # its collections and run.
        # NOTE(review): presumably this isolates the executor from later
        # changes to the caller's Butler -- confirm against the Butler docs.
        self._butler = Butler(butler=butler, collections=butler.collections, run=butler.run)
        if not self._butler.collections:
            raise ValueError("Butler must specify input collections for pipeline.")
        if not self._butler.run:
            raise ValueError("Butler must specify output run for pipeline.")

        self._clobber_output = clobber_output

        # Normalize to a list. A bare string is itself an iterable of `str`,
        # so it must be wrapped explicitly or ``list`` would split it into
        # single characters.
        if not skip_existing_in:
            self._skip_existing_in = []
        elif isinstance(skip_existing_in, str):
            self._skip_existing_in = [skip_existing_in]
        else:
            self._skip_existing_in = list(skip_existing_in)

        # Compare against None instead of relying on truthiness, so that a
        # caller-supplied factory is never discarded just because it
        # evaluates as false.
        self._task_factory = task_factory if task_factory is not None else TaskFactory()

    def pre_execute_qgraph(
        self,
        graph: lsst.pipe.base.QuantumGraph,
        register_dataset_types: bool = False,
        save_init_outputs: bool = True,
        save_versions: bool = True,
    ) -> None:
        """Run pre-execution initialization.

        This method will be deprecated after DM-38041, to be replaced with a
        method that takes either a `~lsst.pipe.base.Pipeline` or a
        ``ResolvedPipelineGraph`` instead of a `~lsst.pipe.base.QuantumGraph`.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            The quantum graph defining the pipeline and datasets to
            be initialized.
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types from the pipeline
            represented by ``graph``.
        save_init_outputs : `bool`, optional
            If `True`, create init-output datasets in this object's output run.
        save_versions : `bool`, optional
            If `True`, save a package versions dataset.
        """
        # ``clobber_output`` is forwarded as ``extendRun`` to the initializer.
        pre_exec_init = PreExecInit(self._butler, self._task_factory, extendRun=self._clobber_output)
        pre_exec_init.initialize(
            graph=graph,
            saveInitOutputs=save_init_outputs,
            registerDatasetTypes=register_dataset_types,
            saveVersions=save_versions,
        )

    def make_pipeline(self, pipeline_uri: str | lsst.resources.ResourcePath) -> lsst.pipe.base.Pipeline:
        """Build a pipeline from pipeline and configuration information.

        Parameters
        ----------
        pipeline_uri : `str` or `lsst.resources.ResourcePath`
            URI to a file containing a pipeline definition. A URI fragment may
            be used to specify a subset of the pipeline, as described in
            :ref:`pipeline-running-intro`.

        Returns
        -------
        pipeline : `lsst.pipe.base.Pipeline`
            The fully-built pipeline.
        """
        return lsst.pipe.base.Pipeline.from_uri(pipeline_uri)

    def make_quantum_graph(
        self, pipeline: lsst.pipe.base.Pipeline, where: str = "", builder: _GraphBuilderLike | None = None
    ) -> lsst.pipe.base.QuantumGraph:
        """Build a quantum graph from a pipeline and input datasets.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            The pipeline for which to generate a quantum graph.
        where : `str`, optional
            A data ID query that constrains the quanta generated.
        builder : `lsst.pipe.base.GraphBuilder`-like, optional
            A graph builder that implements a
            `~lsst.pipe.base.GraphBuilder.makeGraph` method. By default, a new
            instance of `lsst.pipe.base.GraphBuilder` is used.

        Returns
        -------
        graph : `lsst.pipe.base.QuantumGraph`
            The quantum graph for ``pipeline`` as run on the datasets
            identified by ``where``.

        Notes
        -----
        This method does no special handling of empty quantum graphs. If
        needed, clients can use `len` to test if the returned graph is empty.
        """
        # Only fall back to the default builder when none was supplied; a
        # custom builder is honored even if it evaluates as false.
        if builder is None:
            builder = lsst.pipe.base.GraphBuilder(
                self._butler.registry,
                skipExistingIn=self._skip_existing_in,
                clobberOutputs=self._clobber_output,
            )

        # Provenance information handed to the builder together with the
        # graph request.
        metadata = {
            "input": self._butler.collections,
            "output_run": self._butler.run,
            "skip_existing_in": self._skip_existing_in,
            "skip_existing": bool(self._skip_existing_in),
            "data_query": where,
            "user": getpass.getuser(),
            "time": str(datetime.datetime.now()),
        }
        # The constructor already rejects a Butler without a run; this
        # assertion restates that invariant before ``run`` is passed on as a
        # plain `str`.
        assert self._butler.run is not None, "Butler output run collection must be defined"
        graph = builder.makeGraph(
            pipeline,
            self._butler.collections,
            self._butler.run,
            userQuery=where,
            metadata=metadata,
        )
        _LOG.info(
            "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
            len(graph),
            len(graph.taskGraph),
            graph.graphID,
        )
        return graph

    def run_pipeline(
        self,
        graph: lsst.pipe.base.QuantumGraph,
        fail_fast: bool = False,
        graph_executor: QuantumGraphExecutor | None = None,
    ) -> None:
        """Run a pipeline in the form of a prepared quantum graph.

        Pre-execution initialization must have already been run;
        see `pre_execute_qgraph`.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            The pipeline and datasets to execute.
        fail_fast : `bool`, optional
            If `True`, abort all (parallel) execution if any task fails (only
            used with the default graph executor).
        graph_executor : `lsst.ctrl.mpexec.QuantumGraphExecutor`, optional
            A custom graph executor. By default, a new instance of
            `lsst.ctrl.mpexec.MPGraphExecutor` is used.
        """
        # Only build the default executor when none was supplied; a custom
        # executor is honored even if it evaluates as false (for example, a
        # container-like executor that is currently empty).
        if graph_executor is None:
            quantum_executor = SingleQuantumExecutor(
                self._butler,
                self._task_factory,
                skipExistingIn=self._skip_existing_in,
                clobberOutputs=self._clobber_output,
            )
            graph_executor = MPGraphExecutor(
                # Use 80% of the reported CPU count, rounded up.
                numProc=math.ceil(0.8 * multiprocessing.cpu_count()),
                timeout=2_592_000.0,  # In practice, timeout is never helpful; set to 30 days.
                quantumExecutor=quantum_executor,
                failFast=fail_fast,
            )
            # Have to reset connection pool to avoid sharing connections with
            # forked processes.
            self._butler.registry.resetConnectionPool()

        graph_executor.execute(graph)