Coverage for python/lsst/ctrl/mpexec/separablePipelineExecutor.py: 45%
52 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-13 09:53 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-13 09:53 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
29from __future__ import annotations
# Names exported by ``from <this module> import *``.
__all__ = [
    "SeparablePipelineExecutor",
]
36import datetime
37import getpass
38import logging
39import math
40import multiprocessing
41from collections.abc import Iterable, Mapping
42from typing import Any, Protocol
44import lsst.pipe.base
45import lsst.resources
46from lsst.daf.butler import Butler
48from .mpGraphExecutor import MPGraphExecutor
49from .preExecInit import PreExecInit
50from .quantumGraphExecutor import QuantumGraphExecutor
51from .singleQuantumExecutor import SingleQuantumExecutor
52from .taskFactory import TaskFactory
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)


# Only way to keep black, flake8, and mypy all happy
# (short alias for the private constraints module; it is used in the default
# argument of ``_GraphBuilderLike.makeGraph`` below).
_dqc = lsst.pipe.base._datasetQueryConstraints
class _GraphBuilderLike(Protocol):
    """Structural type for the ``builder`` argument of
    `SeparablePipelineExecutor.make_quantum_graph`.

    Any object exposing a ``makeGraph`` method with a compatible signature
    satisfies this protocol; no inheritance is required.
    """

    def makeGraph(
        self,
        pipeline: lsst.pipe.base.Pipeline | Iterable[lsst.pipe.base.pipeline.TaskDef],
        collections: Any,
        run: str,
        userQuery: str | None,
        datasetQueryConstraint: _dqc.DatasetQueryConstraintVariant = _dqc._ALL,
        metadata: Mapping[str, Any] | None = None,
        bind: Mapping[str, Any] | None = None,
    ) -> lsst.pipe.base.QuantumGraph:
        """Build and return a quantum graph for ``pipeline``.

        This is a signature-only declaration; implementations supply the
        behavior.
        """
        ...
class SeparablePipelineExecutor:
    """An executor that allows each step of pipeline execution to be
    run independently.

    The executor can run any or all of the following steps:

    * pre-execution initialization
    * pipeline building
    * quantum graph generation
    * quantum graph execution

    Any of these steps can also be handed off to external code without
    compromising the remaining ones.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        A Butler whose ``collections`` and ``run`` attributes contain the input
        and output collections to use for processing.
    clobber_output : `bool`, optional
        If set, the pipeline execution overwrites existing output files.
        Otherwise, any conflict between existing and new outputs is an error.
    skip_existing_in : iterable [`str`], optional
        If not empty, the pipeline execution searches the listed collections
        for existing outputs, and skips any quanta that have run to completion
        (or have no work to do). Otherwise, all tasks are attempted (subject
        to ``clobber_output``).
    task_factory : `lsst.pipe.base.TaskFactory`, optional
        A custom task factory for use in pre-execution and execution. By
        default, a new instance of `lsst.ctrl.mpexec.TaskFactory` is used.
    resources : `~lsst.pipe.base.ExecutionResources`, optional
        The resources available to each quantum being executed.

    Raises
    ------
    ValueError
        Raised if ``butler`` does not specify input collections or an
        output run.
    """

    def __init__(
        self,
        butler: Butler,
        clobber_output: bool = False,
        skip_existing_in: Iterable[str] | None = None,
        task_factory: lsst.pipe.base.TaskFactory | None = None,
        resources: lsst.pipe.base.ExecutionResources | None = None,
    ):
        # Construct a separate Butler instance from the caller's, carrying
        # over its input collections and output run.
        self._butler = Butler(butler=butler, collections=butler.collections, run=butler.run)
        if not self._butler.collections:
            raise ValueError("Butler must specify input collections for pipeline.")
        if not self._butler.run:
            raise ValueError("Butler must specify output run for pipeline.")

        self._clobber_output = clobber_output
        self._skip_existing_in = list(skip_existing_in) if skip_existing_in else []

        # Compare against None explicitly: a caller-supplied factory must be
        # honored even if the object happens to evaluate as falsy.
        self._task_factory = task_factory if task_factory is not None else TaskFactory()
        self.resources = resources

    def pre_execute_qgraph(
        self,
        graph: lsst.pipe.base.QuantumGraph,
        register_dataset_types: bool = False,
        save_init_outputs: bool = True,
        save_versions: bool = True,
    ) -> None:
        """Run pre-execution initialization.

        This method will be deprecated after DM-38041, to be replaced with a
        method that takes either a `~lsst.pipe.base.Pipeline` or a
        ``ResolvedPipelineGraph`` instead of a `~lsst.pipe.base.QuantumGraph`.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            The quantum graph defining the pipeline and datasets to
            be initialized.
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types from the pipeline
            represented by ``graph``.
        save_init_outputs : `bool`, optional
            If `True`, create init-output datasets in this object's output run.
        save_versions : `bool`, optional
            If `True`, save a package versions dataset.
        """
        pre_exec_init = PreExecInit(self._butler, self._task_factory, extendRun=self._clobber_output)
        pre_exec_init.initialize(
            graph=graph,
            saveInitOutputs=save_init_outputs,
            registerDatasetTypes=register_dataset_types,
            saveVersions=save_versions,
        )

    def make_pipeline(self, pipeline_uri: str | lsst.resources.ResourcePath) -> lsst.pipe.base.Pipeline:
        """Build a pipeline from pipeline and configuration information.

        Parameters
        ----------
        pipeline_uri : `str` or `lsst.resources.ResourcePath`
            URI to a file containing a pipeline definition. A URI fragment may
            be used to specify a subset of the pipeline, as described in
            :ref:`pipeline-running-intro`.

        Returns
        -------
        pipeline : `lsst.pipe.base.Pipeline`
            The fully-built pipeline.
        """
        return lsst.pipe.base.Pipeline.from_uri(pipeline_uri)

    def make_quantum_graph(
        self, pipeline: lsst.pipe.base.Pipeline, where: str = "", builder: _GraphBuilderLike | None = None
    ) -> lsst.pipe.base.QuantumGraph:
        """Build a quantum graph from a pipeline and input datasets.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            The pipeline for which to generate a quantum graph.
        where : `str`, optional
            A data ID query that constrains the quanta generated.
        builder : `lsst.pipe.base.GraphBuilder`-like, optional
            A graph builder that implements a
            `~lsst.pipe.base.GraphBuilder.makeGraph` method. By default, a new
            instance of `lsst.pipe.base.GraphBuilder` is used.

        Returns
        -------
        graph : `lsst.pipe.base.QuantumGraph`
            The quantum graph for ``pipeline`` as run on the datasets
            identified by ``where``.

        Notes
        -----
        This method does no special handling of empty quantum graphs. If
        needed, clients can use `len` to test if the returned graph is empty.
        """
        # Only fall back to the default builder when none was supplied; a
        # custom builder is used as-is regardless of its truth value.
        if builder is None:
            builder = lsst.pipe.base.GraphBuilder(
                self._butler.registry,
                skipExistingIn=self._skip_existing_in,
                clobberOutputs=self._clobber_output,
            )

        # Provenance information handed to the builder together with the graph
        # request.
        metadata = {
            "input": self._butler.collections,
            "output_run": self._butler.run,
            "skip_existing_in": self._skip_existing_in,
            "skip_existing": bool(self._skip_existing_in),
            "data_query": where,
            "user": getpass.getuser(),
            "time": str(datetime.datetime.now()),
        }
        # __init__ rejects a Butler without an output run; this assertion
        # restates that invariant before ``run`` is passed on as a `str`.
        assert self._butler.run is not None, "Butler output run collection must be defined"
        graph = builder.makeGraph(
            pipeline,
            self._butler.collections,
            self._butler.run,
            userQuery=where,
            metadata=metadata,
        )
        _LOG.info(
            "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
            len(graph),
            len(graph.taskGraph),
            graph.graphID,
        )
        return graph

    def run_pipeline(
        self,
        graph: lsst.pipe.base.QuantumGraph,
        fail_fast: bool = False,
        graph_executor: QuantumGraphExecutor | None = None,
    ) -> None:
        """Run a pipeline in the form of a prepared quantum graph.

        Pre-execution initialization must have already been run;
        see `pre_execute_qgraph`.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            The pipeline and datasets to execute.
        fail_fast : `bool`, optional
            If `True`, abort all (parallel) execution if any task fails (only
            used with the default graph executor).
        graph_executor : `lsst.ctrl.mpexec.QuantumGraphExecutor`, optional
            A custom graph executor. By default, a new instance of
            `lsst.ctrl.mpexec.MPGraphExecutor` is used.
        """
        # Only build the default executor when none was supplied; a custom
        # executor is used as-is regardless of its truth value.
        if graph_executor is None:
            quantum_executor = SingleQuantumExecutor(
                self._butler,
                self._task_factory,
                skipExistingIn=self._skip_existing_in,
                clobberOutputs=self._clobber_output,
                resources=self.resources,
            )
            graph_executor = MPGraphExecutor(
                # Use 80% of the reported CPU count, rounded up.
                numProc=math.ceil(0.8 * multiprocessing.cpu_count()),
                timeout=2_592_000.0,  # In practice, timeout is never helpful; set to 30 days.
                quantumExecutor=quantum_executor,
                failFast=fail_fast,
            )
            # Have to reset connection pool to avoid sharing connections with
            # forked processes.
            self._butler.registry.resetConnectionPool()

        graph_executor.execute(graph)