Coverage for python/lsst/ctrl/mpexec/separablePipelineExecutor.py: 42%
57 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-10 03:29 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-10 03:29 -0700
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
29from __future__ import annotations
31__all__ = [
32 "SeparablePipelineExecutor",
33]
36import datetime
37import getpass
38import logging
39import warnings
40from collections.abc import Iterable, Mapping
41from typing import Any, Protocol
43import lsst.pipe.base
44import lsst.resources
45from lsst.daf.butler import Butler
46from lsst.pipe.base.all_dimensions_quantum_graph_builder import (
47 AllDimensionsQuantumGraphBuilder,
48 DatasetQueryConstraintVariant,
49)
50from lsst.pipe.base.quantum_graph_builder import QuantumGraphBuilder
51from lsst.utils.introspection import find_outside_stacklevel
53from .mpGraphExecutor import MPGraphExecutor
54from .preExecInit import PreExecInit
55from .quantumGraphExecutor import QuantumGraphExecutor
56from .singleQuantumExecutor import SingleQuantumExecutor
57from .taskFactory import TaskFactory
59_LOG = logging.getLogger(__name__)
class _GraphBuilderLike(Protocol):
    """Structural type for legacy graph-builder objects.

    Any object providing a compatible ``makeGraph`` method satisfies this
    protocol; it is used to annotate the deprecated ``builder`` argument of
    `SeparablePipelineExecutor.make_quantum_graph`.
    """

    def makeGraph(
        self,
        pipeline: lsst.pipe.base.Pipeline | Iterable[lsst.pipe.base.pipeline.TaskDef],
        collections: Any,
        run: str,
        userQuery: str | None,
        datasetQueryConstraint: DatasetQueryConstraintVariant = DatasetQueryConstraintVariant.ALL,
        metadata: Mapping[str, Any] | None = None,
        bind: Mapping[str, Any] | None = None,
    ) -> lsst.pipe.base.QuantumGraph:
        """Build a quantum graph for a pipeline.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline` or iterable of `TaskDef`
            The pipeline for which to build the graph.
        collections : `~typing.Any`
            Input collections; the executor passes its butler's
            ``collections`` here.
        run : `str`
            Output run; the executor passes its butler's ``run`` here.
        userQuery : `str` or `None`
            Data ID query; the executor passes its ``where`` argument here.
        datasetQueryConstraint : `DatasetQueryConstraintVariant`, optional
            Presumably controls which datasets constrain the query; the
            semantics are defined by the implementation, not by this
            protocol.
        metadata : `~collections.abc.Mapping` [`str`, `~typing.Any`], optional
            Metadata to associate with the graph.
        bind : `~collections.abc.Mapping` [`str`, `~typing.Any`], optional
            Presumably identifier bindings for ``userQuery``; confirm
            against the concrete implementation.

        Returns
        -------
        graph : `lsst.pipe.base.QuantumGraph`
            The generated quantum graph.
        """
        ...
class SeparablePipelineExecutor:
    """An executor that allows each step of pipeline execution to be
    run independently.

    The executor can run any or all of the following steps:

    * pre-execution initialization
    * pipeline building
    * quantum graph generation
    * quantum graph execution

    Any of these steps can also be handed off to external code without
    compromising the remaining ones.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        A Butler whose ``collections`` and ``run`` attributes contain the input
        and output collections to use for processing.
    clobber_output : `bool`, optional
        If set, the pipeline execution overwrites existing output files.
        Otherwise, any conflict between existing and new outputs is an error.
    skip_existing_in : iterable [`str`], optional
        If not empty, the pipeline execution searches the listed collections
        for existing outputs, and skips any quanta that have run to completion
        (or have no work to do). Otherwise, all tasks are attempted (subject
        to ``clobber_output``).
    task_factory : `lsst.pipe.base.TaskFactory`, optional
        A custom task factory for use in pre-execution and execution. By
        default, a new instance of `lsst.ctrl.mpexec.TaskFactory` is used.
    resources : `~lsst.pipe.base.ExecutionResources`, optional
        The resources available to each quantum being executed.

    Raises
    ------
    ValueError
        Raised if the butler does not specify input collections or an
        output run.
    """

    def __init__(
        self,
        butler: Butler,
        clobber_output: bool = False,
        skip_existing_in: Iterable[str] | None = None,
        task_factory: lsst.pipe.base.TaskFactory | None = None,
        resources: lsst.pipe.base.ExecutionResources | None = None,
    ):
        # Derive this executor's butler from the caller's one, carrying over
        # its input collections and output run.
        self._butler = Butler.from_config(butler=butler, collections=butler.collections, run=butler.run)
        if not self._butler.collections:
            raise ValueError("Butler must specify input collections for pipeline.")
        if not self._butler.run:
            raise ValueError("Butler must specify output run for pipeline.")

        self._clobber_output = clobber_output
        # Materialize the iterable so it can be reused by several steps.
        self._skip_existing_in = list(skip_existing_in) if skip_existing_in else []

        self._task_factory = task_factory if task_factory else TaskFactory()
        self.resources = resources

    def pre_execute_qgraph(
        self,
        graph: lsst.pipe.base.QuantumGraph,
        register_dataset_types: bool = False,
        save_init_outputs: bool = True,
        save_versions: bool = True,
    ) -> None:
        """Run pre-execution initialization.

        This method will be deprecated after DM-38041, to be replaced with a
        method that takes either a `~lsst.pipe.base.Pipeline` or a
        ``ResolvedPipelineGraph`` instead of a `~lsst.pipe.base.QuantumGraph`.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            The quantum graph defining the pipeline and datasets to
            be initialized.
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types from the pipeline
            represented by ``graph``.
        save_init_outputs : `bool`, optional
            If `True`, create init-output datasets in this object's output run.
        save_versions : `bool`, optional
            If `True`, save a package versions dataset.
        """
        pre_exec_init = PreExecInit(self._butler, self._task_factory, extendRun=self._clobber_output)
        pre_exec_init.initialize(
            graph=graph,
            saveInitOutputs=save_init_outputs,
            registerDatasetTypes=register_dataset_types,
            saveVersions=save_versions,
        )

    def make_pipeline(self, pipeline_uri: str | lsst.resources.ResourcePath) -> lsst.pipe.base.Pipeline:
        """Build a pipeline from pipeline and configuration information.

        Parameters
        ----------
        pipeline_uri : `str` or `lsst.resources.ResourcePath`
            URI to a file containing a pipeline definition. A URI fragment may
            be used to specify a subset of the pipeline, as described in
            :ref:`pipeline-running-intro`.

        Returns
        -------
        pipeline : `lsst.pipe.base.Pipeline`
            The fully-built pipeline.
        """
        return lsst.pipe.base.Pipeline.from_uri(pipeline_uri)

    def make_quantum_graph(
        self,
        pipeline: lsst.pipe.base.Pipeline,
        where: str = "",
        builder: _GraphBuilderLike | None = None,
        *,
        builder_class: type[QuantumGraphBuilder] = AllDimensionsQuantumGraphBuilder,
        attach_datastore_records: bool = False,
        **kwargs: Any,
    ) -> lsst.pipe.base.QuantumGraph:
        """Build a quantum graph from a pipeline and input datasets.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            The pipeline for which to generate a quantum graph.
        where : `str`, optional
            A data ID query that constrains the quanta generated. Must not be
            provided if a custom ``builder_class`` is given and that class does
            not accept ``where`` as a construction argument.
        builder : `lsst.pipe.base.GraphBuilder`-like, optional
            A graph builder that implements a
            `~lsst.pipe.base.GraphBuilder.makeGraph` method. If not provided,
            a new instance of ``builder_class`` is used instead. Deprecated in
            favor of ``builder_class`` and will be removed after v27.
        builder_class : `type` [ \
                `lsst.pipe.base.quantum_graph_builder.QuantumGraphBuilder` ], \
                optional
            Quantum graph builder implementation. Ignored if ``builder`` is
            provided.
        attach_datastore_records : `bool`, optional
            Whether to attach datastore records. These are currently used only
            by `lsst.daf.butler.QuantumBackedButler`, which is not used by
            `SeparablePipelineExecutor` for execution. Ignored if ``builder``
            is provided.
        **kwargs
            Additional keyword arguments are forwarded to ``builder_class``
            when a quantum graph builder instance is constructed. All
            arguments accepted by the
            `~lsst.pipe.base.quantum_graph_builder.QuantumGraphBuilder` base
            class are provided automatically (from explicit arguments to this
            method and executor attributes) and do not need to be included
            as keyword arguments.

        Returns
        -------
        graph : `lsst.pipe.base.QuantumGraph`
            The quantum graph for ``pipeline`` as run on the datasets
            identified by ``where``.

        Notes
        -----
        This method does no special handling of empty quantum graphs. If
        needed, clients can use `len` to test if the returned graph is empty.
        """
        metadata = {
            "input": self._butler.collections,
            "output_run": self._butler.run,
            "skip_existing_in": self._skip_existing_in,
            "skip_existing": bool(self._skip_existing_in),
            "data_query": where,
            "user": getpass.getuser(),
            "time": str(datetime.datetime.now()),
        }
        # Test identity rather than truthiness so that a supplied builder is
        # never silently ignored just because it happens to evaluate as false.
        if builder is not None:
            warnings.warn(
                "The 'builder' argument to SeparablePipelineExecutor.make_quantum_graph "
                "is deprecated in favor of 'builder_class', and will be removed after v27.",
                FutureWarning,
                stacklevel=find_outside_stacklevel("lsst.ctrl.mpexec"),
            )
            # The constructor already rejects a butler without a run; this
            # assertion only narrows the type for static checkers.
            assert self._butler.run is not None, "Butler output run collection must be defined"
            graph = builder.makeGraph(
                pipeline,
                self._butler.collections,
                self._butler.run,
                userQuery=where,
                metadata=metadata,
            )
        else:
            if where:
                # Only pass 'where' if it's actually provided, since some
                # QuantumGraphBuilder subclasses may not accept it.
                kwargs["where"] = where
            qg_builder = builder_class(
                pipeline.to_graph(),
                self._butler,
                skip_existing_in=self._skip_existing_in,
                clobber=self._clobber_output,
                **kwargs,
            )
            graph = qg_builder.build(metadata=metadata, attach_datastore_records=attach_datastore_records)
        _LOG.info(
            "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
            len(graph),
            len(graph.taskGraph),
            graph.graphID,
        )
        return graph

    def run_pipeline(
        self,
        graph: lsst.pipe.base.QuantumGraph,
        fail_fast: bool = False,
        graph_executor: QuantumGraphExecutor | None = None,
        num_proc: int = 1,
    ) -> None:
        """Run a pipeline in the form of a prepared quantum graph.

        Pre-execution initialization must have already been run;
        see `pre_execute_qgraph`.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            The pipeline and datasets to execute.
        fail_fast : `bool`, optional
            If `True`, abort all execution if any task fails when
            running with multiple processes. Only used with the default graph
            executor.
        graph_executor : `lsst.ctrl.mpexec.QuantumGraphExecutor`, optional
            A custom graph executor. By default, a new instance of
            `lsst.ctrl.mpexec.MPGraphExecutor` is used.
        num_proc : `int`, optional
            The number of processes that can be used to run the pipeline. The
            default value ensures that no subprocess is created. Only used with
            the default graph executor.
        """
        # Test identity rather than truthiness so that a supplied executor is
        # never replaced by the default just because it evaluates as false.
        if graph_executor is None:
            quantum_executor = SingleQuantumExecutor(
                self._butler,
                self._task_factory,
                skipExistingIn=self._skip_existing_in,
                clobberOutputs=self._clobber_output,
                resources=self.resources,
            )
            graph_executor = MPGraphExecutor(
                numProc=num_proc,
                timeout=2_592_000.0,  # In practice, timeout is never helpful; set to 30 days.
                quantumExecutor=quantum_executor,
                failFast=fail_fast,
            )
            # Have to reset connection pool to avoid sharing connections with
            # forked processes.
            self._butler.registry.resetConnectionPool()

        graph_executor.execute(graph)