Coverage for python/lsst/ctrl/mpexec/simple_pipeline_executor.py: 39%
57 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-09 12:06 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-09 12:06 +0000
# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

# Public API of this module.
__all__ = ("SimplePipelineExecutor",)

from collections.abc import Iterable, Iterator, Mapping
from typing import Any

from lsst.daf.butler import Butler, CollectionType, Quantum
from lsst.pex.config import Config
from lsst.pipe.base import (
    ExecutionResources,
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineTask,
    QuantumGraph,
    TaskDef,
)

from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor
from .taskFactory import TaskFactory


class SimplePipelineExecutor:
    """A simple, high-level executor for pipelines.

    Parameters
    ----------
    quantum_graph : `~lsst.pipe.base.QuantumGraph`
        Graph to be executed.
    butler : `~lsst.daf.butler.Butler`
        Object that manages all I/O. Must be initialized with `collections`
        and `run` properties that correspond to the input and output
        collections, which must be consistent with those used to create
        ``quantum_graph``.
    resources : `~lsst.pipe.base.ExecutionResources`, optional
        The resources available to each quantum being executed.

    Notes
    -----
    Most callers should use one of the `classmethod` factory functions
    (`from_pipeline_filename`, `from_task_class`, `from_pipeline`) instead of
    invoking the constructor directly; these guarantee that the
    `~lsst.daf.butler.Butler` and `~lsst.pipe.base.QuantumGraph` are created
    consistently.

    This class is intended primarily to support unit testing and small-scale
    integration testing of `~lsst.pipe.base.PipelineTask` classes. It
    deliberately lacks many features present in the command-line-only
    ``pipetask`` tool in order to keep the implementation simple. Python
    callers that need more sophistication should call lower-level tools like
    `~lsst.pipe.base.GraphBuilder`, `PreExecInit`, and `SingleQuantumExecutor`
    directly.
    """

    def __init__(
        self,
        quantum_graph: QuantumGraph,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ):
        self.quantum_graph = quantum_graph
        self.butler = butler
        self.resources = resources

    @classmethod
    def prep_butler(
        cls,
        root: str,
        inputs: Iterable[str],
        output: str,
        output_run: str | None = None,
    ) -> Butler:
        """Return configured `~lsst.daf.butler.Butler`.

        Helper method for creating `~lsst.daf.butler.Butler` instances with
        collections appropriate for processing.

        Parameters
        ----------
        root : `str`
            Root of the butler data repository; must already exist, with all
            necessary input data.
        inputs : `~collections.abc.Iterable` [ `str` ]
            Collections to search for all input datasets, in search order.
        output : `str`
            Name of a new output `~lsst.daf.butler.CollectionType.CHAINED`
            collection to create that will combine both inputs and outputs.
        output_run : `str`, optional
            Name of the output `~lsst.daf.butler.CollectionType.RUN` that will
            directly hold all output datasets. If not provided, a name will
            be created from ``output`` and a timestamp.

        Returns
        -------
        butler : `~lsst.daf.butler.Butler`
            Butler client instance compatible with all `classmethod` factories.
            Always writeable.
        """
        if output_run is None:
            output_run = f"{output}/{Instrument.makeCollectionTimestamp()}"
        # Make initial butler with no collections, since we haven't created
        # them yet.
        butler = Butler.from_config(root, writeable=True)
        butler.registry.registerCollection(output_run, CollectionType.RUN)
        butler.registry.registerCollection(output, CollectionType.CHAINED)
        # The output run is placed first in the chain, followed by the
        # inputs in the order given.
        collections = [output_run, *inputs]
        butler.registry.setCollectionChain(output, collections)
        # Remake butler to let it infer default data IDs from collections, now
        # that those collections exist.
        return Butler.from_config(butler=butler, collections=[output], run=output_run)

    @classmethod
    def from_pipeline_filename(
        cls,
        pipeline_filename: str,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an on-disk
        pipeline YAML file.

        Parameters
        ----------
        pipeline_filename : `str`
            Name of the YAML file to load the pipeline definition from.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O. `prep_butler` can be used to create
            one.
        resources : `~lsst.pipe.base.ExecutionResources`, optional
            The resources available to each quantum being executed.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.
        """
        pipeline = Pipeline.fromFile(pipeline_filename)
        return cls.from_pipeline(pipeline, butler=butler, where=where, bind=bind, resources=resources)

    @classmethod
    def from_task_class(
        cls,
        task_class: type[PipelineTask],
        config: Config | None = None,
        label: str | None = None,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from a pipeline
        containing a single task.

        Parameters
        ----------
        task_class : `type`
            A concrete `~lsst.pipe.base.PipelineTask` subclass.
        config : `~lsst.pex.config.Config`, optional
            Configuration for the task. If not provided, task-level defaults
            will be used (no per-instrument overrides).
        label : `str`, optional
            Label for the task in its pipeline; defaults to
            ``task_class._DefaultName``.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O. `prep_butler` can be used to create
            one.
        resources : `~lsst.pipe.base.ExecutionResources`, optional
            The resources available to each quantum being executed.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.

        Raises
        ------
        TypeError
            Raised if ``config`` is not an instance of
            ``task_class.ConfigClass``.
        """
        if config is None:
            config = task_class.ConfigClass()
        if label is None:
            label = task_class._DefaultName
        if not isinstance(config, task_class.ConfigClass):
            raise TypeError(
                f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
                f"got {type(config).__name__}."
            )
        task_def = TaskDef(taskName=task_class.__name__, config=config, label=label, taskClass=task_class)
        return cls.from_pipeline([task_def], butler=butler, where=where, bind=bind, resources=resources)

    @classmethod
    def from_pipeline(
        cls,
        pipeline: Pipeline | Iterable[TaskDef],
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
        **kwargs: Any,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an in-memory
        pipeline.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline` or \
                `~collections.abc.Iterable` [ `~lsst.pipe.base.TaskDef` ]
            A Python object describing the tasks to run, along with their
            labels and configuration.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O. `prep_butler` can be used to create
            one.
        resources : `~lsst.pipe.base.ExecutionResources`, optional
            The resources available to each quantum being executed.
        **kwargs : `~typing.Any`
            Additional keyword arguments. They are accepted but not used
            anywhere in this method's body.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.
        """
        # Normalize both accepted input forms to a concrete list of task
        # definitions before handing them to the graph builder.
        if isinstance(pipeline, Pipeline):
            task_defs = list(pipeline.toExpandedPipeline())
        else:
            task_defs = list(pipeline)
        graph_builder = GraphBuilder(butler.registry)
        assert butler.run is not None, "Butler output run collection must be defined"
        quantum_graph = graph_builder.makeGraph(
            task_defs, collections=butler.collections, run=butler.run, userQuery=where, bind=bind
        )
        return cls(quantum_graph=quantum_graph, butler=butler, resources=resources)

    def run(self, register_dataset_types: bool = False, save_versions: bool = True) -> list[Quantum]:
        """Run all the quanta in the `~lsst.pipe.base.QuantumGraph` in
        topological order.

        Use this method to run all quanta in the graph. Use
        `as_generator` to get a generator to run the quanta one at
        a time.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `list` [ `~lsst.daf.butler.Quantum` ]
            Executed quanta.

        Notes
        -----
        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        return list(
            self.as_generator(register_dataset_types=register_dataset_types, save_versions=save_versions)
        )

    def as_generator(
        self, register_dataset_types: bool = False, save_versions: bool = True
    ) -> Iterator[Quantum]:
        """Yield quanta in the `~lsst.pipe.base.QuantumGraph` in topological
        order.

        These quanta will be run as the returned generator is iterated
        over. Use this method to run the quanta one at a time.
        Use `run` to run all quanta in the graph.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `~collections.abc.Iterator` [ `~lsst.daf.butler.Quantum` ]
            Executed quanta.

        Notes
        -----
        Global initialization steps (see `PreExecInit`) are performed
        immediately when this method is called, but individual quanta are not
        actually executed until the returned iterator is iterated over.

        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        task_factory = TaskFactory()
        pre_exec_init = PreExecInit(self.butler, task_factory)
        pre_exec_init.initialize(
            graph=self.quantum_graph, registerDatasetTypes=register_dataset_types, saveVersions=save_versions
        )
        single_quantum_executor = SingleQuantumExecutor(self.butler, task_factory, resources=self.resources)
        # Important that this returns a generator expression rather than being
        # a generator itself; that is what makes the PreExecInit stuff above
        # happen immediately instead of when the first quantum is executed,
        # which might be useful for callers who want to check the state of the
        # repo in between.
        return (single_quantum_executor.execute(qnode.taskDef, qnode.quantum) for qnode in self.quantum_graph)