Coverage for python/lsst/ctrl/mpexec/simple_pipeline_executor.py: 41%
66 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-07 02:50 -0700
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("SimplePipelineExecutor",)
32import warnings
33from collections.abc import Iterable, Iterator, Mapping
34from typing import Any
36from lsst.daf.butler import Butler, CollectionType, Quantum
37from lsst.pex.config import Config
38from lsst.pipe.base import (
39 ExecutionResources,
40 Instrument,
41 Pipeline,
42 PipelineGraph,
43 PipelineTask,
44 QuantumGraph,
45 TaskDef,
46)
47from lsst.pipe.base.all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder
48from lsst.utils.introspection import find_outside_stacklevel
50from .preExecInit import PreExecInit
51from .singleQuantumExecutor import SingleQuantumExecutor
52from .taskFactory import TaskFactory
class SimplePipelineExecutor:
    """Minimal, high-level driver that executes the quanta of a pipeline.

    Parameters
    ----------
    quantum_graph : `~lsst.pipe.base.QuantumGraph`
        The graph whose quanta will be executed.
    butler : `~lsst.daf.butler.Butler`
        Client through which all I/O is performed.  Its `collections` and
        `run` properties must be set to the input and output collections,
        and those must agree with the ones used to build ``quantum_graph``.
    resources : `~lsst.pipe.base.ExecutionResources`, optional
        Resources made available to each quantum while it executes.

    Notes
    -----
    Prefer the `classmethod` factories (`from_pipeline_filename`,
    `from_task_class`, `from_pipeline`, `from_pipeline_graph`) over calling
    the constructor; they ensure the `~lsst.daf.butler.Butler` and the
    `~lsst.pipe.base.QuantumGraph` are built consistently with each other.

    The main use case is unit tests and small-scale integration tests of
    `~lsst.pipe.base.PipelineTask` classes.  To keep the implementation
    simple, many features of the command-line-only ``pipetask`` tool are
    intentionally absent.  Python code that needs more control should use
    the lower-level pieces directly:
    `~lsst.pipe.base.quantum_graph_builder.QuantumGraphBuilder`,
    `PreExecInit`, and `SingleQuantumExecutor`.
    """

    def __init__(
        self,
        quantum_graph: QuantumGraph,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ):
        # Only stores the arguments; no validation or execution happens here.
        self.butler = butler
        self.resources = resources
        self.quantum_graph = quantum_graph

    @classmethod
    def prep_butler(
        cls,
        root: str,
        inputs: Iterable[str],
        output: str,
        output_run: str | None = None,
    ) -> Butler:
        """Create a `~lsst.daf.butler.Butler` set up for processing.

        Registers the output collections and returns a butler whose default
        collections are suitable for the `classmethod` factories.

        Parameters
        ----------
        root : `str`
            Root of an existing butler data repository that already holds
            all required input data.
        inputs : `~collections.abc.Iterable` [ `str` ]
            Input collections, listed in the order they should be searched.
        output : `str`
            Name of the new `~lsst.daf.butler.CollectionType.CHAINED`
            collection that will combine inputs and outputs.
        output_run : `str`, optional
            Name of the `~lsst.daf.butler.CollectionType.RUN` collection
            that will directly hold the output datasets.  When omitted, the
            name is formed from ``output`` and a timestamp.

        Returns
        -------
        butler : `~lsst.daf.butler.Butler`
            Writeable butler client usable with every `classmethod` factory.
        """
        run_name = output_run
        if run_name is None:
            run_name = f"{output}/{Instrument.makeCollectionTimestamp()}"
        # The collections do not exist yet, so this first butler is created
        # without any default collections and is used to register them.
        bootstrap = Butler.from_config(root, writeable=True)
        bootstrap.registry.registerCollection(run_name, CollectionType.RUN)
        bootstrap.registry.registerCollection(output, CollectionType.CHAINED)
        # The output run is placed ahead of the inputs in the chain.
        chain = [run_name, *inputs]
        bootstrap.registry.setCollectionChain(output, chain)
        # Now that the collections exist, construct a second butler so it
        # can infer default data IDs from them.
        return Butler.from_config(butler=bootstrap, collections=[output], run=run_name)

    @classmethod
    def from_pipeline_filename(
        cls,
        pipeline_filename: str,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ) -> SimplePipelineExecutor:
        """Build an executor from a pipeline YAML file on disk.

        The file is loaded into a pipeline, from which a QuantumGraph is
        generated.

        Parameters
        ----------
        pipeline_filename : `str`
            Path of the YAML file holding the pipeline definition.
        where : `str`, optional
            Data ID query expression that constrains the generated quanta.
        bind : `~collections.abc.Mapping`, optional
            Literal values to inject into the ``where`` expression, keyed by
            the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O; `prep_butler` can create one.
        resources : `~lsst.pipe.base.ExecutionResources`, optional
            Resources made available to each quantum while it executes.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            Executor holding the constructed `~lsst.pipe.base.QuantumGraph`
            and the `~lsst.daf.butler.Butler`, ready for `run` to be called.
        """
        return cls.from_pipeline(
            Pipeline.fromFile(pipeline_filename),
            where=where,
            bind=bind,
            butler=butler,
            resources=resources,
        )

    @classmethod
    def from_task_class(
        cls,
        task_class: type[PipelineTask],
        config: Config | None = None,
        label: str | None = None,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ) -> SimplePipelineExecutor:
        """Build an executor for a pipeline made of exactly one task.

        Parameters
        ----------
        task_class : `type`
            Concrete subclass of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pex.config.Config`, optional
            Task configuration.  When omitted, the task-level defaults are
            used (without per-instrument overrides).
        label : `str`, optional
            Label of the task within its pipeline; falls back to
            ``task_class._DefaultName``.
        where : `str`, optional
            Data ID query expression that constrains the generated quanta.
        bind : `~collections.abc.Mapping`, optional
            Literal values to inject into the ``where`` expression, keyed by
            the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O; `prep_butler` can create one.
        resources : `~lsst.pipe.base.ExecutionResources`, optional
            Resources made available to each quantum while it executes.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            Executor holding the constructed `~lsst.pipe.base.QuantumGraph`
            and the `~lsst.daf.butler.Butler`, ready for `run` to be called.

        Raises
        ------
        TypeError
            Raised if ``config`` is not an instance of
            ``task_class.ConfigClass``.
        """
        task_config = task_class.ConfigClass() if config is None else config
        task_label = task_class._DefaultName if label is None else label
        if not isinstance(task_config, task_class.ConfigClass):
            raise TypeError(
                f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
                f"got {type(task_config).__name__}."
            )
        single_task_graph = PipelineGraph()
        single_task_graph.add_task(label=task_label, task_class=task_class, config=task_config)
        return cls.from_pipeline_graph(
            single_task_graph,
            butler=butler,
            where=where,
            bind=bind,
            resources=resources,
        )

    @classmethod
    def from_pipeline(
        cls,
        pipeline: Pipeline | Iterable[TaskDef],
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ) -> SimplePipelineExecutor:
        """Build an executor from a pipeline that is already in memory.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline` or \
                `~collections.abc.Iterable` [ `~lsst.pipe.base.TaskDef` ]
            Description of the tasks to run with their labels and
            configuration.  Supplying `~lsst.pipe.base.TaskDef` objects is
            deprecated and will not be supported after v27.
        where : `str`, optional
            Data ID query expression that constrains the generated quanta.
        bind : `~collections.abc.Mapping`, optional
            Literal values to inject into the ``where`` expression, keyed by
            the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O; `prep_butler` can create one.
        resources : `~lsst.pipe.base.ExecutionResources`, optional
            Resources made available to each quantum while it executes.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            Executor holding the constructed `~lsst.pipe.base.QuantumGraph`
            and the `~lsst.daf.butler.Butler`, ready for `run` to be called.
        """
        if not isinstance(pipeline, Pipeline):
            # TODO: disable this block and adjust docs and annotations
            # on DM-40443.
            warnings.warn(
                "Passing TaskDefs to SimplePipelineExecutor.from_pipeline is deprecated "
                "and will be removed after v27.",
                category=FutureWarning,
                stacklevel=find_outside_stacklevel("lsst.ctrl.mpexec"),
            )
            # Rebuild an equivalent graph one TaskDef at a time.
            graph = PipelineGraph()
            for entry in pipeline:
                graph.add_task(entry.label, entry.taskClass, entry.config, connections=entry.connections)
        else:
            graph = pipeline.to_graph()
        return cls.from_pipeline_graph(graph, where=where, bind=bind, butler=butler, resources=resources)

    @classmethod
    def from_pipeline_graph(
        cls,
        pipeline_graph: PipelineGraph,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ) -> SimplePipelineExecutor:
        """Build an executor from a pipeline graph that is already in memory.

        Parameters
        ----------
        pipeline_graph : `~lsst.pipe.base.PipelineGraph`
            Graph-form description of the tasks to run with their labels and
            configuration.  It is resolved against ``butler``; any existing
            resolutions are ignored.
        where : `str`, optional
            Data ID query expression that constrains the generated quanta.
        bind : `~collections.abc.Mapping`, optional
            Literal values to inject into the ``where`` expression, keyed by
            the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O; `prep_butler` can create one.  Its
            `~Butler.run` and `~Butler.collections` must be neither empty
            nor `None`.
        resources : `~lsst.pipe.base.ExecutionResources`, optional
            Resources made available to each quantum while it executes.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            Executor holding the constructed `~lsst.pipe.base.QuantumGraph`
            and the `~lsst.daf.butler.Butler`, ready for `run` to be called.
        """
        builder = AllDimensionsQuantumGraphBuilder(pipeline_graph, butler, where=where, bind=bind)
        return cls(
            quantum_graph=builder.build(attach_datastore_records=False),
            butler=butler,
            resources=resources,
        )

    def run(self, register_dataset_types: bool = False, save_versions: bool = True) -> list[Quantum]:
        """Execute every quantum of the `~lsst.pipe.base.QuantumGraph` in
        topological order.

        This runs the whole graph in one call.  To execute the quanta one at
        a time, obtain a generator from `as_generator` instead.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, all output dataset types are registered before any
            quantum is executed.
        save_versions : `bool`, optional
            If `True` (default), a package versions dataset is saved.

        Returns
        -------
        quanta : `list` [ `~lsst.daf.butler.Quantum` ]
            The quanta that were executed.

        Notes
        -----
        Topological orderings are generally not unique, and nothing beyond
        topological order is promised about the processing order.
        """
        executed = self.as_generator(
            register_dataset_types=register_dataset_types,
            save_versions=save_versions,
        )
        # Draining the iterator is what actually executes the quanta.
        return list(executed)

    def as_generator(
        self, register_dataset_types: bool = False, save_versions: bool = True
    ) -> Iterator[Quantum]:
        """Return an iterator that executes and yields the quanta of the
        `~lsst.pipe.base.QuantumGraph` in topological order.

        Each quantum is run as the returned iterator is advanced, which
        allows the quanta to be executed one at a time.  Use `run` to
        execute the whole graph in a single call.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, all output dataset types are registered before any
            quantum is executed.
        save_versions : `bool`, optional
            If `True` (default), a package versions dataset is saved.

        Returns
        -------
        quanta : `~collections.abc.Iterator` [ `~lsst.daf.butler.Quantum` ]
            The quanta that were executed.

        Notes
        -----
        The global initialization steps (see `PreExecInit`) happen right
        away when this method is called; the individual quanta are executed
        only as the returned iterator is consumed.

        Topological orderings are generally not unique, and nothing beyond
        topological order is promised about the processing order.
        """
        factory = TaskFactory()
        PreExecInit(self.butler, factory).initialize(
            graph=self.quantum_graph,
            registerDatasetTypes=register_dataset_types,
            saveVersions=save_versions,
        )
        quantum_executor = SingleQuantumExecutor(self.butler, factory, resources=self.resources)
        # This method must return a generator expression and must not become
        # a generator function itself: that is what makes the PreExecInit
        # work above run immediately rather than when the first quantum is
        # executed, which may matter to callers that inspect the repo state
        # in between.
        return (quantum_executor.execute(node.task_node, node.quantum) for node in self.quantum_graph)