Coverage for python/lsst/ctrl/mpexec/simple_pipeline_executor.py: 35%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("SimplePipelineExecutor",)
26from typing import Any, Iterable, Iterator, Optional, Type, Union, List
28from lsst.daf.butler import Butler, CollectionType, Quantum
29from lsst.obs.base import Instrument
30from lsst.pex.config import Config
31from lsst.pipe.base import GraphBuilder, Pipeline, PipelineTask, QuantumGraph, TaskDef
33from .preExecInit import PreExecInit
34from .singleQuantumExecutor import SingleQuantumExecutor
35from .taskFactory import TaskFactory
class SimplePipelineExecutor:
    """A simple, high-level executor for pipelines.

    Parameters
    ----------
    quantum_graph : `QuantumGraph`
        Graph to be executed.
    butler : `Butler`
        Object that manages all I/O.  Must be initialized with `collections`
        and `run` properties that correspond to the input and output
        collections, which must be consistent with those used to create
        ``quantum_graph``.

    Notes
    -----
    Most callers should use one of the `classmethod` factory functions
    (`from_pipeline_filename`, `from_task_class`, `from_pipeline`) instead of
    invoking the constructor directly; these guarantee that the `Butler` and
    `QuantumGraph` are created consistently.

    This class is intended primarily to support unit testing and small-scale
    integration testing of `PipelineTask` classes.  It deliberately lacks many
    features present in the command-line-only ``pipetask`` tool in order to
    keep the implementation simple.  Python callers that need more
    sophistication should call lower-level tools like `GraphBuilder`,
    `PreExecInit`, and `SingleQuantumExecutor` directly.
    """

    def __init__(self, quantum_graph: QuantumGraph, butler: Butler):
        # Both objects are stored as given; no validation or copying is done.
        self.quantum_graph = quantum_graph
        self.butler = butler

    @staticmethod
    def _as_collection_list(inputs: Union[str, Iterable[str]]) -> List[str]:
        """Return ``inputs`` as a new list of collection names.

        Parameters
        ----------
        inputs : `str` or `Iterable` [ `str` ]
            One collection name, or an iterable of collection names.

        Returns
        -------
        collections : `list` [ `str` ]
            A new list that the caller is free to modify.

        Notes
        -----
        A bare `str` is itself an iterable of one-character strings, so
        passing it straight to `list` would split a single collection name
        into its characters.  It is treated as a single name instead.
        """
        if isinstance(inputs, str):
            return [inputs]
        return list(inputs)

    @classmethod
    def prep_butler(
        cls,
        root: str,
        inputs: Iterable[str],
        output: str,
        output_run: Optional[str] = None,
    ) -> Butler:
        """Helper method for creating `Butler` instances with collections
        appropriate for processing.

        Parameters
        ----------
        root : `str`
            Root of the butler data repository; must already exist, with all
            necessary input data.
        inputs : `Iterable` [ `str` ]
            Collections to search for all input datasets, in search order.
            A single `str` is interpreted as one collection name.
        output : `str`
            Name of a new output `~CollectionType.CHAINED` collection to create
            that will combine both inputs and outputs.
        output_run : `str`, optional
            Name of the output `~CollectionType.RUN` that will directly hold
            all output datasets.  If not provided, a name will be created from
            ``output`` and a timestamp.

        Returns
        -------
        butler : `Butler`
            Butler client instance compatible with all `classmethod` factories.
            Always writeable.
        """
        if output_run is None:
            output_run = f"{output}/{Instrument.makeCollectionTimestamp()}"
        # Make initial butler with no collections, since we haven't created
        # them yet.
        butler = Butler(root, writeable=True)
        butler.registry.registerCollection(output_run, CollectionType.RUN)
        butler.registry.registerCollection(output, CollectionType.CHAINED)
        # The chain is defined as the inputs, in the order given, followed by
        # the output run.
        collections = cls._as_collection_list(inputs)
        collections.append(output_run)
        butler.registry.setCollectionChain(output, collections)
        # Remake butler to let it infer default data IDs from collections, now
        # that those collections exist.
        return Butler(butler=butler, collections=[output], run=output_run)

    @classmethod
    def from_pipeline_filename(
        cls, pipeline_filename: str, *, where: str = "", butler: Butler
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an on-disk
        pipeline YAML file.

        Parameters
        ----------
        pipeline_filename : `str`
            Name of the YAML file to load the pipeline definition from.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.
        """
        pipeline = Pipeline.fromFile(pipeline_filename)
        return cls.from_pipeline(pipeline, butler=butler, where=where)

    @classmethod
    def from_task_class(
        cls,
        task_class: Type[PipelineTask],
        config: Optional[Config] = None,
        label: Optional[str] = None,
        *,
        where: str = "",
        butler: Butler,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from a pipeline
        containing a single task.

        Parameters
        ----------
        task_class : `type`
            A concrete `PipelineTask` subclass.
        config : `Config`, optional
            Configuration for the task.  If not provided, task-level defaults
            will be used (no per-instrument overrides).
        label : `str`, optional
            Label for the task in its pipeline; defaults to
            ``task_class._DefaultName``.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.

        Raises
        ------
        TypeError
            Raised if ``config`` is not an instance of
            ``task_class.ConfigClass``.
        """
        if config is None:
            config = task_class.ConfigClass()
        if label is None:
            label = task_class._DefaultName
        # Reject mismatched configs here, before any graph building starts.
        if not isinstance(config, task_class.ConfigClass):
            raise TypeError(
                f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
                f"got {type(config).__name__}."
            )
        task_def = TaskDef(taskName=task_class.__name__, config=config, label=label, taskClass=task_class)
        return cls.from_pipeline([task_def], butler=butler, where=where)

    @classmethod
    def from_pipeline(
        cls,
        pipeline: Union[Pipeline, Iterable[TaskDef]],
        *,
        where: str = "",
        butler: Butler,
        **kwargs: Any,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an in-memory
        pipeline.

        Parameters
        ----------
        pipeline : `Pipeline` or `Iterable` [ `TaskDef` ]
            A Python object describing the tasks to run, along with their
            labels and configuration.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.
        **kwargs : `Any`
            Additional keyword arguments.  These are accepted but are not
            used by this method.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.
        """
        # Either form of input is materialized as a list of task definitions
        # before it is handed to the graph builder.
        if isinstance(pipeline, Pipeline):
            pipeline = list(pipeline.toExpandedPipeline())
        else:
            pipeline = list(pipeline)
        graph_builder = GraphBuilder(butler.registry)
        quantum_graph = graph_builder.makeGraph(
            pipeline, collections=butler.collections, run=butler.run, userQuery=where
        )
        return cls(quantum_graph=quantum_graph, butler=butler)

    def run(self, register_dataset_types: bool = False) -> List[Quantum]:
        """Run all the quanta in the `QuantumGraph` in topological order.

        Use this method to run all quanta in the graph.  Use
        `as_generator` to get a generator to run the quanta one at
        a time.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.

        Returns
        -------
        quanta : `List` [ `Quantum` ]
            Executed quanta.  At present, these will contain only unresolved
            `DatasetRef` instances for output datasets, reflecting the state of
            the quantum just before it was run (but after any adjustments for
            predicted but now missing inputs).  This may change in the future
            to include resolved output `DatasetRef` objects.

        Notes
        -----
        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        # Exhausting the generator is what actually executes every quantum.
        return list(self.as_generator(register_dataset_types=register_dataset_types))

    def as_generator(self, register_dataset_types: bool = False) -> Iterator[Quantum]:
        """Yield quanta in the `QuantumGraph` in topological order.

        These quanta will be run as the returned generator is iterated
        over.  Use this method to run the quanta one at a time.
        Use `run` to run all quanta in the graph.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.

        Returns
        -------
        quanta : `Iterator` [ `Quantum` ]
            Executed quanta.  At present, these will contain only unresolved
            `DatasetRef` instances for output datasets, reflecting the state of
            the quantum just before it was run (but after any adjustments for
            predicted but now missing inputs).  This may change in the future
            to include resolved output `DatasetRef` objects.

        Notes
        -----
        Global initialization steps (see `PreExecInit`) are performed
        immediately when this method is called, but individual quanta are not
        actually executed until the returned iterator is iterated over.

        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        task_factory = TaskFactory()
        pre_exec_init = PreExecInit(self.butler, task_factory)
        pre_exec_init.initialize(graph=self.quantum_graph, registerDatasetTypes=register_dataset_types)
        single_quantum_executor = SingleQuantumExecutor(task_factory)
        # Important that this returns a generator expression rather than being
        # a generator itself; that is what makes the PreExecInit stuff above
        # happen immediately instead of when the first quantum is executed,
        # which might be useful for callers who want to check the state of the
        # repo in between.
        return (
            single_quantum_executor.execute(qnode.taskDef, qnode.quantum, self.butler)
            for qnode in self.quantum_graph
        )