Coverage for python/lsst/ctrl/mpexec/simple_pipeline_executor.py: 34%
54 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-08-04 02:37 -0700
« prev ^ index » next coverage.py v6.4.2, created at 2022-08-04 02:37 -0700
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("SimplePipelineExecutor",)
26from typing import Any, Iterable, Iterator, List, Optional, Type, Union
28from lsst.daf.butler import Butler, CollectionType, Quantum
29from lsst.pex.config import Config
30from lsst.pipe.base import GraphBuilder, Instrument, Pipeline, PipelineTask, QuantumGraph, TaskDef
32from .preExecInit import PreExecInit
33from .singleQuantumExecutor import SingleQuantumExecutor
34from .taskFactory import TaskFactory
class SimplePipelineExecutor:
    """A simple, high-level executor for pipelines.

    Parameters
    ----------
    quantum_graph : `QuantumGraph`
        Graph to be executed.
    butler : `Butler`
        Object that manages all I/O.  Must be initialized with `collections`
        and `run` properties that correspond to the input and output
        collections, which must be consistent with those used to create
        ``quantum_graph``.

    Notes
    -----
    Most callers should use one of the `classmethod` factory functions
    (`from_pipeline_filename`, `from_task_class`, `from_pipeline`) instead of
    invoking the constructor directly; these guarantee that the `Butler` and
    `QuantumGraph` are created consistently.

    This class is intended primarily to support unit testing and small-scale
    integration testing of `PipelineTask` classes.  It deliberately lacks many
    features present in the command-line-only ``pipetask`` tool in order to
    keep the implementation simple.  Python callers that need more
    sophistication should call lower-level tools like `GraphBuilder`,
    `PreExecInit`, and `SingleQuantumExecutor` directly.
    """

    def __init__(self, quantum_graph: QuantumGraph, butler: Butler):
        # Both objects are stored as-is; no consistency check between the
        # graph and the butler's collections is performed here.
        self.quantum_graph = quantum_graph
        self.butler = butler

    @classmethod
    def prep_butler(
        cls,
        root: str,
        inputs: Iterable[str],
        output: str,
        output_run: Optional[str] = None,
    ) -> Butler:
        """Helper method for creating `Butler` instances with collections
        appropriate for processing.

        Parameters
        ----------
        root : `str`
            Root of the butler data repository; must already exist, with all
            necessary input data.
        inputs : `Iterable` [ `str` ]
            Collections to search for all input datasets, in search order.
        output : `str`
            Name of a new output `~CollectionType.CHAINED` collection to create
            that will combine both inputs and outputs.
        output_run : `str`, optional
            Name of the output `~CollectionType.RUN` that will directly hold
            all output datasets.  If not provided, a name will be created from
            ``output`` and a timestamp.

        Returns
        -------
        butler : `Butler`
            Butler client instance compatible with all `classmethod` factories.
            Always writeable.
        """
        if output_run is None:
            output_run = f"{output}/{Instrument.makeCollectionTimestamp()}"
        # Make initial butler with no collections, since we haven't created
        # them yet.
        butler = Butler(root, writeable=True)
        butler.registry.registerCollection(output_run, CollectionType.RUN)
        butler.registry.registerCollection(output, CollectionType.CHAINED)
        # The output run goes first in the chain, followed by the inputs in
        # the order the caller supplied them.
        collections = [output_run]
        collections.extend(inputs)
        butler.registry.setCollectionChain(output, collections)
        # Remake butler to let it infer default data IDs from collections, now
        # that those collections exist.
        return Butler(butler=butler, collections=[output], run=output_run)

    @classmethod
    def from_pipeline_filename(
        cls, pipeline_filename: str, *, where: str = "", butler: Butler
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an on-disk
        pipeline YAML file.

        Parameters
        ----------
        pipeline_filename : `str`
            Name of the YAML file to load the pipeline definition from.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.
        """
        pipeline = Pipeline.fromFile(pipeline_filename)
        return cls.from_pipeline(pipeline, butler=butler, where=where)

    @classmethod
    def from_task_class(
        cls,
        task_class: Type[PipelineTask],
        config: Optional[Config] = None,
        label: Optional[str] = None,
        *,
        where: str = "",
        butler: Butler,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from a pipeline
        containing a single task.

        Parameters
        ----------
        task_class : `type`
            A concrete `PipelineTask` subclass.
        config : `Config`, optional
            Configuration for the task.  If not provided, task-level defaults
            will be used (no per-instrument overrides).
        label : `str`, optional
            Label for the task in its pipeline; defaults to
            ``task_class._DefaultName``.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.

        Raises
        ------
        TypeError
            Raised if ``config`` is not an instance of
            ``task_class.ConfigClass``.
        """
        if config is None:
            config = task_class.ConfigClass()
        if label is None:
            label = task_class._DefaultName
        # Validate before building the TaskDef so a mismatched config is
        # reported with a clear message.
        if not isinstance(config, task_class.ConfigClass):
            raise TypeError(
                f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
                f"got {type(config).__name__}."
            )
        task_def = TaskDef(taskName=task_class.__name__, config=config, label=label, taskClass=task_class)
        return cls.from_pipeline([task_def], butler=butler, where=where)

    @classmethod
    def from_pipeline(
        cls,
        pipeline: Union[Pipeline, Iterable[TaskDef]],
        *,
        where: str = "",
        butler: Butler,
        **kwargs: Any,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an in-memory
        pipeline.

        Parameters
        ----------
        pipeline : `Pipeline` or `Iterable` [ `TaskDef` ]
            A Python object describing the tasks to run, along with their
            labels and configuration.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        butler : `Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.
        **kwargs
            Additional keyword arguments.  These are accepted but not used by
            this method.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.
        """
        # Normalize both accepted input forms to a concrete list of TaskDef
        # before handing it to the graph builder.
        if isinstance(pipeline, Pipeline):
            pipeline = list(pipeline.toExpandedPipeline())
        else:
            pipeline = list(pipeline)
        graph_builder = GraphBuilder(butler.registry)
        quantum_graph = graph_builder.makeGraph(
            pipeline, collections=butler.collections, run=butler.run, userQuery=where
        )
        return cls(quantum_graph=quantum_graph, butler=butler)

    def run(self, register_dataset_types: bool = False) -> List[Quantum]:
        """Run all the quanta in the `QuantumGraph` in topological order.

        Use this method to run all quanta in the graph.  Use
        `as_generator` to get a generator to run the quanta one at
        a time.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.

        Returns
        -------
        quanta : `List` [ `Quantum` ]
            Executed quanta.  At present, these will contain only unresolved
            `DatasetRef` instances for output datasets, reflecting the state of
            the quantum just before it was run (but after any adjustments for
            predicted but now missing inputs).  This may change in the future
            to include resolved output `DatasetRef` objects.

        Notes
        -----
        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        # Exhausting the generator is what actually executes every quantum.
        return list(self.as_generator(register_dataset_types=register_dataset_types))

    def as_generator(self, register_dataset_types: bool = False) -> Iterator[Quantum]:
        """Yield quanta in the `QuantumGraph` in topological order.

        These quanta will be run as the returned generator is iterated
        over.  Use this method to run the quanta one at a time.
        Use `run` to run all quanta in the graph.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.

        Returns
        -------
        quanta : `Iterator` [ `Quantum` ]
            Executed quanta.  At present, these will contain only unresolved
            `DatasetRef` instances for output datasets, reflecting the state of
            the quantum just before it was run (but after any adjustments for
            predicted but now missing inputs).  This may change in the future
            to include resolved output `DatasetRef` objects.

        Notes
        -----
        Global initialization steps (see `PreExecInit`) are performed
        immediately when this method is called, but individual quanta are not
        actually executed until the returned iterator is iterated over.

        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        task_factory = TaskFactory()
        pre_exec_init = PreExecInit(self.butler, task_factory)
        pre_exec_init.initialize(graph=self.quantum_graph, registerDatasetTypes=register_dataset_types)
        single_quantum_executor = SingleQuantumExecutor(task_factory)
        # Important that this returns a generator expression rather than being
        # a generator itself; that is what makes the PreExecInit stuff above
        # happen immediately instead of when the first quantum is executed,
        # which might be useful for callers who want to check the state of the
        # repo in between.
        return (
            single_quantum_executor.execute(qnode.taskDef, qnode.quantum, self.butler)
            for qnode in self.quantum_graph
        )