Coverage for python/lsst/ctrl/mpexec/simple_pipeline_executor.py: 39%
57 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-06 02:30 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-06 02:30 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("SimplePipelineExecutor",)
26from collections.abc import Iterable, Iterator, Mapping
27from typing import Any
29from lsst.daf.butler import Butler, CollectionType, Quantum
30from lsst.pex.config import Config
31from lsst.pipe.base import (
32 ExecutionResources,
33 GraphBuilder,
34 Instrument,
35 Pipeline,
36 PipelineTask,
37 QuantumGraph,
38 TaskDef,
39)
41from .preExecInit import PreExecInit
42from .singleQuantumExecutor import SingleQuantumExecutor
43from .taskFactory import TaskFactory
class SimplePipelineExecutor:
    """A simple, high-level executor for pipelines.

    Parameters
    ----------
    quantum_graph : `~lsst.pipe.base.QuantumGraph`
        Graph to be executed.
    butler : `~lsst.daf.butler.Butler`
        Object that manages all I/O.  Must be initialized with `collections`
        and `run` properties that correspond to the input and output
        collections, which must be consistent with those used to create
        ``quantum_graph``.
    resources : `~lsst.pipe.base.ExecutionResources`, optional
        The resources available to each quantum being executed.

    Notes
    -----
    Most callers should use one of the `classmethod` factory functions
    (`from_pipeline_filename`, `from_task_class`, `from_pipeline`) instead of
    invoking the constructor directly; these guarantee that the
    `~lsst.daf.butler.Butler` and `~lsst.pipe.base.QuantumGraph` are created
    consistently.

    This class is intended primarily to support unit testing and small-scale
    integration testing of `~lsst.pipe.base.PipelineTask` classes.  It
    deliberately lacks many features present in the command-line-only
    ``pipetask`` tool in order to keep the implementation simple.  Python
    callers that need more sophistication should call lower-level tools like
    `~lsst.pipe.base.GraphBuilder`, `PreExecInit`, and `SingleQuantumExecutor`
    directly.
    """

    def __init__(
        self,
        quantum_graph: QuantumGraph,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ):
        self.quantum_graph = quantum_graph
        self.butler = butler
        self.resources = resources

    @staticmethod
    def _chain_members(output_run: str, inputs: str | Iterable[str]) -> list[str]:
        """Return the ordered child collections for the output chain.

        Parameters
        ----------
        output_run : `str`
            Name of the output run; always placed first in the result.
        inputs : `str` or `~collections.abc.Iterable` [ `str` ]
            Input collection name(s), in search order.

        Returns
        -------
        members : `list` [ `str` ]
            ``output_run`` followed by every input collection name.
        """
        # A bare string is itself an iterable of one-character strings;
        # treat it as a single collection name instead of splitting it.
        if isinstance(inputs, str):
            inputs = (inputs,)
        return [output_run, *inputs]

    @classmethod
    def prep_butler(
        cls,
        root: str,
        inputs: str | Iterable[str],
        output: str,
        output_run: str | None = None,
    ) -> Butler:
        """Return configured `~lsst.daf.butler.Butler`.

        Helper method for creating `~lsst.daf.butler.Butler` instances with
        collections appropriate for processing.

        Parameters
        ----------
        root : `str`
            Root of the butler data repository; must already exist, with all
            necessary input data.
        inputs : `str` or `~collections.abc.Iterable` [ `str` ]
            Collections to search for all input datasets, in search order.
            A single string is interpreted as one collection name.
        output : `str`
            Name of a new output `~lsst.daf.butler.CollectionType.CHAINED`
            collection to create that will combine both inputs and outputs.
        output_run : `str`, optional
            Name of the output `~lsst.daf.butler.CollectionType.RUN` that will
            directly hold all output datasets.  If not provided, a name will
            be created from ``output`` and a timestamp.

        Returns
        -------
        butler : `~lsst.daf.butler.Butler`
            Butler client instance compatible with all `classmethod` factories.
            Always writeable.
        """
        if output_run is None:
            output_run = f"{output}/{Instrument.makeCollectionTimestamp()}"
        # Make initial butler with no collections, since we haven't created
        # them yet.
        butler = Butler(root, writeable=True)
        butler.registry.registerCollection(output_run, CollectionType.RUN)
        butler.registry.registerCollection(output, CollectionType.CHAINED)
        # The output run goes first in the chain, followed by the inputs in
        # the order given.
        butler.registry.setCollectionChain(output, cls._chain_members(output_run, inputs))
        # Remake butler to let it infer default data IDs from collections, now
        # that those collections exist.
        return Butler(butler=butler, collections=[output], run=output_run)

    @classmethod
    def from_pipeline_filename(
        cls,
        pipeline_filename: str,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an on-disk
        pipeline YAML file.

        Parameters
        ----------
        pipeline_filename : `str`
            Name of the YAML file to load the pipeline definition from.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.
        resources : `~lsst.pipe.base.ExecutionResources`, optional
            The resources available to each quantum being executed.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.
        """
        pipeline = Pipeline.fromFile(pipeline_filename)
        return cls.from_pipeline(pipeline, butler=butler, where=where, bind=bind, resources=resources)

    @classmethod
    def from_task_class(
        cls,
        task_class: type[PipelineTask],
        config: Config | None = None,
        label: str | None = None,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from a pipeline
        containing a single task.

        Parameters
        ----------
        task_class : `type`
            A concrete `~lsst.pipe.base.PipelineTask` subclass.
        config : `~lsst.pex.config.Config`, optional
            Configuration for the task.  If not provided, task-level defaults
            will be used (no per-instrument overrides).
        label : `str`, optional
            Label for the task in its pipeline; defaults to
            ``task_class._DefaultName``.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.
        resources : `~lsst.pipe.base.ExecutionResources`, optional
            The resources available to each quantum being executed.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.

        Raises
        ------
        TypeError
            Raised if ``config`` is not an instance of
            ``task_class.ConfigClass``.
        """
        if config is None:
            config = task_class.ConfigClass()
        if label is None:
            label = task_class._DefaultName
        if not isinstance(config, task_class.ConfigClass):
            raise TypeError(
                f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
                f"got {type(config).__name__}."
            )
        task_def = TaskDef(taskName=task_class.__name__, config=config, label=label, taskClass=task_class)
        return cls.from_pipeline([task_def], butler=butler, where=where, bind=bind, resources=resources)

    @classmethod
    def from_pipeline(
        cls,
        pipeline: Pipeline | Iterable[TaskDef],
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        resources: ExecutionResources | None = None,
        **kwargs: Any,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an in-memory
        pipeline.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline` or \
                `~collections.abc.Iterable` [ `~lsst.pipe.base.TaskDef` ]
            A Python object describing the tasks to run, along with their
            labels and configuration.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O.  `prep_butler` can be used to create
            one.
        resources : `~lsst.pipe.base.ExecutionResources`, optional
            The resources available to each quantum being executed.
        **kwargs
            Additional keyword arguments are accepted but are not used by
            this method.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed
            `~lsst.pipe.base.QuantumGraph` and `~lsst.daf.butler.Butler`,
            ready for `run` to be called.
        """
        # Normalize either accepted input form to a concrete list of task
        # definitions before handing it to the graph builder.
        if isinstance(pipeline, Pipeline):
            task_defs = list(pipeline.toExpandedPipeline())
        else:
            task_defs = list(pipeline)
        graph_builder = GraphBuilder(butler.registry)
        # The graph needs an output run; a butler constructed without one
        # cannot be used here.
        assert butler.run is not None, "Butler output run collection must be defined"
        quantum_graph = graph_builder.makeGraph(
            task_defs, collections=butler.collections, run=butler.run, userQuery=where, bind=bind
        )
        return cls(quantum_graph=quantum_graph, butler=butler, resources=resources)

    def run(self, register_dataset_types: bool = False, save_versions: bool = True) -> list[Quantum]:
        """Run all the quanta in the `~lsst.pipe.base.QuantumGraph` in
        topological order.

        Use this method to run all quanta in the graph.  Use
        `as_generator` to get a generator to run the quanta one at
        a time.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `list` [ `~lsst.daf.butler.Quantum` ]
            Executed quanta.

        Notes
        -----
        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        return list(
            self.as_generator(register_dataset_types=register_dataset_types, save_versions=save_versions)
        )

    def as_generator(
        self, register_dataset_types: bool = False, save_versions: bool = True
    ) -> Iterator[Quantum]:
        """Yield quanta in the `~lsst.pipe.base.QuantumGraph` in topological
        order.

        These quanta will be run as the returned generator is iterated
        over.  Use this method to run the quanta one at a time.
        Use `run` to run all quanta in the graph.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `~collections.abc.Iterator` [ `~lsst.daf.butler.Quantum` ]
            Executed quanta.

        Notes
        -----
        Global initialization steps (see `PreExecInit`) are performed
        immediately when this method is called, but individual quanta are not
        actually executed until the returned iterator is iterated over.

        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        task_factory = TaskFactory()
        pre_exec_init = PreExecInit(self.butler, task_factory)
        pre_exec_init.initialize(
            graph=self.quantum_graph, registerDatasetTypes=register_dataset_types, saveVersions=save_versions
        )
        single_quantum_executor = SingleQuantumExecutor(self.butler, task_factory, resources=self.resources)
        # Important that this returns a generator expression rather than being
        # a generator itself; that is what makes the PreExecInit stuff above
        # happen immediately instead of when the first quantum is executed,
        # which might be useful for callers who want to check the state of the
        # repo in between.
        return (single_quantum_executor.execute(qnode.taskDef, qnode.quantum) for qnode in self.quantum_graph)