Coverage for python/lsst/ctrl/mpexec/simple_pipeline_executor.py: 32%
56 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-14 09:14 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("SimplePipelineExecutor",)
26from collections.abc import Iterable, Iterator, Mapping
27from typing import Any
29from lsst.daf.butler import Butler, CollectionType, Quantum
30from lsst.pex.config import Config
31from lsst.pipe.base import GraphBuilder, Instrument, Pipeline, PipelineTask, QuantumGraph, TaskDef
33from .preExecInit import PreExecInit
34from .singleQuantumExecutor import SingleQuantumExecutor
35from .taskFactory import TaskFactory
class SimplePipelineExecutor:
    """A simple, high-level executor for pipelines.

    Parameters
    ----------
    quantum_graph : `~lsst.pipe.base.QuantumGraph`
        Graph to be executed.
    butler : `~lsst.daf.butler.Butler`
        Object that manages all I/O.  Its `collections` and `run` properties
        must name the input and output collections, and these must be
        consistent with the ones used to build ``quantum_graph``.

    Notes
    -----
    Most callers should obtain instances via one of the `classmethod`
    factories (`from_pipeline_filename`, `from_task_class`, `from_pipeline`)
    rather than the constructor; those guarantee that the
    `~lsst.daf.butler.Butler` and `~lsst.pipe.base.QuantumGraph` are built
    consistently with each other.

    The class exists primarily for unit tests and small-scale integration
    tests of `~lsst.pipe.base.PipelineTask` classes, and it intentionally
    omits most of the features of the command-line-only ``pipetask`` tool to
    stay simple.  Python code that needs more control should use the
    lower-level pieces (`~lsst.pipe.base.GraphBuilder`, `PreExecInit`,
    `SingleQuantumExecutor`) directly.
    """

    def __init__(self, quantum_graph: QuantumGraph, butler: Butler):
        self.quantum_graph = quantum_graph
        self.butler = butler

    @classmethod
    def prep_butler(
        cls,
        root: str,
        inputs: Iterable[str],
        output: str,
        output_run: str | None = None,
    ) -> Butler:
        """Create and return a `~lsst.daf.butler.Butler` configured for
        processing.

        Helper for building `~lsst.daf.butler.Butler` clients whose
        collections are set up the way the `classmethod` factories expect.

        Parameters
        ----------
        root : `str`
            Root of the butler data repository.  Must already exist and
            contain all necessary input data.
        inputs : `Iterable` [ `str` ]
            Collections searched for input datasets, in search order.
        output : `str`
            Name of a new `~CollectionType.CHAINED` collection to create,
            chaining together both the inputs and the outputs.
        output_run : `str`, optional
            Name of the `~CollectionType.RUN` collection that will directly
            hold all output datasets.  When omitted, a name is derived from
            ``output`` plus a timestamp.

        Returns
        -------
        butler : `~lsst.daf.butler.Butler`
            Butler client instance compatible with all `classmethod`
            factories.  Always writeable.
        """
        if output_run is None:
            output_run = f"{output}/{Instrument.makeCollectionTimestamp()}"
        # The target collections do not exist yet, so bootstrap with a butler
        # that has no default collections at all.
        bootstrap = Butler(root, writeable=True)
        bootstrap.registry.registerCollection(output_run, CollectionType.RUN)
        bootstrap.registry.registerCollection(output, CollectionType.CHAINED)
        chain_members = [output_run, *inputs]
        bootstrap.registry.setCollectionChain(output, chain_members)
        # Build a second butler on top of the first so it can infer default
        # data IDs from the collections that now exist.
        return Butler(butler=bootstrap, collections=[output], run=output_run)

    @classmethod
    def from_pipeline_filename(
        cls,
        pipeline_filename: str,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from a pipeline
        definition stored in an on-disk YAML file.

        Parameters
        ----------
        pipeline_filename : `str`
            Name of the YAML file holding the pipeline definition.
        where : `str`, optional
            Data ID query expression constraining the quanta generated.
        bind : `Mapping`, optional
            Literal values to substitute into the ``where`` expression,
            keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O; `prep_butler` can construct a
            suitable one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            Executor holding the constructed `~lsst.pipe.base.QuantumGraph`
            and `~lsst.daf.butler.Butler`, ready for `run` to be called.
        """
        return cls.from_pipeline(
            Pipeline.fromFile(pipeline_filename), butler=butler, where=where, bind=bind
        )

    @classmethod
    def from_task_class(
        cls,
        task_class: type[PipelineTask],
        config: Config | None = None,
        label: str | None = None,
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from a pipeline
        that contains exactly one task.

        Parameters
        ----------
        task_class : `type`
            A concrete `~lsst.pipe.base.PipelineTask` subclass.
        config : `Config`, optional
            Configuration for the task; when omitted, task-level defaults
            are used (no per-instrument overrides).
        label : `str`, optional
            Label for the task within its pipeline; defaults to
            ``task_class._DefaultName``.
        where : `str`, optional
            Data ID query expression constraining the quanta generated.
        bind : `Mapping`, optional
            Literal values to substitute into the ``where`` expression,
            keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O; `prep_butler` can construct a
            suitable one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            Executor holding the constructed `~lsst.pipe.base.QuantumGraph`
            and `~lsst.daf.butler.Butler`, ready for `run` to be called.

        Raises
        ------
        TypeError
            Raised if ``config`` is not an instance of
            ``task_class.ConfigClass``.
        """
        config = task_class.ConfigClass() if config is None else config
        label = task_class._DefaultName if label is None else label
        if not isinstance(config, task_class.ConfigClass):
            raise TypeError(
                f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
                f"got {type(config).__name__}."
            )
        single_task = TaskDef(taskName=task_class.__name__, config=config, label=label, taskClass=task_class)
        return cls.from_pipeline([single_task], butler=butler, where=where, bind=bind)

    @classmethod
    def from_pipeline(
        cls,
        pipeline: Pipeline | Iterable[TaskDef],
        *,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        butler: Butler,
        **kwargs: Any,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an in-memory
        pipeline.

        Parameters
        ----------
        pipeline : `Pipeline` or `Iterable` [ `TaskDef` ]
            Python object describing the tasks to run, along with their
            labels and configuration.
        where : `str`, optional
            Data ID query expression constraining the quanta generated.
        bind : `Mapping`, optional
            Literal values to substitute into the ``where`` expression,
            keyed by the identifiers they replace.
        butler : `~lsst.daf.butler.Butler`
            Butler that manages all I/O; `prep_butler` can construct a
            suitable one.
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            Executor holding the constructed `~lsst.pipe.base.QuantumGraph`
            and `~lsst.daf.butler.Butler`, ready for `run` to be called.
        """
        task_defs = (
            list(pipeline.toExpandedPipeline()) if isinstance(pipeline, Pipeline) else list(pipeline)
        )
        assert butler.run is not None, "Butler output run collection must be defined"
        quantum_graph = GraphBuilder(butler.registry).makeGraph(
            task_defs, collections=butler.collections, run=butler.run, userQuery=where, bind=bind
        )
        return cls(quantum_graph=quantum_graph, butler=butler)

    def run(self, register_dataset_types: bool = False, save_versions: bool = True) -> list[Quantum]:
        """Run every quantum in the `~lsst.pipe.base.QuantumGraph` in
        topological order.

        This executes the whole graph at once; use `as_generator` instead to
        run quanta one at a time.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing
            any quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `list` [ `~lsst.daf.butler.Quantum` ]
            Executed quanta.

        Notes
        -----
        A topological ordering is not in general unique; no guarantee beyond
        topological order is made about quantum execution order.
        """
        generator = self.as_generator(
            register_dataset_types=register_dataset_types, save_versions=save_versions
        )
        return list(generator)

    def as_generator(
        self, register_dataset_types: bool = False, save_versions: bool = True
    ) -> Iterator[Quantum]:
        """Yield quanta in the `~lsst.pipe.base.QuantumGraph` in topological
        order.

        Each quantum runs as the returned generator is advanced, so this is
        the way to execute quanta one at a time; use `run` to execute the
        whole graph in one call.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing
            any quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `~collections.abc.Iterator` [ `~lsst.daf.butler.Quantum` ]
            Executed quanta.

        Notes
        -----
        Global initialization steps (see `PreExecInit`) happen immediately
        when this method is called; individual quanta only execute as the
        returned iterator is advanced.

        A topological ordering is not in general unique; no guarantee beyond
        topological order is made about quantum execution order.
        """
        factory = TaskFactory()
        PreExecInit(self.butler, factory).initialize(
            graph=self.quantum_graph,
            registerDatasetTypes=register_dataset_types,
            saveVersions=save_versions,
        )
        executor = SingleQuantumExecutor(self.butler, factory)
        # Deliberately return a generator *expression* instead of making this
        # method a generator: that way the PreExecInit work above runs right
        # away rather than when the first quantum is pulled, which lets
        # callers inspect the repo state in between.
        return (executor.execute(node.taskDef, node.quantum) for node in self.quantum_graph)