Coverage for python/lsst/ctrl/mpexec/simple_pipeline_executor.py: 33%
55 statements
coverage.py v6.5.0, created at 2023-03-23 02:09 -0700
# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("SimplePipelineExecutor",)

from collections.abc import Iterable, Iterator, Mapping
from typing import Any, List, Optional, Type, Union

from lsst.daf.butler import Butler, CollectionType, Quantum
from lsst.pex.config import Config
from lsst.pipe.base import GraphBuilder, Instrument, Pipeline, PipelineTask, QuantumGraph, TaskDef

from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor
from .taskFactory import TaskFactory

class SimplePipelineExecutor:
    """A simple, high-level executor for pipelines.

    Parameters
    ----------
    quantum_graph : `QuantumGraph`
        Graph to be executed.
    butler : `Butler`
        Object that manages all I/O. Must be initialized with `collections`
        and `run` properties that correspond to the input and output
        collections, which must be consistent with those used to create
        ``quantum_graph``.

    Notes
    -----
    Most callers should use one of the `classmethod` factory functions
    (`from_pipeline_filename`, `from_task_class`, `from_pipeline`) instead of
    invoking the constructor directly; these guarantee that the `Butler` and
    `QuantumGraph` are created consistently.

    This class is intended primarily to support unit testing and small-scale
    integration testing of `PipelineTask` classes. It deliberately lacks many
    features present in the command-line-only ``pipetask`` tool in order to
    keep the implementation simple. Python callers that need more
    sophistication should call lower-level tools like `GraphBuilder`,
    `PreExecInit`, and `SingleQuantumExecutor` directly.
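
    Examples
    --------
    A minimal end-to-end sketch; the repository path, input and output
    collection names, pipeline file, and ``where`` expression below are all
    illustrative and must be adapted to an actual data repository::

        butler = SimplePipelineExecutor.prep_butler(
            "/path/to/repo",
            inputs=["HSC/defaults"],
            output="u/someone/simple-test",
        )
        executor = SimplePipelineExecutor.from_pipeline_filename(
            "my_pipeline.yaml",
            where="instrument='HSC' AND visit=12345",
            butler=butler,
        )
        quanta = executor.run(register_dataset_types=True)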
64 """
66 def __init__(self, quantum_graph: QuantumGraph, butler: Butler):
67 self.quantum_graph = quantum_graph
68 self.butler = butler

    @classmethod
    def prep_butler(
        cls,
        root: str,
        inputs: Iterable[str],
        output: str,
        output_run: Optional[str] = None,
    ) -> Butler:
        """Helper method for creating `Butler` instances with collections
        appropriate for processing.

        Parameters
        ----------
        root : `str`
            Root of the butler data repository; must already exist, with all
            necessary input data.
        inputs : `Iterable` [ `str` ]
            Collections to search for all input datasets, in search order.
        output : `str`
            Name of a new output `~CollectionType.CHAINED` collection to
            create that will combine both inputs and outputs.
        output_run : `str`, optional
            Name of the output `~CollectionType.RUN` collection that will
            directly hold all output datasets. If not provided, a name will
            be created from ``output`` and a timestamp.

        Returns
        -------
        butler : `Butler`
            Butler client instance compatible with all `classmethod`
            factories. Always writeable.
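
        Examples
        --------
        A sketch with illustrative repository path and collection names; here
        the output `~CollectionType.RUN` collection name is given explicitly
        instead of being derived from a timestamp::

            butler = SimplePipelineExecutor.prep_butler(
                "/path/to/repo",
                inputs=["HSC/defaults", "refcats"],
                output="u/someone/simple-test",
                output_run="u/someone/simple-test/attempt1",
            )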
101 """
102 if output_run is None:
103 output_run = f"{output}/{Instrument.makeCollectionTimestamp()}"
104 # Make initial butler with no collections, since we haven't created
105 # them yet.
106 butler = Butler(root, writeable=True)
107 butler.registry.registerCollection(output_run, CollectionType.RUN)
108 butler.registry.registerCollection(output, CollectionType.CHAINED)
109 collections = [output_run]
110 collections.extend(inputs)
111 butler.registry.setCollectionChain(output, collections)
112 # Remake butler to let it infer default data IDs from collections, now
113 # that those collections exist.
114 return Butler(butler=butler, collections=[output], run=output_run)

    @classmethod
    def from_pipeline_filename(
        cls,
        pipeline_filename: str,
        *,
        where: str = "",
        bind: Optional[Mapping[str, Any]] = None,
        butler: Butler,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an on-disk
        pipeline YAML file.

        Parameters
        ----------
        pipeline_filename : `str`
            Name of the YAML file to load the pipeline definition from.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `Butler`
            Butler that manages all I/O. `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.
        """
        pipeline = Pipeline.fromFile(pipeline_filename)
        return cls.from_pipeline(pipeline, butler=butler, where=where, bind=bind)

    @classmethod
    def from_task_class(
        cls,
        task_class: Type[PipelineTask],
        config: Optional[Config] = None,
        label: Optional[str] = None,
        *,
        where: str = "",
        bind: Optional[Mapping[str, Any]] = None,
        butler: Butler,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from a pipeline
        containing a single task.

        Parameters
        ----------
        task_class : `type`
            A concrete `PipelineTask` subclass.
        config : `Config`, optional
            Configuration for the task. If not provided, task-level defaults
            will be used (no per-instrument overrides).
        label : `str`, optional
            Label for the task in its pipeline; defaults to
            ``task_class._DefaultName``.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `Butler`
            Butler that manages all I/O. `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.
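
        Examples
        --------
        A sketch assuming a hypothetical concrete `PipelineTask` subclass
        ``MyTask`` and a ``butler`` already created via `prep_butler`::

            config = MyTask.ConfigClass()
            executor = SimplePipelineExecutor.from_task_class(
                MyTask, config=config, label="myTask", butler=butler
            )
            quanta = executor.run(register_dataset_types=True)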
188 """
189 if config is None:
190 config = task_class.ConfigClass()
191 if label is None:
192 label = task_class._DefaultName
193 if not isinstance(config, task_class.ConfigClass):
194 raise TypeError(
195 f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
196 f"got {type(config).__name__}."
197 )
198 task_def = TaskDef(taskName=task_class.__name__, config=config, label=label, taskClass=task_class)
199 return cls.from_pipeline([task_def], butler=butler, where=where, bind=bind)

    @classmethod
    def from_pipeline(
        cls,
        pipeline: Union[Pipeline, Iterable[TaskDef]],
        *,
        where: str = "",
        bind: Optional[Mapping[str, Any]] = None,
        butler: Butler,
        **kwargs: Any,
    ) -> SimplePipelineExecutor:
        """Create an executor by building a QuantumGraph from an in-memory
        pipeline.

        Parameters
        ----------
        pipeline : `Pipeline` or `Iterable` [ `TaskDef` ]
            A Python object describing the tasks to run, along with their
            labels and configuration.
        where : `str`, optional
            Data ID query expression that constrains the quanta generated.
        bind : `Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``where`` expression, keyed by the identifiers they replace.
        butler : `Butler`
            Butler that manages all I/O. `prep_butler` can be used to create
            one.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            An executor instance containing the constructed `QuantumGraph` and
            `Butler`, ready for `run` to be called.
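
        Examples
        --------
        A sketch that passes an explicit list of `TaskDef` objects rather
        than a `Pipeline`; ``MyTask`` is a hypothetical `PipelineTask`
        subclass and ``butler`` is assumed to come from `prep_butler`::

            task_def = TaskDef(
                taskName=MyTask.__name__,
                config=MyTask.ConfigClass(),
                label="myTask",
                taskClass=MyTask,
            )
            executor = SimplePipelineExecutor.from_pipeline([task_def], butler=butler)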
233 """
234 if isinstance(pipeline, Pipeline):
235 pipeline = list(pipeline.toExpandedPipeline())
236 else:
237 pipeline = list(pipeline)
238 graph_builder = GraphBuilder(butler.registry)
239 quantum_graph = graph_builder.makeGraph(
240 pipeline, collections=butler.collections, run=butler.run, userQuery=where, bind=bind
241 )
242 return cls(quantum_graph=quantum_graph, butler=butler)

    def run(self, register_dataset_types: bool = False, save_versions: bool = True) -> List[Quantum]:
        """Run all the quanta in the `QuantumGraph` in topological order.

        Use this method to run all quanta in the graph. Use `as_generator` to
        obtain a generator that runs the quanta one at a time.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `List` [ `Quantum` ]
            Executed quanta. At present, these will contain only unresolved
            `DatasetRef` instances for output datasets, reflecting the state
            of the quantum just before it was run (but after any adjustments
            for predicted but now missing inputs). This may change in the
            future to include resolved output `DatasetRef` objects.

        Notes
        -----
        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
        """
        return list(
            self.as_generator(register_dataset_types=register_dataset_types, save_versions=save_versions)
        )

    def as_generator(
        self, register_dataset_types: bool = False, save_versions: bool = True
    ) -> Iterator[Quantum]:
        """Yield quanta in the `QuantumGraph` in topological order.

        These quanta will be run as the returned generator is iterated over.
        Use this method to run the quanta one at a time. Use `run` to run all
        quanta in the graph.

        Parameters
        ----------
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types before executing any
            quanta.
        save_versions : `bool`, optional
            If `True` (default), save a package versions dataset.

        Returns
        -------
        quanta : `Iterator` [ `Quantum` ]
            Executed quanta. At present, these will contain only unresolved
            `DatasetRef` instances for output datasets, reflecting the state
            of the quantum just before it was run (but after any adjustments
            for predicted but now missing inputs). This may change in the
            future to include resolved output `DatasetRef` objects.

        Notes
        -----
        Global initialization steps (see `PreExecInit`) are performed
        immediately when this method is called, but individual quanta are not
        actually executed until the returned iterator is iterated over.

        A topological ordering is not in general unique, but no other
        guarantees are made about the order in which quanta are processed.
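
        Examples
        --------
        A sketch that executes quanta one at a time so the caller can inspect
        the data repository between them; ``executor`` is assumed to have
        been created with one of the factory methods::

            for quantum in executor.as_generator(register_dataset_types=True):
                # Each quantum has already been executed by the time it is
                # yielded; inspect outputs or log progress here.
                ...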
312 """
313 task_factory = TaskFactory()
314 pre_exec_init = PreExecInit(self.butler, task_factory)
315 pre_exec_init.initialize(
316 graph=self.quantum_graph, registerDatasetTypes=register_dataset_types, saveVersions=save_versions
317 )
318 single_quantum_executor = SingleQuantumExecutor(self.butler, task_factory)
319 # Important that this returns a generator expression rather than being
320 # a generator itself; that is what makes the PreExecInit stuff above
321 # happen immediately instead of when the first quanta is executed,
322 # which might be useful for callers who want to check the state of the
323 # repo in between.
324 return (single_quantum_executor.execute(qnode.taskDef, qnode.quantum) for qnode in self.quantum_graph)