Coverage for tests/test_simple_pipeline_executor.py: 30%
145 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-04 02:07 -0800
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22import os
23import shutil
24import tempfile
25import unittest
26from typing import Any, Dict
28import lsst.daf.butler
29import lsst.utils.tests
30from lsst.ctrl.mpexec import SimplePipelineExecutor
31from lsst.pex.config import Field
32from lsst.pipe.base import (
33 PipelineTask,
34 PipelineTaskConfig,
35 PipelineTaskConnections,
36 Struct,
37 TaskDef,
38 TaskMetadata,
39 connectionTypes,
40)
41from lsst.pipe.base.tests.no_dimensions import NoDimensionsTestTask
42from lsst.utils.introspection import get_full_type_name
44TESTDIR = os.path.abspath(os.path.dirname(__file__))
class NoDimensionsTestConnections2(PipelineTaskConnections, dimensions=set()):
    """Connections for `NoDimensionsTestConfig2`.

    The ``input`` connection deliberately declares the "TaskMetadataLike"
    storage class, which differs from the "StructuredDataDict" definition
    registered for the ``input`` dataset in the test butler, so that
    storage-class conversion on get is exercised.
    """

    input = connectionTypes.Input(
        name="input", doc="some dict-y input data for testing", storageClass="TaskMetadataLike"
    )
    output = connectionTypes.Output(
        name="output", doc="some dict-y output data for testing", storageClass="StructuredDataDict"
    )
class NoDimensionsTestConfig2(PipelineTaskConfig, pipelineConnections=NoDimensionsTestConnections2):
    """Config for a no-dimensions test task whose input connection uses the
    "TaskMetadataLike" storage class (see `NoDimensionsTestConnections2`).
    """

    key = Field(dtype=str, doc="String key for the dict entry the task sets.", default="one")
    value = Field(dtype=int, doc="Integer value for the dict entry the task sets.", default=1)
    outputSC = Field(dtype=str, doc="Output storage class requested", default="dict")
class NoDimensionsMetadataTestConnections(PipelineTaskConnections, dimensions=set()):
    """Connections for `NoDimensionsMetadataTestTask`, adding a ``meta``
    input that reads another task's metadata dataset.
    """

    input = connectionTypes.Input(
        name="input", doc="some dict-y input data for testing", storageClass="StructuredDataDict"
    )
    # Deliberately choose a storage class that does not match the metadata
    # default TaskMetadata storage class.
    meta = connectionTypes.Input(
        name="a_metadata", doc="Metadata from previous task", storageClass="StructuredDataDict"
    )
    output = connectionTypes.Output(
        name="output", doc="some dict-y output data for testing", storageClass="StructuredDataDict"
    )
class NoDimensionsMetadataTestConfig(
    PipelineTaskConfig, pipelineConnections=NoDimensionsMetadataTestConnections
):
    """Config for `NoDimensionsMetadataTestTask`."""

    key = Field(dtype=str, doc="String key for the dict entry the task sets.", default="one")
    value = Field(dtype=int, doc="Integer value for the dict entry the task sets.", default=1)
    outputSC = Field(dtype=str, doc="Output storage class requested", default="dict")
class NoDimensionsMetadataTestTask(PipelineTask):
    """A simple pipeline task that can take a metadata as input."""

    ConfigClass = NoDimensionsMetadataTestConfig
    _DefaultName = "noDimensionsMetadataTest"

    def run(self, input: Dict[str, int], meta: Dict[str, Any]) -> Struct:
        """Run the task, adding the configured key-value pair to the input
        argument and returning it as the output.

        Parameters
        ----------
        input : `dict`
            Dictionary to update and return.
        meta : `dict`
            Metadata from an upstream task, already converted to a plain
            `dict` by the "StructuredDataDict" storage class declared on the
            ``meta`` connection; only logged here.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct with a single ``output`` attribute.
        """
        self.log.info("Run metadata method given data of type: %s", get_full_type_name(input))
        # Copy so the caller's input mapping is never mutated.
        output = input.copy()
        output[self.config.key] = self.config.value

        self.log.info("Received task metadata (%s): %s", get_full_type_name(meta), meta)

        # Can change the return type via configuration.
        if "TaskMetadata" in self.config.outputSC:
            output = TaskMetadata.from_dict(output)
        elif isinstance(output, TaskMetadata):
            # Want the output to be a dict.  isinstance (rather than an exact
            # type comparison) is the idiomatic check here.
            output = output.to_dict()
        self.log.info("Run method returns data of type: %s", get_full_type_name(output))
        return Struct(output=output)
class SimplePipelineExecutorTests(lsst.utils.tests.TestCase):
    """Test the SimplePipelineExecutor API with a trivial task."""

    def setUp(self):
        # Fresh temporary butler repository per test; removed in tearDown.
        self.path = tempfile.mkdtemp()
        # standalone parameter forces the returned config to also include
        # the information from the search paths.
        config = lsst.daf.butler.Butler.makeRepo(
            self.path, standalone=True, searchPaths=[os.path.join(TESTDIR, "config")]
        )
        self.butler = SimplePipelineExecutor.prep_butler(config, [], "fake")
        # Register the dimensionless "input" dataset type and store a seed
        # dict for the pipelines under test to extend.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "input",
                dimensions=self.butler.registry.dimensions.empty,
                storageClass="StructuredDataDict",
            )
        )
        self.butler.put({"zero": 0}, "input")

    def tearDown(self):
        # Best-effort cleanup of the temporary repo.
        shutil.rmtree(self.path, ignore_errors=True)

    def test_from_task_class(self):
        """Test executing a single quantum with an executor created by the
        `from_task_class` factory method, and the
        `SimplePipelineExecutor.as_generator` method.
        """
        executor = SimplePipelineExecutor.from_task_class(NoDimensionsTestTask, butler=self.butler)
        (quantum,) = executor.as_generator(register_dataset_types=True)
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1})

    def _configure_pipeline(self, config_a_cls, config_b_cls, storageClass_a=None, storageClass_b=None):
        """Configure a pipeline with from_pipeline.

        Parameters
        ----------
        config_a_cls, config_b_cls : `type`
            Config classes for tasks "a" and "b".
        storageClass_a, storageClass_b : `str`, optional
            If given, value for the ``outputSC`` config field of the
            corresponding task.

        Returns
        -------
        executor : `SimplePipelineExecutor`
            Executor for the two-task pipeline a -> intermediate -> b.
        """
        config_a = config_a_cls()
        config_a.connections.output = "intermediate"
        if storageClass_a:
            config_a.outputSC = storageClass_a
        config_b = config_b_cls()
        config_b.connections.input = "intermediate"
        if storageClass_b:
            config_b.outputSC = storageClass_b
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)
        return executor

    def _test_logs(self, log_output, input_type_a, output_type_a, input_type_b, output_type_b):
        """Check the expected input types received by tasks A and B.

        Parameters
        ----------
        log_output : `list` [`str`]
            Captured log records (e.g. ``assertLogs(...).output``).
        input_type_a, output_type_a, input_type_b, output_type_b : `str`
            Fully qualified type names expected in the tasks' log messages.
        """
        all_logs = "\n".join(log_output)
        self.assertIn(f"lsst.a:Run method given data of type: {input_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method given data of type: {input_type_b}", all_logs)
        self.assertIn(f"lsst.a:Run method returns data of type: {output_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method returns data of type: {output_type_b}", all_logs)

    def test_from_pipeline(self):
        """Test executing a two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline` factory
        method, and the `SimplePipelineExecutor.run` method.
        """
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # Everything is a plain dict end to end when no conversion is forced.
        self._test_logs(cm.output, "dict", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_intermediates_differ(self):
        """Run pipeline but intermediates definition in registry differs."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_b="TaskMetadataLike",
        )

        # Pre-define the "intermediate" storage class to be something that is
        # like a dict but is not a dict.  This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "intermediate",
                dimensions=self.butler.registry.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # A dict is given to task a without change.
        # A returns a dict because it has not been told to do anything else.
        # That does not match the storage class so it will be converted
        # on put.
        # b is given a dict, because that's what its connection asks for.
        # b returns a TaskMetadata because that's how we configured it, but
        # the butler expects a dict so it is converted on put.
        self._test_logs(cm.output, "dict", "dict", "dict", "lsst.pipe.base.TaskMetadata")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate").to_dict(), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_output_differ(self):
        """Run pipeline but output definition in registry differs."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_a="TaskMetadataLike",
        )

        # Pre-define the "output" storage class to be something that is
        # like a dict but is not a dict.  This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.registry.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # a has been told to return a TaskMetadata but will convert to dict.
        # b returns a dict and that is converted to TaskMetadata on put.
        self._test_logs(cm.output, "dict", "lsst.pipe.base.TaskMetadata", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output").to_dict(), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_input_differ(self):
        """Run pipeline but input definition in registry differs."""
        # This config declares that the pipeline takes a TaskMetadata
        # as input but registry already thinks it has a StructureDataDict.
        executor = self._configure_pipeline(NoDimensionsTestConfig2, NoDimensionsTestTask.ConfigClass)

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        self._test_logs(cm.output, "lsst.pipe.base.TaskMetadata", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_incompatible(self):
        """Run pipeline but definitions are not compatible."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        # Incompatible output dataset type.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.registry.dimensions.empty,
                storageClass="StructuredDataList",
            )
        )

        with self.assertRaisesRegex(
            ValueError, "StructuredDataDict.*inconsistent with registry definition.*StructuredDataList"
        ):
            executor.run(register_dataset_types=True, save_versions=False)

    def test_from_pipeline_metadata(self):
        """Test two tasks where the output uses metadata from input."""
        # Must configure a special pipeline for this test.
        config_a = NoDimensionsTestTask.ConfigClass()
        config_a.connections.output = "intermediate"
        config_b = NoDimensionsMetadataTestTask.ConfigClass()
        config_b.connections.input = "intermediate"
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsMetadataTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)

        with self.assertLogs("test_simple_pipeline_executor", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # The metadata connection uses "StructuredDataDict", so task b should
        # have received a plain dict.
        self.assertIn(f"Received task metadata ({get_full_type_name(dict)})", "".join(cm.output))

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_file(self):
        """Test executing a two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline_filename`
        factory method, and the `SimplePipelineExecutor.run` method.
        """
        filename = os.path.join(self.path, "pipeline.yaml")
        with open(filename, "w") as f:
            f.write(
                """
                description: test
                tasks:
                    a:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.output: "intermediate"
                    b:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.input: "intermediate"
                            key: "two"
                            value: 2
                """
            )
        executor = SimplePipelineExecutor.from_pipeline_filename(filename, butler=self.butler)
        quanta = executor.run(register_dataset_types=True, save_versions=False)
        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})
class MemoryTester(lsst.utils.tests.MemoryTestCase):
    """Standard LSST memory/leak test case for this module."""

    pass
def setup_module(module):
    """Initialize the LSST test framework (pytest module-setup hook).

    Parameters
    ----------
    module : `module`
        Module being set up; unused.
    """
    lsst.utils.tests.init()
if __name__ == "__main__":
    # Allow running this test file directly with unittest.
    lsst.utils.tests.init()
    unittest.main()