Coverage for tests/test_simple_pipeline_executor.py: 26%
135 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-28 10:40 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22from __future__ import annotations
24import os
25import shutil
26import tempfile
27import unittest
28from typing import Any, Dict
30import lsst.daf.butler
31import lsst.utils.tests
32from lsst.ctrl.mpexec import SimplePipelineExecutor
33from lsst.pipe.base import Struct, TaskDef, TaskMetadata, connectionTypes
34from lsst.pipe.base.tests.no_dimensions import (
35 NoDimensionsTestConfig,
36 NoDimensionsTestConnections,
37 NoDimensionsTestTask,
38)
39from lsst.utils.introspection import get_full_type_name
# Absolute path to the directory containing this test file; used to locate
# the butler search-path config directory in setUp.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
class NoDimensionsTestConnections2(NoDimensionsTestConnections, dimensions=set()):
    """Connections variant whose ``input`` requests the ``TaskMetadataLike``
    storage class instead of the default, exercising storage-class
    conversion between the registry definition and the task.
    """

    input = connectionTypes.Input(
        name="input",
        doc="some dict-y input data for testing",
        storageClass="TaskMetadataLike",
    )
class NoDimensionsTestConfig2(NoDimensionsTestConfig, pipelineConnections=NoDimensionsTestConnections2):
    """Config variant wired to `NoDimensionsTestConnections2`."""
class NoDimensionsMetadataTestConnections(NoDimensionsTestConnections, dimensions=set()):
    """Connections variant that also reads the upstream task's metadata."""

    # Deliberately choose a storage class that does not match the metadata
    # default TaskMetadata storage class.
    meta = connectionTypes.Input(
        name="a_metadata",
        doc="Metadata from previous task",
        storageClass="StructuredDataDict",
    )
class NoDimensionsMetadataTestConfig(
    NoDimensionsTestConfig, pipelineConnections=NoDimensionsMetadataTestConnections
):
    """Config variant wired to `NoDimensionsMetadataTestConnections`."""
class NoDimensionsMetadataTestTask(NoDimensionsTestTask):
    """A simple pipeline task that can take a metadata as input."""

    ConfigClass = NoDimensionsMetadataTestConfig
    _DefaultName = "noDimensionsMetadataTest"

    def run(self, input: dict[str, int], meta: dict[str, Any]) -> Struct:
        """Run the task, adding the configured key-value pair to the input
        argument and returning it as the output.

        Parameters
        ----------
        input : `dict`
            Dictionary to update and return.
        meta : `dict`
            Metadata from the upstream task; logged so tests can check the
            type it arrived as after storage-class conversion.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct with a single ``output`` attribute.
        """
        self.log.info("Run metadata method given data of type: %s", get_full_type_name(input))
        output = input.copy()
        output[self.config.key] = self.config.value

        self.log.info("Received task metadata (%s): %s", get_full_type_name(meta), meta)

        # Can change the return type via configuration.
        if "TaskMetadata" in self.config.outputSC:
            output = TaskMetadata.from_dict(output)
        elif isinstance(output, TaskMetadata):
            # Want the output to be a dict; isinstance is the idiomatic type
            # check (the original used ``type(...) ==``, which would miss
            # subclasses).
            output = output.to_dict()
        self.log.info("Run method returns data of type: %s", get_full_type_name(output))
        return Struct(output=output)
class SimplePipelineExecutorTests(lsst.utils.tests.TestCase):
    """Test the SimplePipelineExecutor API with a trivial task."""

    def setUp(self) -> None:
        # Create a temporary butler repo pre-loaded with a single "input"
        # dataset of storage class StructuredDataDict.
        self.path = tempfile.mkdtemp()
        # standalone parameter forces the returned config to also include
        # the information from the search paths.
        config = lsst.daf.butler.Butler.makeRepo(
            self.path, standalone=True, searchPaths=[os.path.join(TESTDIR, "config")]
        )
        self.butler = SimplePipelineExecutor.prep_butler(config, [], "fake")
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "input",
                dimensions=self.butler.dimensions.empty,
                storageClass="StructuredDataDict",
            )
        )
        self.butler.put({"zero": 0}, "input")

    def tearDown(self) -> None:
        # Remove the temporary repo created in setUp.
        shutil.rmtree(self.path, ignore_errors=True)

    def test_from_task_class(self) -> None:
        """Test executing a single quantum with an executor created by the
        `from_task_class` factory method, and the
        `SimplePipelineExecutor.as_generator` method.
        """
        executor = SimplePipelineExecutor.from_task_class(NoDimensionsTestTask, butler=self.butler)
        # Single-element unpacking asserts that exactly one quantum ran.
        (quantum,) = executor.as_generator(register_dataset_types=True)
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1})

    def _configure_pipeline(self, config_a_cls, config_b_cls, storageClass_a=None, storageClass_b=None):
        """Configure a pipeline with from_pipeline.

        Task "a" writes the "intermediate" dataset, which task "b" reads.
        ``storageClass_a``/``storageClass_b``, if given, override the
        corresponding task's ``outputSC`` config field so tests can force
        storage-class mismatches.
        """
        config_a = config_a_cls()
        config_a.connections.output = "intermediate"
        if storageClass_a:
            config_a.outputSC = storageClass_a
        config_b = config_b_cls()
        config_b.connections.input = "intermediate"
        if storageClass_b:
            config_b.outputSC = storageClass_b
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)
        return executor

    def _test_logs(self, log_output, input_type_a, output_type_a, input_type_b, output_type_b) -> None:
        """Check the expected input types received by tasks A and B"""
        all_logs = "\n".join(log_output)
        self.assertIn(f"lsst.a:Run method given data of type: {input_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method given data of type: {input_type_b}", all_logs)
        self.assertIn(f"lsst.a:Run method returns data of type: {output_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method returns data of type: {output_type_b}", all_logs)

    def test_from_pipeline(self) -> None:
        """Test executing a two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline` factory
        method, and the `SimplePipelineExecutor.run` method.
        """
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        self._test_logs(cm.output, "dict", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_intermediates_differ(self) -> None:
        """Run pipeline but intermediates definition in registry differs."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_b="TaskMetadataLike",
        )

        # Pre-define the "intermediate" storage class to be something that is
        # like a dict but is not a dict. This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "intermediate",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # A dict is given to task a without change.
        # A returns a dict because it has not been told to do anything else.
        # That does not match the storage class so it will be converted
        # on put.
        # b is given a dict, because that's what its connection asks for.
        # b returns a TaskMetadata because that's how we configured it, and
        # since its output wasn't registered in advance, it will have been
        # registered as TaskMetadata and will now be received as TaskMetadata.
        self._test_logs(cm.output, "dict", "dict", "dict", "lsst.pipe.base.TaskMetadata")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate").to_dict(), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output").to_dict(), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_output_differ(self) -> None:
        """Run pipeline but output definition in registry differs."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_a="TaskMetadataLike",
        )

        # Pre-define the "output" storage class to be something that is
        # like a dict but is not a dict. This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # a has been told to return a TaskMetadata but will convert to dict.
        # b returns a dict and that is converted to TaskMetadata on put.
        self._test_logs(cm.output, "dict", "lsst.pipe.base.TaskMetadata", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output").to_dict(), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_input_differ(self) -> None:
        """Run pipeline but input definition in registry differs."""

        # This config declares that the pipeline takes a TaskMetadata
        # as input but registry already thinks it has a StructuredDataDict.
        executor = self._configure_pipeline(NoDimensionsTestConfig2, NoDimensionsTestTask.ConfigClass)

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        self._test_logs(cm.output, "lsst.pipe.base.TaskMetadata", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_incompatible(self) -> None:
        """Run pipeline but definitions are not compatible."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        # Incompatible output dataset type.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="StructuredDataList",
            )
        )

        with self.assertRaisesRegex(
            ValueError, "StructuredDataDict.*inconsistent with registry definition.*StructuredDataList"
        ):
            executor.run(register_dataset_types=True, save_versions=False)

    def test_from_pipeline_metadata(self) -> None:
        """Test two tasks where the output uses metadata from input."""
        # Must configure a special pipeline for this test.
        config_a = NoDimensionsTestTask.ConfigClass()
        config_a.connections.output = "intermediate"
        config_b = NoDimensionsMetadataTestTask.ConfigClass()
        config_b.connections.input = "intermediate"
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsMetadataTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)

        with self.assertLogs("test_simple_pipeline_executor", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # The metadata connection requested StructuredDataDict, so task b
        # should have received the metadata as a plain dict.
        self.assertIn(f"Received task metadata ({get_full_type_name(dict)})", "".join(cm.output))

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_file(self) -> None:
        """Test executing a two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline_filename`
        factory method, and the `SimplePipelineExecutor.run` method.
        """
        filename = os.path.join(self.path, "pipeline.yaml")
        with open(filename, "w") as f:
            f.write(
                """
                description: test
                tasks:
                    a:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.output: "intermediate"
                    b:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.input: "intermediate"
                            key: "two"
                            value: 2
                """
            )
        executor = SimplePipelineExecutor.from_pipeline_filename(filename, butler=self.butler)
        quanta = executor.run(register_dataset_types=True, save_versions=False)
        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})
class MemoryTester(lsst.utils.tests.MemoryTestCase):
    """Check for file-descriptor and memory leaks."""
def setup_module(module):
    # Pytest module-setup hook: initialize the lsst.utils test machinery.
    lsst.utils.tests.init()
# Support running this test file directly with python.
if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()