Coverage for tests/test_simple_pipeline_executor.py: 25%
132 statements
« prev ^ index » next coverage.py v7.3.0, created at 2023-08-25 09:44 +0000
# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
22from __future__ import annotations
24import os
25import shutil
26import tempfile
27import unittest
28from typing import Any
30import lsst.daf.butler
31import lsst.utils.tests
32from lsst.ctrl.mpexec import SimplePipelineExecutor
33from lsst.pipe.base import Struct, TaskDef, TaskMetadata, connectionTypes
34from lsst.pipe.base.tests.no_dimensions import (
35 NoDimensionsTestConfig,
36 NoDimensionsTestConnections,
37 NoDimensionsTestTask,
38)
39from lsst.utils.introspection import get_full_type_name
# Absolute path of the directory holding this test file; used below to
# locate the butler ``config`` search path for the test repository.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
class NoDimensionsTestConnections2(NoDimensionsTestConnections, dimensions=set()):
    """A connections class used for testing.

    Redeclares the ``input`` connection of the base class with the
    ``TaskMetadataLike`` storage class, so tests can exercise storage-class
    conversion on get.
    """

    # Same dataset type name as the base class, different storage class.
    input = connectionTypes.Input(
        name="input", doc="some dict-y input data for testing", storageClass="TaskMetadataLike"
    )
class NoDimensionsTestConfig2(NoDimensionsTestConfig, pipelineConnections=NoDimensionsTestConnections2):
    """A config used for testing.

    Identical to `NoDimensionsTestConfig` except that it uses
    `NoDimensionsTestConnections2` as its pipeline connections class.
    """
class NoDimensionsMetadataTestConnections(NoDimensionsTestConnections, dimensions=set()):
    """Test connection class for metadata.

    Deliberately choose a storage class that does not match the metadata
    default TaskMetadata storage class.
    """

    # ``a_metadata`` is the metadata dataset written by the task labeled "a";
    # declaring it as StructuredDataDict forces a conversion from the default
    # TaskMetadata storage class when the dataset is read.
    meta = connectionTypes.Input(
        name="a_metadata", doc="Metadata from previous task", storageClass="StructuredDataDict"
    )
class NoDimensionsMetadataTestConfig(
    NoDimensionsTestConfig, pipelineConnections=NoDimensionsMetadataTestConnections
):
    """A config used for testing the metadata.

    Pairs `NoDimensionsTestConfig` with the metadata-reading connections
    class `NoDimensionsMetadataTestConnections`.
    """
class NoDimensionsMetadataTestTask(NoDimensionsTestTask):
    """A simple pipeline task that can take a metadata as input."""

    ConfigClass = NoDimensionsMetadataTestConfig
    _DefaultName = "noDimensionsMetadataTest"

    def run(self, input: dict[str, int], meta: dict[str, Any]) -> Struct:
        """Run the task, adding the configured key-value pair to the input
        argument and returning it as the output.

        Parameters
        ----------
        input : `dict`
            Dictionary to update and return.
        meta : `dict`
            Metadata from the upstream task; logged here so tests can check
            the type it arrived as after any storage class conversion.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct with a single ``output`` attribute.
        """
        self.log.info("Run metadata method given data of type: %s", get_full_type_name(input))
        output = input.copy()
        output[self.config.key] = self.config.value

        self.log.info("Received task metadata (%s): %s", get_full_type_name(meta), meta)

        # Can change the return type via configuration.
        if "TaskMetadata" in self.config.outputSC:
            output = TaskMetadata.from_dict(output)
        elif isinstance(output, TaskMetadata):
            # Want the output to be a dict.  (isinstance rather than an exact
            # type comparison: subclasses of TaskMetadata convert the same way.)
            output = output.to_dict()
        self.log.info("Run method returns data of type: %s", get_full_type_name(output))
        return Struct(output=output)
class SimplePipelineExecutorTests(lsst.utils.tests.TestCase):
    """Test the SimplePipelineExecutor API with a trivial task."""

    def setUp(self):
        # Fresh temporary butler repo per test; removed in tearDown.
        self.path = tempfile.mkdtemp()
        # standalone parameter forces the returned config to also include
        # the information from the search paths.
        config = lsst.daf.butler.Butler.makeRepo(
            self.path, standalone=True, searchPaths=[os.path.join(TESTDIR, "config")]
        )
        self.butler = SimplePipelineExecutor.prep_butler(config, [], "fake")
        # Pre-register the pipeline's overall-input dataset type and store a
        # single dict dataset for the tasks to read.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "input",
                dimensions=self.butler.dimensions.empty,
                storageClass="StructuredDataDict",
            )
        )
        self.butler.put({"zero": 0}, "input")

    def tearDown(self):
        # Remove the temporary repo; ignore_errors so a failed test's partial
        # state cannot mask the original failure.
        shutil.rmtree(self.path, ignore_errors=True)

    def test_from_task_class(self):
        """Test executing a single quantum with an executor created by the
        `from_task_class` factory method, and the
        `SimplePipelineExecutor.as_generator` method.
        """
        executor = SimplePipelineExecutor.from_task_class(NoDimensionsTestTask, butler=self.butler)
        # Exactly one quantum is expected; unpacking asserts that implicitly.
        (quantum,) = executor.as_generator(register_dataset_types=True)
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1})

    def _configure_pipeline(self, config_a_cls, config_b_cls, storageClass_a=None, storageClass_b=None):
        """Configure a pipeline with from_pipeline.

        Builds a two-task chain a -> b joined by the "intermediate" dataset;
        the optional storage class arguments set each task's ``outputSC``
        config field when provided.
        """
        config_a = config_a_cls()
        config_a.connections.output = "intermediate"
        if storageClass_a:
            config_a.outputSC = storageClass_a
        config_b = config_b_cls()
        config_b.connections.input = "intermediate"
        if storageClass_b:
            config_b.outputSC = storageClass_b
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)
        return executor

    def _test_logs(self, log_output, input_type_a, output_type_a, input_type_b, output_type_b):
        """Check the expected input types received by tasks A and B"""
        all_logs = "\n".join(log_output)
        self.assertIn(f"lsst.a:Run method given data of type: {input_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method given data of type: {input_type_b}", all_logs)
        self.assertIn(f"lsst.a:Run method returns data of type: {output_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method returns data of type: {output_type_b}", all_logs)

    def test_from_pipeline(self):
        """Test executing a two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline` factory
        method, and the `SimplePipelineExecutor.run` method.
        """
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # No storage class overrides anywhere, so plain dicts flow end to end.
        self._test_logs(cm.output, "dict", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_intermediates_differ(self):
        """Run pipeline but intermediates definition in registry differs."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_b="TaskMetadataLike",
        )

        # Pre-define the "intermediate" storage class to be something that is
        # like a dict but is not a dict.  This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "intermediate",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # A dict is given to task a without change.
        # A returns a dict because it has not been told to do anything else.
        # That does not match the storage class so it will be converted
        # on put.
        # b is given a dict, because that's what its connection asks for.
        # b returns a TaskMetadata because that's how we configured it, and
        # since its output wasn't registered in advance, it will have been
        # registered as TaskMetadata and will now be received as TaskMetadata.
        self._test_logs(cm.output, "dict", "dict", "dict", "lsst.pipe.base.TaskMetadata")

        self.assertEqual(len(quanta), 2)
        # Both datasets now come back as TaskMetadata, hence to_dict() before
        # comparing against plain-dict expectations.
        self.assertEqual(self.butler.get("intermediate").to_dict(), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output").to_dict(), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_output_differ(self):
        """Run pipeline but output definition in registry differs."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_a="TaskMetadataLike",
        )

        # Pre-define the "output" storage class to be something that is
        # like a dict but is not a dict.  This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # a has been told to return a TaskMetadata but will convert to dict.
        # b returns a dict and that is converted to TaskMetadata on put.
        self._test_logs(cm.output, "dict", "lsst.pipe.base.TaskMetadata", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output").to_dict(), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_input_differ(self):
        """Run pipeline but input definition in registry differs."""
        # This config declares that the pipeline takes a TaskMetadata
        # as input but registry already thinks it has a StructureDataDict.
        executor = self._configure_pipeline(NoDimensionsTestConfig2, NoDimensionsTestTask.ConfigClass)

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        self._test_logs(cm.output, "lsst.pipe.base.TaskMetadata", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_incompatible(self):
        """Run pipeline but definitions are not compatible."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        # Incompatible output dataset type: StructuredDataList cannot be
        # converted to/from the pipeline's StructuredDataDict.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="StructuredDataList",
            )
        )

        with self.assertRaisesRegex(
            ValueError, "StructuredDataDict.*inconsistent with registry definition.*StructuredDataList"
        ):
            executor.run(register_dataset_types=True, save_versions=False)

    def test_from_pipeline_metadata(self):
        """Test two tasks where the output uses metadata from input."""
        # Must configure a special pipeline for this test.
        config_a = NoDimensionsTestTask.ConfigClass()
        config_a.connections.output = "intermediate"
        config_b = NoDimensionsMetadataTestTask.ConfigClass()
        config_b.connections.input = "intermediate"
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsMetadataTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)

        # NoDimensionsMetadataTestTask is defined in this module, so its
        # logger lives under this module's name rather than "lsst".
        with self.assertLogs("test_simple_pipeline_executor", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # The metadata connection requested StructuredDataDict, so the
        # TaskMetadata written by task a must have arrived as a dict.
        self.assertIn(f"Received task metadata ({get_full_type_name(dict)})", "".join(cm.output))

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_file(self):
        """Test executing a two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline_filename`
        factory method, and the `SimplePipelineExecutor.run` method.
        """
        filename = os.path.join(self.path, "pipeline.yaml")
        with open(filename, "w") as f:
            f.write(
                """
                description: test
                tasks:
                    a:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.output: "intermediate"
                    b:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.input: "intermediate"
                            key: "two"
                            value: 2
                """
            )
        executor = SimplePipelineExecutor.from_pipeline_filename(filename, butler=self.butler)
        quanta = executor.run(register_dataset_types=True, save_versions=False)
        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})
class MemoryTester(lsst.utils.tests.MemoryTestCase):
    """Generic tests for file leaks.

    All checking is inherited from `lsst.utils.tests.MemoryTestCase`; no
    additional test methods are needed here.
    """
def setup_module(module):
    """Set up the module for pytest.

    Parameters
    ----------
    module : `module`
        The module being tested (supplied by pytest; unused here).
    """
    lsst.utils.tests.init()
if __name__ == "__main__":
    # Allow running this test file directly, outside pytest.
    lsst.utils.tests.init()
    unittest.main()