Coverage for tests/test_simple_pipeline_executor.py: 25%
132 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-18 10:51 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <https://www.gnu.org/licenses/>.
28from __future__ import annotations
30import os
31import shutil
32import tempfile
33import unittest
34from typing import Any
36import lsst.daf.butler
37import lsst.utils.tests
38from lsst.ctrl.mpexec import SimplePipelineExecutor
39from lsst.pipe.base import Struct, TaskDef, TaskMetadata, connectionTypes
40from lsst.pipe.base.tests.no_dimensions import (
41 NoDimensionsTestConfig,
42 NoDimensionsTestConnections,
43 NoDimensionsTestTask,
44)
45from lsst.utils.introspection import get_full_type_name
47TESTDIR = os.path.abspath(os.path.dirname(__file__))
50class NoDimensionsTestConnections2(NoDimensionsTestConnections, dimensions=set()):
51 """A connections class used for testing."""
53 input = connectionTypes.Input(
54 name="input", doc="some dict-y input data for testing", storageClass="TaskMetadataLike"
55 )
58class NoDimensionsTestConfig2(NoDimensionsTestConfig, pipelineConnections=NoDimensionsTestConnections2):
59 """A config used for testing."""
62class NoDimensionsMetadataTestConnections(NoDimensionsTestConnections, dimensions=set()):
63 """Test connection class for metadata.
65 Deliberately choose a storage class that does not match the metadata
66 default TaskMetadata storage class.
67 """
69 meta = connectionTypes.Input(
70 name="a_metadata", doc="Metadata from previous task", storageClass="StructuredDataDict"
71 )
74class NoDimensionsMetadataTestConfig(
75 NoDimensionsTestConfig, pipelineConnections=NoDimensionsMetadataTestConnections
76):
77 """A config used for testing the metadata."""
class NoDimensionsMetadataTestTask(NoDimensionsTestTask):
    """A simple pipeline task that can take a metadata as input."""

    ConfigClass = NoDimensionsMetadataTestConfig
    _DefaultName = "noDimensionsMetadataTest"

    # NOTE: the parameter name ``input`` shadows the builtin, but it is fixed
    # by the connection name and cannot be renamed without breaking the
    # framework's keyword-argument dispatch.
    def run(self, input: dict[str, int], meta: dict[str, Any]) -> Struct:
        """Run the task, adding the configured key-value pair to the input
        argument and returning it as the output.

        Parameters
        ----------
        input : `dict`
            Dictionary to update and return.
        meta : `dict`
            Metadata from the upstream task, delivered by the butler in the
            ``StructuredDataDict`` storage class declared by the connection.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct with a single ``output`` attribute.
        """
        self.log.info("Run metadata method given data of type: %s", get_full_type_name(input))
        output = input.copy()
        output[self.config.key] = self.config.value

        self.log.info("Received task metadata (%s): %s", get_full_type_name(meta), meta)

        # Can change the return type via configuration.
        if "TaskMetadata" in self.config.outputSC:
            output = TaskMetadata.from_dict(output)
        elif isinstance(output, TaskMetadata):
            # Want the output to be a dict.
            output = output.to_dict()
        self.log.info("Run method returns data of type: %s", get_full_type_name(output))
        return Struct(output=output)
class SimplePipelineExecutorTests(lsst.utils.tests.TestCase):
    """Test the SimplePipelineExecutor API with a trivial task."""

    def setUp(self):
        """Create a temporary butler repo containing a single registered
        "input" dataset (a StructuredDataDict) in a "fake" run collection.
        """
        self.path = tempfile.mkdtemp()
        # standalone parameter forces the returned config to also include
        # the information from the search paths.
        config = lsst.daf.butler.Butler.makeRepo(
            self.path, standalone=True, searchPaths=[os.path.join(TESTDIR, "config")]
        )
        self.butler = SimplePipelineExecutor.prep_butler(config, [], "fake")
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "input",
                dimensions=self.butler.dimensions.empty,
                storageClass="StructuredDataDict",
            )
        )
        self.butler.put({"zero": 0}, "input")

    def tearDown(self):
        """Remove the temporary repo created in setUp."""
        shutil.rmtree(self.path, ignore_errors=True)

    def test_from_task_class(self):
        """Test executing a single quantum with an executor created by the
        `from_task_class` factory method, and the
        `SimplePipelineExecutor.as_generator` method.
        """
        executor = SimplePipelineExecutor.from_task_class(NoDimensionsTestTask, butler=self.butler)
        # Unpacking into a one-tuple asserts exactly one quantum was produced.
        (quantum,) = executor.as_generator(register_dataset_types=True)
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1})

    def _configure_pipeline(self, config_a_cls, config_b_cls, storageClass_a=None, storageClass_b=None):
        """Configure a pipeline with from_pipeline.

        Builds a two-task chain ``a -> b`` connected via an "intermediate"
        dataset; optional ``storageClass_a``/``storageClass_b`` override each
        task's configured output storage class.
        """
        config_a = config_a_cls()
        config_a.connections.output = "intermediate"
        if storageClass_a:
            config_a.outputSC = storageClass_a
        config_b = config_b_cls()
        config_b.connections.input = "intermediate"
        if storageClass_b:
            config_b.outputSC = storageClass_b
        # Task b adds a different key so its output is distinguishable from
        # task a's.
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)
        return executor

    def _test_logs(self, log_output, input_type_a, output_type_a, input_type_b, output_type_b):
        """Check the expected input types received by tasks A and B"""
        all_logs = "\n".join(log_output)
        self.assertIn(f"lsst.a:Run method given data of type: {input_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method given data of type: {input_type_b}", all_logs)
        self.assertIn(f"lsst.a:Run method returns data of type: {output_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method returns data of type: {output_type_b}", all_logs)

    def test_from_pipeline(self):
        """Test executing a two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline` factory
        method, and the `SimplePipelineExecutor.run` method.
        """
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # With no storage-class overrides, everything flows as plain dicts.
        self._test_logs(cm.output, "dict", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_intermediates_differ(self):
        """Run pipeline but intermediates definition in registry differs."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_b="TaskMetadataLike",
        )

        # Pre-define the "intermediate" storage class to be something that is
        # like a dict but is not a dict. This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "intermediate",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # A dict is given to task a without change.
        # A returns a dict because it has not been told to do anything else.
        # That does not match the storage class so it will be converted
        # on put.
        # b is given a dict, because that's what its connection asks for.
        # b returns a TaskMetadata because that's how we configured it, and
        # since its output wasn't registered in advance, it will have been
        # registered as TaskMetadata and will now be received as TaskMetadata.
        self._test_logs(cm.output, "dict", "dict", "dict", "lsst.pipe.base.TaskMetadata")

        self.assertEqual(len(quanta), 2)
        # Both datasets come back as TaskMetadata here, hence to_dict().
        self.assertEqual(self.butler.get("intermediate").to_dict(), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output").to_dict(), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_output_differ(self):
        """Run pipeline but output definition in registry differs."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_a="TaskMetadataLike",
        )

        # Pre-define the "output" storage class to be something that is
        # like a dict but is not a dict. This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # a has been told to return a TaskMetadata but will convert to dict.
        # b returns a dict and that is converted to TaskMetadata on put.
        self._test_logs(cm.output, "dict", "lsst.pipe.base.TaskMetadata", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output").to_dict(), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_input_differ(self):
        """Run pipeline but input definition in registry differs."""
        # This config declares that the pipeline takes a TaskMetadata
        # as input but registry already thinks it has a StructureDataDict.
        executor = self._configure_pipeline(NoDimensionsTestConfig2, NoDimensionsTestTask.ConfigClass)

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # Task a sees a TaskMetadata (converted on get); downstream is dicts.
        self._test_logs(cm.output, "lsst.pipe.base.TaskMetadata", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_incompatible(self):
        """Run pipeline but definitions are not compatible."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        # Incompatible output dataset type.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="StructuredDataList",
            )
        )

        # StructuredDataList cannot be converted from/to a dict-like storage
        # class, so execution must fail before any quanta run.
        with self.assertRaisesRegex(
            ValueError, "StructuredDataDict.*inconsistent with registry definition.*StructuredDataList"
        ):
            executor.run(register_dataset_types=True, save_versions=False)

    def test_from_pipeline_metadata(self):
        """Test two tasks where the output uses metadata from input."""
        # Must configure a special pipeline for this test.
        config_a = NoDimensionsTestTask.ConfigClass()
        config_a.connections.output = "intermediate"
        config_b = NoDimensionsMetadataTestTask.ConfigClass()
        config_b.connections.input = "intermediate"
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsMetadataTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)

        # NOTE(review): the logger name here is this test module's name —
        # presumably because NoDimensionsMetadataTestTask is defined in this
        # module; confirm if the task class ever moves.
        with self.assertLogs("test_simple_pipeline_executor", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # The metadata connection asked for StructuredDataDict, so task b
        # must have received a plain dict.
        self.assertIn(f"Received task metadata ({get_full_type_name(dict)})", "".join(cm.output))

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_file(self):
        """Test executing a two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline_filename`
        factory method, and the `SimplePipelineExecutor.run` method.
        """
        filename = os.path.join(self.path, "pipeline.yaml")
        with open(filename, "w") as f:
            f.write(
                """
                description: test
                tasks:
                    a:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.output: "intermediate"
                    b:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.input: "intermediate"
                            key: "two"
                            value: 2
                """
            )
        executor = SimplePipelineExecutor.from_pipeline_filename(filename, butler=self.butler)
        quanta = executor.run(register_dataset_types=True, save_versions=False)
        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})
343class MemoryTester(lsst.utils.tests.MemoryTestCase):
344 """Generic tests for file leaks."""
347def setup_module(module):
348 """Set up the module for pytest."""
349 lsst.utils.tests.init()
352if __name__ == "__main__":
353 lsst.utils.tests.init()
354 unittest.main()