Coverage for tests/test_simple_pipeline_executor.py: 25%
132 statements
« prev ^ index » next — coverage.py v7.4.1, created at 2024-02-07 12:18 +0000
# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
28from __future__ import annotations
30import os
31import shutil
32import tempfile
33import unittest
34from typing import Any
36import lsst.daf.butler
37import lsst.utils.tests
38from lsst.ctrl.mpexec import SimplePipelineExecutor
39from lsst.pipe.base import Struct, TaskDef, TaskMetadata, connectionTypes
40from lsst.pipe.base.tests.no_dimensions import (
41 NoDimensionsTestConfig,
42 NoDimensionsTestConnections,
43 NoDimensionsTestTask,
44)
45from lsst.utils.introspection import get_full_type_name
# Absolute directory containing this test file; used to locate config files.
TESTDIR = os.path.dirname(os.path.abspath(__file__))
class NoDimensionsTestConnections2(NoDimensionsTestConnections, dimensions=set()):
    """A connections class used for testing.

    Redeclares the ``input`` connection with a ``TaskMetadataLike`` storage
    class, overriding the storage class inherited from the base connections.
    """

    input = connectionTypes.Input(
        name="input",
        doc="some dict-y input data for testing",
        storageClass="TaskMetadataLike",
    )
class NoDimensionsTestConfig2(NoDimensionsTestConfig, pipelineConnections=NoDimensionsTestConnections2):
    """A config used for testing, wired to `NoDimensionsTestConnections2`."""
class NoDimensionsMetadataTestConnections(NoDimensionsTestConnections, dimensions=set()):
    """Test connection class for metadata.

    Deliberately choose a storage class that does not match the metadata
    default TaskMetadata storage class.
    """

    meta = connectionTypes.Input(
        name="a_metadata",
        doc="Metadata from previous task",
        storageClass="StructuredDataDict",
    )
class NoDimensionsMetadataTestConfig(
    NoDimensionsTestConfig, pipelineConnections=NoDimensionsMetadataTestConnections
):
    """A config used for testing the metadata-input task."""
class NoDimensionsMetadataTestTask(NoDimensionsTestTask):
    """A simple pipeline task that can take a metadata as input."""

    ConfigClass = NoDimensionsMetadataTestConfig
    _DefaultName = "noDimensionsMetadataTest"

    def run(self, input: dict[str, int], meta: dict[str, Any]) -> Struct:
        """Run the task, adding the configured key-value pair to the input
        argument and returning it as the output.

        Parameters
        ----------
        input : `dict`
            Dictionary to update and return.
        meta : `dict`
            Metadata to add.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct with a single ``output`` attribute.
        """
        self.log.info("Run metadata method given data of type: %s", get_full_type_name(input))
        output = input.copy()
        output[self.config.key] = self.config.value

        self.log.info("Received task metadata (%s): %s", get_full_type_name(meta), meta)

        # Can change the return type via configuration.
        if "TaskMetadata" in self.config.outputSC:
            output = TaskMetadata.from_dict(output)
        elif isinstance(output, TaskMetadata):
            # Fix: was `type(output) == TaskMetadata` (non-idiomatic exact
            # type comparison, ruff E721); isinstance is the correct check.
            # Want the output to be a dict.
            output = output.to_dict()
        self.log.info("Run method returns data of type: %s", get_full_type_name(output))
        return Struct(output=output)
class SimplePipelineExecutorTests(lsst.utils.tests.TestCase):
    """Test the SimplePipelineExecutor API with a trivial task."""

    def setUp(self):
        # Every test gets a fresh temporary butler repository.
        self.path = tempfile.mkdtemp()
        # standalone parameter forces the returned config to also include
        # the information from the search paths.
        repo_config = lsst.daf.butler.Butler.makeRepo(
            self.path, standalone=True, searchPaths=[os.path.join(TESTDIR, "config")]
        )
        self.butler = SimplePipelineExecutor.prep_butler(repo_config, [], "fake")
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "input",
                dimensions=self.butler.dimensions.empty,
                storageClass="StructuredDataDict",
            )
        )
        self.butler.put({"zero": 0}, "input")

    def tearDown(self):
        # Best-effort cleanup of the temporary repository.
        shutil.rmtree(self.path, ignore_errors=True)

    def test_from_task_class(self):
        """Test executing a single quantum with an executor created by the
        `from_task_class` factory method, and the
        `SimplePipelineExecutor.as_generator` method.
        """
        executor = SimplePipelineExecutor.from_task_class(NoDimensionsTestTask, butler=self.butler)
        (quantum,) = executor.as_generator(register_dataset_types=True)
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1})

    def _configure_pipeline(self, config_a_cls, config_b_cls, storageClass_a=None, storageClass_b=None):
        """Configure a pipeline with from_pipeline."""
        config_a = config_a_cls()
        config_a.connections.output = "intermediate"
        if storageClass_a:
            config_a.outputSC = storageClass_a
        config_b = config_b_cls()
        config_b.connections.input = "intermediate"
        if storageClass_b:
            config_b.outputSC = storageClass_b
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsTestTask, config=config_b),
        ]
        return SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)

    def _test_logs(self, log_output, input_type_a, output_type_a, input_type_b, output_type_b):
        """Check the expected input types received by tasks A and B.

        Note that these are the types as seen from the perspective of the task,
        so they must be consistent with the task's connections, but may not be
        consistent with the registry dataset types.
        """
        joined_logs = "\n".join(log_output)
        self.assertIn(f"lsst.a:Run method given data of type: {input_type_a}", joined_logs)
        self.assertIn(f"lsst.b:Run method given data of type: {input_type_b}", joined_logs)
        self.assertIn(f"lsst.a:Run method returns data of type: {output_type_a}", joined_logs)
        self.assertIn(f"lsst.b:Run method returns data of type: {output_type_b}", joined_logs)

    def test_from_pipeline(self):
        """Test executing a two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline` factory
        method, and the `SimplePipelineExecutor.run` method.
        """
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        with self.assertLogs("lsst", level="INFO") as logs:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        self._test_logs(logs.output, "dict", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_intermediates_differ(self):
        """Run pipeline but intermediates definition in registry differs."""
        # Pre-define the "intermediate" storage class to be something that is
        # like a dict but is not a dict. This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "intermediate",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_b="TaskMetadataLike",
        )
        with self.assertLogs("lsst", level="INFO") as logs:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # A dict is given to task a without change.
        # A returns a dict because it has not been told to do anything else.
        # That does not match the storage class so it will be converted
        # on put.
        # b is given a dict, because that's what its connection asks for.
        # b returns a TaskMetadata because that's how we configured it, and
        # since its output wasn't registered in advance, it will have been
        # registered as TaskMetadata and will now be received as TaskMetadata.
        self._test_logs(logs.output, "dict", "dict", "dict", "lsst.pipe.base.TaskMetadata")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), TaskMetadata.from_dict({"zero": 0, "one": 1}))
        self.assertEqual(self.butler.get("output"), TaskMetadata.from_dict({"zero": 0, "one": 1, "two": 2}))

    def test_from_pipeline_output_differ(self):
        """Run pipeline but output definition in registry differs."""
        # Pre-define the "output" storage class to be something that is
        # like a dict but is not a dict. This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_a="TaskMetadataLike",
        )
        with self.assertLogs("lsst", level="INFO") as logs:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # a has been told to return a TaskMetadata but this will convert to
        # dict on read by b.
        # b returns a dict and that is converted to TaskMetadata on put.
        self._test_logs(logs.output, "dict", "lsst.pipe.base.TaskMetadata", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), TaskMetadata.from_dict({"zero": 0, "one": 1}))
        self.assertEqual(self.butler.get("output"), TaskMetadata.from_dict({"zero": 0, "one": 1, "two": 2}))

    def test_from_pipeline_input_differ(self):
        """Run pipeline but input definition in registry differs."""
        # This config declares that the pipeline takes a TaskMetadata
        # as input but registry already thinks it has a StructureDataDict.
        executor = self._configure_pipeline(NoDimensionsTestConfig2, NoDimensionsTestTask.ConfigClass)

        with self.assertLogs("lsst", level="INFO") as logs:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        self._test_logs(logs.output, "lsst.pipe.base.TaskMetadata", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_inconsistent_dataset_types(self):
        """Generate the QG (by initializing the executor), then register the
        dataset type with a different storage class than the QG should have
        predicted, to make sure execution fails as it should.
        """
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        # Incompatible output dataset type.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="StructuredDataList",
            )
        )

        with self.assertRaisesRegex(
            ValueError, "StructuredDataDict.*inconsistent with registry definition.*StructuredDataList"
        ):
            executor.run(register_dataset_types=True, save_versions=False)

    def test_from_pipeline_metadata(self):
        """Test two tasks where the output uses metadata from input."""
        # Must configure a special pipeline for this test.
        config_a = NoDimensionsTestTask.ConfigClass()
        config_a.connections.output = "intermediate"
        config_b = NoDimensionsMetadataTestTask.ConfigClass()
        config_b.connections.input = "intermediate"
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsMetadataTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)

        with self.assertLogs("test_simple_pipeline_executor", level="INFO") as logs:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        self.assertIn(f"Received task metadata ({get_full_type_name(dict)})", "".join(logs.output))

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_file(self):
        """Test executing a two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline_filename`
        factory method, and the `SimplePipelineExecutor.run` method.
        """
        filename = os.path.join(self.path, "pipeline.yaml")
        with open(filename, "w") as f:
            f.write(
                """
                description: test
                tasks:
                    a:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.output: "intermediate"
                    b:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.input: "intermediate"
                            key: "two"
                            value: 2
                """
            )
        executor = SimplePipelineExecutor.from_pipeline_filename(filename, butler=self.butler)
        quanta = executor.run(register_dataset_types=True, save_versions=False)
        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})
class MemoryTester(lsst.utils.tests.MemoryTestCase):
    """Check for resource/file-handle leaks across the tests in this module."""
def setup_module(module):
    """Initialize the LSST test utilities when pytest collects this module.

    Parameters
    ----------
    module : `~types.ModuleType`
        Module to set up.
    """
    lsst.utils.tests.init()
if __name__ == "__main__":
    # Support running this test file directly as a script.
    lsst.utils.tests.init()
    unittest.main()