Coverage for tests/test_simple_pipeline_executor.py: 26%

135 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-05 09:15 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import os 

25import shutil 

26import tempfile 

27import unittest 

28from typing import Any, Dict 

29 

30import lsst.daf.butler 

31import lsst.utils.tests 

32from lsst.ctrl.mpexec import SimplePipelineExecutor 

33from lsst.pipe.base import Struct, TaskDef, TaskMetadata, connectionTypes 

34from lsst.pipe.base.tests.no_dimensions import ( 

35 NoDimensionsTestConfig, 

36 NoDimensionsTestConnections, 

37 NoDimensionsTestTask, 

38) 

39from lsst.utils.introspection import get_full_type_name 

40 

41TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

42 

43 

class NoDimensionsTestConnections2(NoDimensionsTestConnections, dimensions=set()):
    """Connections variant that re-declares the ``input`` connection with the
    ``TaskMetadataLike`` storage class.

    The test butler registers the "input" dataset type as
    ``StructuredDataDict`` (see ``SimplePipelineExecutorTests.setUp``), so a
    task using these connections exercises storage-class conversion on get.
    """

    input = connectionTypes.Input(
        name="input", doc="some dict-y input data for testing", storageClass="TaskMetadataLike"
    )

48 

49 

class NoDimensionsTestConfig2(NoDimensionsTestConfig, pipelineConnections=NoDimensionsTestConnections2):
    """Config wiring `NoDimensionsTestConnections2` into the test task, so the
    ``input`` connection is declared as ``TaskMetadataLike``.
    """

    pass

52 

53 

class NoDimensionsMetadataTestConnections(NoDimensionsTestConnections, dimensions=set()):
    """Connections adding an input for the metadata dataset produced by an
    upstream task labeled "a".
    """

    # Deliberately choose a storage class that does not match the metadata
    # default TaskMetadata storage class.
    meta = connectionTypes.Input(
        name="a_metadata", doc="Metadata from previous task", storageClass="StructuredDataDict"
    )

60 

61 

class NoDimensionsMetadataTestConfig(
    NoDimensionsTestConfig, pipelineConnections=NoDimensionsMetadataTestConnections
):
    """Config wiring `NoDimensionsMetadataTestConnections` into
    `NoDimensionsMetadataTestTask`, adding the ``meta`` input connection.
    """

    pass

66 

67 

class NoDimensionsMetadataTestTask(NoDimensionsTestTask):
    """A simple pipeline task that can take a metadata as input."""

    ConfigClass = NoDimensionsMetadataTestConfig
    _DefaultName = "noDimensionsMetadataTest"

    def run(self, input: dict[str, int], meta: dict[str, Any]) -> Struct:
        """Run the task, adding the configured key-value pair to the input
        argument and returning it as the output.

        Parameters
        ----------
        input : `dict`
            Dictionary to update and return.
        meta : `dict`
            Metadata from the upstream task.  It is only logged here, so
            tests can check the type actually delivered by the butler.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct with a single ``output`` attribute.
        """
        self.log.info("Run metadata method given data of type: %s", get_full_type_name(input))
        output = input.copy()
        output[self.config.key] = self.config.value

        self.log.info("Received task metadata (%s): %s", get_full_type_name(meta), meta)

        # Can change the return type via configuration.
        if "TaskMetadata" in self.config.outputSC:
            output = TaskMetadata.from_dict(output)
        elif isinstance(output, TaskMetadata):
            # Want the output to be a dict.  isinstance is the idiomatic type
            # check here (was ``type(output) == TaskMetadata``).
            output = output.to_dict()
        self.log.info("Run method returns data of type: %s", get_full_type_name(output))
        return Struct(output=output)

102 

103 

class SimplePipelineExecutorTests(lsst.utils.tests.TestCase):
    """Test the SimplePipelineExecutor API with a trivial task."""

    def setUp(self):
        # Per-test temporary butler repository; removed in tearDown.
        self.path = tempfile.mkdtemp()
        # standalone parameter forces the returned config to also include
        # the information from the search paths.
        config = lsst.daf.butler.Butler.makeRepo(
            self.path, standalone=True, searchPaths=[os.path.join(TESTDIR, "config")]
        )
        self.butler = SimplePipelineExecutor.prep_butler(config, [], "fake")
        # Register the overall-input dataset type as a plain dict
        # (StructuredDataDict); several tests rely on this to exercise
        # storage-class conversion against differing declarations.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "input",
                dimensions=self.butler.dimensions.empty,
                storageClass="StructuredDataDict",
            )
        )
        self.butler.put({"zero": 0}, "input")

    def tearDown(self):
        # ignore_errors so cleanup problems don't mask a real test failure.
        shutil.rmtree(self.path, ignore_errors=True)

    def test_from_task_class(self):
        """Test executing a single quantum with an executor created by the
        `from_task_class` factory method, and the
        `SimplePipelineExecutor.as_generator` method.
        """
        executor = SimplePipelineExecutor.from_task_class(NoDimensionsTestTask, butler=self.butler)
        # Unpacking the generator both executes the quantum and asserts
        # there is exactly one of them.
        (quantum,) = executor.as_generator(register_dataset_types=True)
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1})

    def _configure_pipeline(self, config_a_cls, config_b_cls, storageClass_a=None, storageClass_b=None):
        """Configure a pipeline with from_pipeline.

        Parameters
        ----------
        config_a_cls, config_b_cls : `type`
            Config classes for the tasks labeled "a" and "b".
        storageClass_a, storageClass_b : `str`, optional
            If given, override the corresponding task's ``outputSC`` config
            field (the storage class its ``run`` method returns).

        Returns
        -------
        executor : `SimplePipelineExecutor`
            Executor for the two-task pipeline a -> b, connected through the
            "intermediate" dataset.
        """
        config_a = config_a_cls()
        config_a.connections.output = "intermediate"
        if storageClass_a:
            config_a.outputSC = storageClass_a
        config_b = config_b_cls()
        config_b.connections.input = "intermediate"
        if storageClass_b:
            config_b.outputSC = storageClass_b
        # Task b adds a different key/value so its contribution to the final
        # output is distinguishable from task a's.
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)
        return executor

    def _test_logs(self, log_output, input_type_a, output_type_a, input_type_b, output_type_b):
        """Check the expected input types received by tasks A and B.

        Parameters
        ----------
        log_output : `list` [`str`]
            Captured log records (e.g. ``assertLogs(...).output``).
        input_type_a, output_type_a, input_type_b, output_type_b : `str`
            Fully-qualified type names expected in each task's
            "given data" / "returns data" log messages.
        """
        all_logs = "\n".join(log_output)
        self.assertIn(f"lsst.a:Run method given data of type: {input_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method given data of type: {input_type_b}", all_logs)
        self.assertIn(f"lsst.a:Run method returns data of type: {output_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method returns data of type: {output_type_b}", all_logs)

    def test_from_pipeline(self):
        """Test executing a two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline` factory
        method, and the `SimplePipelineExecutor.run` method.
        """
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # No storage-class overrides anywhere: everything flows as dict.
        self._test_logs(cm.output, "dict", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_intermediates_differ(self):
        """Run pipeline but intermediates definition in registry differs."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_b="TaskMetadataLike",
        )

        # Pre-define the "intermediate" storage class to be something that is
        # like a dict but is not a dict. This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "intermediate",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # A dict is given to task a without change.
        # A returns a dict because it has not been told to do anything else.
        # That does not match the storage class so it will be converted
        # on put.
        # b is given a dict, because that's what its connection asks for.
        # b returns a TaskMetadata because that's how we configured it, and
        # since its output wasn't registered in advance, it will have been
        # registered as TaskMetadata and will now be received as TaskMetadata.
        self._test_logs(cm.output, "dict", "dict", "dict", "lsst.pipe.base.TaskMetadata")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate").to_dict(), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output").to_dict(), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_output_differ(self):
        """Run pipeline but output definition in registry differs."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_a="TaskMetadataLike",
        )

        # Pre-define the "output" storage class to be something that is
        # like a dict but is not a dict. This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # a has been told to return a TaskMetadata but will convert to dict.
        # b returns a dict and that is converted to TaskMetadata on put.
        self._test_logs(cm.output, "dict", "lsst.pipe.base.TaskMetadata", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output").to_dict(), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_input_differ(self):
        """Run pipeline but input definition in registry differs."""

        # This config declares that the pipeline takes a TaskMetadata
        # as input but registry already thinks it has a StructureDataDict.
        executor = self._configure_pipeline(NoDimensionsTestConfig2, NoDimensionsTestTask.ConfigClass)

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # Only task a's input is converted; everything downstream is dict.
        self._test_logs(cm.output, "lsst.pipe.base.TaskMetadata", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_incompatible(self):
        """Run pipeline but definitions are not compatible."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        # Incompatible output dataset type: a list storage class cannot be
        # converted to/from the dict the pipeline declares.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="StructuredDataList",
            )
        )

        with self.assertRaisesRegex(
            ValueError, "StructuredDataDict.*inconsistent with registry definition.*StructuredDataList"
        ):
            executor.run(register_dataset_types=True, save_versions=False)

    def test_from_pipeline_metadata(self):
        """Test two tasks where the output uses metadata from input."""
        # Must configure a special pipeline for this test: task b is the
        # metadata-consuming variant, reading task a's metadata dataset.
        config_a = NoDimensionsTestTask.ConfigClass()
        config_a.connections.output = "intermediate"
        config_b = NoDimensionsMetadataTestTask.ConfigClass()
        config_b.connections.input = "intermediate"
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsMetadataTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)

        with self.assertLogs("test_simple_pipeline_executor", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # The metadata connection declares StructuredDataDict, so the
        # metadata must arrive as a plain dict.
        self.assertIn(f"Received task metadata ({get_full_type_name(dict)})", "".join(cm.output))

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_file(self):
        """Test executing a two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline_filename`
        factory method, and the `SimplePipelineExecutor.run` method.
        """
        filename = os.path.join(self.path, "pipeline.yaml")
        with open(filename, "w") as f:
            f.write(
                """
                description: test
                tasks:
                    a:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.output: "intermediate"
                    b:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.input: "intermediate"
                            key: "two"
                            value: 2
                """
            )
        executor = SimplePipelineExecutor.from_pipeline_filename(filename, butler=self.butler)
        quanta = executor.run(register_dataset_types=True, save_versions=False)
        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

331 

332 

class MemoryTester(lsst.utils.tests.MemoryTestCase):
    """Standard LSST test-suite boilerplate; `lsst.utils.tests.MemoryTestCase`
    provides its own checks (presumably for leaked resources — see that
    class's documentation).
    """

    pass

335 

336 

def setup_module(module):
    """Pytest per-module setup hook: initialize the LSST test framework."""
    lsst.utils.tests.init()

339 

340 

# Allow running this test file directly with the same initialization that
# setup_module performs under pytest.
if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()