Coverage for tests/test_simple_pipeline_executor.py: 29%

142 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-09 02:48 -0700

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import os 

23import shutil 

24import tempfile 

25import unittest 

26from typing import Any, Dict 

27 

28import lsst.daf.butler 

29import lsst.utils.tests 

30from lsst.ctrl.mpexec import SimplePipelineExecutor 

31from lsst.pex.config import Field 

32from lsst.pipe.base import ( 

33 PipelineTask, 

34 PipelineTaskConfig, 

35 PipelineTaskConnections, 

36 Struct, 

37 TaskDef, 

38 TaskMetadata, 

39 connectionTypes, 

40) 

41from lsst.pipe.base.tests.no_dimensions import NoDimensionsTestTask 

42from lsst.utils.introspection import get_full_type_name 

43 

44TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

45 

46 

class NoDimensionsTestConnections2(PipelineTaskConnections, dimensions=set()):
    """Connections whose input is declared with the ``TaskMetadataLike``
    storage class, so tests can exercise storage-class conversion when the
    registry holds a plain ``StructuredDataDict``.
    """

    input = connectionTypes.Input(
        name="input",
        doc="some dict-y input data for testing",
        storageClass="TaskMetadataLike",
    )
    output = connectionTypes.Output(
        name="output",
        doc="some dict-y output data for testing",
        storageClass="StructuredDataDict",
    )

54 

55 

class NoDimensionsTestConfig2(PipelineTaskConfig, pipelineConnections=NoDimensionsTestConnections2):
    """Config pairing the no-dimensions test task with the
    ``TaskMetadataLike``-input connections variant.
    """

    key = Field(dtype=str, default="one", doc="String key for the dict entry the task sets.")
    value = Field(dtype=int, default=1, doc="Integer value for the dict entry the task sets.")
    outputSC = Field(dtype=str, default="dict", doc="Output storage class requested")

60 

61 

class NoDimensionsMetadataTestConnections(PipelineTaskConnections, dimensions=set()):
    """Connections adding an upstream-task metadata input alongside the
    regular dict-like input.
    """

    input = connectionTypes.Input(
        name="input",
        doc="some dict-y input data for testing",
        storageClass="StructuredDataDict",
    )
    # Deliberately choose a storage class that does not match the metadata
    # default TaskMetadata storage class.
    meta = connectionTypes.Input(
        name="a_metadata",
        doc="Metadata from previous task",
        storageClass="StructuredDataDict",
    )
    output = connectionTypes.Output(
        name="output",
        doc="some dict-y output data for testing",
        storageClass="StructuredDataDict",
    )

74 

75 

class NoDimensionsMetadataTestConfig(
    PipelineTaskConfig, pipelineConnections=NoDimensionsMetadataTestConnections
):
    """Config for `NoDimensionsMetadataTestTask`."""

    key = Field(dtype=str, default="one", doc="String key for the dict entry the task sets.")
    value = Field(dtype=int, default=1, doc="Integer value for the dict entry the task sets.")
    outputSC = Field(dtype=str, default="dict", doc="Output storage class requested")

82 

83 

class NoDimensionsMetadataTestTask(PipelineTask):
    """A simple pipeline task that can take a metadata as input."""

    ConfigClass = NoDimensionsMetadataTestConfig
    _DefaultName = "noDimensionsMetadataTest"

    def run(self, input: Dict[str, int], meta: Dict[str, Any]) -> Struct:
        """Run the task, adding the configured key-value pair to the input
        argument and returning it as the output.

        Parameters
        ----------
        input : `dict`
            Dictionary to update and return.
        meta : `dict`
            Metadata from the upstream task.  Only logged here, to let tests
            check what type was actually delivered by the butler.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct with a single ``output`` attribute.
        """
        self.log.info("Run metadata method given data of type: %s", get_full_type_name(input))
        # Copy so the caller's input mapping is never mutated.
        output = input.copy()
        output[self.config.key] = self.config.value

        self.log.info("Received task metadata (%s): %s", get_full_type_name(meta), meta)

        # Can change the return type via configuration.
        if "TaskMetadata" in self.config.outputSC:
            output = TaskMetadata.from_dict(output)
        elif isinstance(output, TaskMetadata):
            # isinstance rather than an exact type(...) == comparison (PEP 8);
            # the input may have been converted to TaskMetadata (or a
            # subclass) by butler storage-class conversion.
            # Want the output to be a dict.
            output = output.to_dict()
        self.log.info("Run method returns data of type: %s", get_full_type_name(output))
        return Struct(output=output)

118 

119 

class SimplePipelineExecutorTests(lsst.utils.tests.TestCase):
    """Test the SimplePipelineExecutor API with a trivial task."""

    def setUp(self):
        # Each test gets its own throwaway butler repository, removed in
        # tearDown.
        self.path = tempfile.mkdtemp()
        # standalone parameter forces the returned config to also include
        # the information from the search paths.
        config = lsst.daf.butler.Butler.makeRepo(
            self.path, standalone=True, searchPaths=[os.path.join(TESTDIR, "config")]
        )
        self.butler = SimplePipelineExecutor.prep_butler(config, [], "fake")
        # Register the "input" dataset type and store the seed dict that
        # every pipeline in these tests starts from.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "input",
                dimensions=self.butler.registry.dimensions.empty,
                storageClass="StructuredDataDict",
            )
        )
        self.butler.put({"zero": 0}, "input")

    def tearDown(self):
        # Remove the temporary repository created in setUp.
        shutil.rmtree(self.path, ignore_errors=True)

    def test_from_task_class(self):
        """Test executing a single quantum with an executor created by the
        `from_task_class` factory method, and the
        `SimplePipelineExecutor.as_generator` method.
        """
        executor = SimplePipelineExecutor.from_task_class(NoDimensionsTestTask, butler=self.butler)
        # Exactly one quantum is expected; unpacking asserts that.
        (quantum,) = executor.as_generator(register_dataset_types=True)
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1})

    def _configure_pipeline(self, config_a_cls, config_b_cls, storageClass_a=None, storageClass_b=None):
        """Configure a pipeline with from_pipeline."""

        # Task "a" writes the shared "intermediate" dataset; task "b" reads
        # it, forming a two-quantum a -> b chain.
        config_a = config_a_cls()
        config_a.connections.output = "intermediate"
        if storageClass_a:
            config_a.outputSC = storageClass_a
        config_b = config_b_cls()
        config_b.connections.input = "intermediate"
        if storageClass_b:
            config_b.outputSC = storageClass_b
        # Distinct key/value for "b" so its contribution to the final output
        # dict is distinguishable from "a"'s.
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)
        return executor

    def _test_logs(self, log_output, input_type_a, output_type_a, input_type_b, output_type_b):
        """Check the expected input types received by tasks A and B"""
        all_logs = "\n".join(log_output)
        self.assertIn(f"lsst.a:Run method given data of type: {input_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method given data of type: {input_type_b}", all_logs)
        self.assertIn(f"lsst.a:Run method returns data of type: {output_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method returns data of type: {output_type_b}", all_logs)

    def test_from_pipeline(self):
        """Test executing a two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline` factory
        method, and the `SimplePipelineExecutor.run` method.
        """
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # No storage-class mismatches anywhere: everything stays a dict.
        self._test_logs(cm.output, "dict", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_intermediates_differ(self):
        """Run pipeline but intermediates definition in registry differs."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_b="TaskMetadataLike",
        )

        # Pre-define the "intermediate" storage class to be something that is
        # like a dict but is not a dict. This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "intermediate",
                dimensions=self.butler.registry.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # A dict is given to task a without change.
        # A returns a dict because it has not been told to do anything else.
        # That does not match the storage class so it will be converted
        # on put.
        # b is given a dict, because that's what its connection asks for.
        # b returns a TaskMetadata because that's how we configured it, but
        # the butler expects a dict so it is converted on put.
        self._test_logs(cm.output, "dict", "dict", "dict", "lsst.pipe.base.TaskMetadata")

        self.assertEqual(len(quanta), 2)
        # The registry definition wins on get, so "intermediate" comes back
        # TaskMetadata-like and needs to_dict() for comparison.
        self.assertEqual(self.butler.get("intermediate").to_dict(), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_output_differ(self):
        """Run pipeline but output definition in registry differs."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_a="TaskMetadataLike",
        )

        # Pre-define the "output" storage class to be something that is
        # like a dict but is not a dict. This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.registry.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # a has been told to return a TaskMetadata but will convert to dict.
        # b returns a dict and that is converted to TaskMetadata on put.
        self._test_logs(cm.output, "dict", "lsst.pipe.base.TaskMetadata", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        # Registry definition wins on get: "output" needs to_dict().
        self.assertEqual(self.butler.get("output").to_dict(), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_input_differ(self):
        """Run pipeline but input definition in registry differs."""

        # This config declares that the pipeline takes a TaskMetadata
        # as input but registry already thinks it has a StructureDataDict.
        executor = self._configure_pipeline(NoDimensionsTestConfig2, NoDimensionsTestTask.ConfigClass)

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        self._test_logs(cm.output, "lsst.pipe.base.TaskMetadata", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_incompatible(self):
        """Run pipeline but definitions are not compatible."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        # Incompatible output dataset type: StructuredDataList cannot be
        # converted to/from StructuredDataDict, so run() must fail.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.registry.dimensions.empty,
                storageClass="StructuredDataList",
            )
        )

        with self.assertRaisesRegex(
            ValueError, "StructuredDataDict.*inconsistent with registry definition.*StructuredDataList"
        ):
            executor.run(register_dataset_types=True, save_versions=False)

    def test_from_pipeline_metadata(self):
        """Test two tasks where the output uses metadata from input."""
        # Must configure a special pipeline for this test.
        config_a = NoDimensionsTestTask.ConfigClass()
        config_a.connections.output = "intermediate"
        config_b = NoDimensionsMetadataTestTask.ConfigClass()
        config_b.connections.input = "intermediate"
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsMetadataTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)

        with self.assertLogs("test_simple_pipeline_executor", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # The metadata connection asked for StructuredDataDict, so task b
        # must have received a plain dict.
        self.assertIn(f"Received task metadata ({get_full_type_name(dict)})", "".join(cm.output))

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_file(self):
        """Test executing a two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline_filename`
        factory method, and the `SimplePipelineExecutor.run` method.
        """
        filename = os.path.join(self.path, "pipeline.yaml")
        with open(filename, "w") as f:
            f.write(
                """
                description: test
                tasks:
                    a:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.output: "intermediate"
                    b:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.input: "intermediate"
                            key: "two"
                            value: 2
                """
            )
        executor = SimplePipelineExecutor.from_pipeline_filename(filename, butler=self.butler)
        quanta = executor.run(register_dataset_types=True, save_versions=False)
        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

346 

347 

class MemoryTester(lsst.utils.tests.MemoryTestCase):
    """Standard `lsst.utils.tests.MemoryTestCase` hook; no extra tests are
    added here.
    """

    pass

350 

351 

def setup_module(module):
    """Initialize the LSST test utilities (pytest module-level setup hook)."""
    lsst.utils.tests.init()

354 

355 

if __name__ == "__main__":
    # Support running this test file directly with python.
    lsst.utils.tests.init()
    unittest.main()