Coverage for tests/test_simple_pipeline_executor.py: 25%

132 statements  

coverage.py v7.2.7, created at 2023-08-06 02:30 +0000

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import shutil
import tempfile
import unittest
from typing import Any

import lsst.daf.butler
import lsst.utils.tests
from lsst.ctrl.mpexec import SimplePipelineExecutor
from lsst.pipe.base import Struct, TaskDef, TaskMetadata, connectionTypes
from lsst.pipe.base.tests.no_dimensions import (
    NoDimensionsTestConfig,
    NoDimensionsTestConnections,
    NoDimensionsTestTask,
)
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))
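
# The classes below provide minimal connections, configs, and tasks with no
# dimensions; each test then builds a SimplePipelineExecutor with one of its
# factory methods (from_task_class, from_pipeline, or from_pipeline_filename)
# and runs it against a temporary butler repository created in setUp.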

class NoDimensionsTestConnections2(NoDimensionsTestConnections, dimensions=set()):
    """A connections class used for testing."""

    input = connectionTypes.Input(
        name="input", doc="some dict-y input data for testing", storageClass="TaskMetadataLike"
    )


class NoDimensionsTestConfig2(NoDimensionsTestConfig, pipelineConnections=NoDimensionsTestConnections2):
    """A config used for testing."""


class NoDimensionsMetadataTestConnections(NoDimensionsTestConnections, dimensions=set()):
    """Test connection class for metadata.

    Deliberately uses a storage class that does not match the default
    TaskMetadata storage class of metadata datasets.
    """

    meta = connectionTypes.Input(
        name="a_metadata", doc="Metadata from previous task", storageClass="StructuredDataDict"
    )


class NoDimensionsMetadataTestConfig(
    NoDimensionsTestConfig, pipelineConnections=NoDimensionsMetadataTestConnections
):
    """A config used for testing the metadata."""


class NoDimensionsMetadataTestTask(NoDimensionsTestTask):
    """A simple pipeline task that can take task metadata as input."""

    ConfigClass = NoDimensionsMetadataTestConfig
    _DefaultName = "noDimensionsMetadataTest"

    def run(self, input: dict[str, int], meta: dict[str, Any]) -> Struct:
        """Run the task, adding the configured key-value pair to the input
        argument and returning it as the output.

        Parameters
        ----------
        input : `dict`
            Dictionary to update and return.
        meta : `dict`
            Task metadata from an upstream task.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct with a single ``output`` attribute.
        """
        self.log.info("Run metadata method given data of type: %s", get_full_type_name(input))
        output = input.copy()
        output[self.config.key] = self.config.value

        self.log.info("Received task metadata (%s): %s", get_full_type_name(meta), meta)

        # Can change the return type via configuration.
        if "TaskMetadata" in self.config.outputSC:
            output = TaskMetadata.from_dict(output)
        elif type(output) is TaskMetadata:
            # Want the output to be a dict.
            output = output.to_dict()
        self.log.info("Run method returns data of type: %s", get_full_type_name(output))
        return Struct(output=output)


class SimplePipelineExecutorTests(lsst.utils.tests.TestCase):
    """Test the SimplePipelineExecutor API with a trivial task."""

    def setUp(self):
        self.path = tempfile.mkdtemp()
        # standalone parameter forces the returned config to also include
        # the information from the search paths.
        config = lsst.daf.butler.Butler.makeRepo(
            self.path, standalone=True, searchPaths=[os.path.join(TESTDIR, "config")]
        )
        self.butler = SimplePipelineExecutor.prep_butler(config, [], "fake")
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "input",
                dimensions=self.butler.dimensions.empty,
                storageClass="StructuredDataDict",
            )
        )
        self.butler.put({"zero": 0}, "input")
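        # Each test method below starts from this stored "input" dict and
        # checks the dicts produced by the task(s) it chains on top of it.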

    def tearDown(self):
        shutil.rmtree(self.path, ignore_errors=True)

    def test_from_task_class(self):
        """Test executing a single quantum with an executor created by the
        `from_task_class` factory method, and the
        `SimplePipelineExecutor.as_generator` method.
        """
        executor = SimplePipelineExecutor.from_task_class(NoDimensionsTestTask, butler=self.butler)
        (quantum,) = executor.as_generator(register_dataset_types=True)
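        # setUp registers only the "input" dataset type, so the executor is
        # asked to register the task's output dataset types itself via
        # register_dataset_types=True.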

        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1})

    def _configure_pipeline(self, config_a_cls, config_b_cls, storageClass_a=None, storageClass_b=None):
        """Configure a two-task pipeline (a -> intermediate -> b) and return
        an executor created with the `from_pipeline` factory method.
        """
        config_a = config_a_cls()
        config_a.connections.output = "intermediate"
        if storageClass_a:
            config_a.outputSC = storageClass_a
        config_b = config_b_cls()
        config_b.connections.input = "intermediate"
        if storageClass_b:
            config_b.outputSC = storageClass_b
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)
        return executor

    def _test_logs(self, log_output, input_type_a, output_type_a, input_type_b, output_type_b):
        """Check the input and output types logged by tasks A and B."""
        all_logs = "\n".join(log_output)
        self.assertIn(f"lsst.a:Run method given data of type: {input_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method given data of type: {input_type_b}", all_logs)
        self.assertIn(f"lsst.a:Run method returns data of type: {output_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method returns data of type: {output_type_b}", all_logs)

    def test_from_pipeline(self):
        """Test executing two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline` factory
        method, and the `SimplePipelineExecutor.run` method.
        """
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        self._test_logs(cm.output, "dict", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_intermediates_differ(self):
        """Run pipeline but intermediates definition in registry differs."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_b="TaskMetadataLike",
        )

        # Pre-define the "intermediate" storage class to be something that is
        # like a dict but is not a dict. This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "intermediate",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # A dict is given to task a without change.
        # A returns a dict because it has not been told to do anything else.
        # That does not match the storage class so it will be converted
        # on put.
        # b is given a dict, because that's what its connection asks for.
        # b returns a TaskMetadata because that's how we configured it, and
        # since its output wasn't registered in advance, it will have been
        # registered as TaskMetadata and will now be received as TaskMetadata.
        self._test_logs(cm.output, "dict", "dict", "dict", "lsst.pipe.base.TaskMetadata")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate").to_dict(), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output").to_dict(), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_output_differ(self):
        """Run pipeline but output definition in registry differs."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_a="TaskMetadataLike",
        )

        # Pre-define the "output" storage class to be something that is
        # like a dict but is not a dict. This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # a has been told to return a TaskMetadata, but that does not match
        # the storage class of its output connection, so it is converted to a
        # dict on put.
        # b returns a dict and that is converted to TaskMetadata on put.
        self._test_logs(cm.output, "dict", "lsst.pipe.base.TaskMetadata", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output").to_dict(), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_input_differ(self):
        """Run pipeline but input definition in registry differs."""
        # This config declares that the pipeline takes a TaskMetadata
        # as input, but registry already thinks it has a StructuredDataDict.
        executor = self._configure_pipeline(NoDimensionsTestConfig2, NoDimensionsTestTask.ConfigClass)

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        self._test_logs(cm.output, "lsst.pipe.base.TaskMetadata", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_incompatible(self):
        """Run pipeline but definitions are not compatible."""
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        # Incompatible output dataset type.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="StructuredDataList",
            )
        )

        with self.assertRaisesRegex(
            ValueError, "StructuredDataDict.*inconsistent with registry definition.*StructuredDataList"
        ):
            executor.run(register_dataset_types=True, save_versions=False)

    def test_from_pipeline_metadata(self):
        """Test two tasks where the second task uses the first task's
        metadata as an input.
        """
        # Must configure a special pipeline for this test.
        config_a = NoDimensionsTestTask.ConfigClass()
        config_a.connections.output = "intermediate"
        config_b = NoDimensionsMetadataTestTask.ConfigClass()
        config_b.connections.input = "intermediate"
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsMetadataTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)
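        # Task "b" reads "a_metadata", the metadata dataset written by task
        # "a", but its connection declares the StructuredDataDict storage
        # class, so the metadata arrives in run() as a plain dict.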

        with self.assertLogs("test_simple_pipeline_executor", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        self.assertIn(f"Received task metadata ({get_full_type_name(dict)})", "".join(cm.output))

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_file(self):
        """Test executing two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline_filename`
        factory method, and the `SimplePipelineExecutor.run` method.
        """
        filename = os.path.join(self.path, "pipeline.yaml")
        with open(filename, "w") as f:
            f.write(
                """
                description: test
                tasks:
                    a:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.output: "intermediate"
                    b:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.input: "intermediate"
                            key: "two"
                            value: 2
                """
            )
        executor = SimplePipelineExecutor.from_pipeline_filename(filename, butler=self.butler)
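        # The YAML pipeline above mirrors the two-task chain that
        # _configure_pipeline builds programmatically.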

        quanta = executor.run(register_dataset_types=True, save_versions=False)
        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})


class MemoryTester(lsst.utils.tests.MemoryTestCase):
    """Generic tests for file leaks."""


def setup_module(module):
    """Set up the module for pytest."""
    lsst.utils.tests.init()


if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()