Coverage for tests/test_simple_pipeline_executor.py: 25%

132 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-03 10:43 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <https://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30import os 

31import shutil 

32import tempfile 

33import unittest 

34from typing import Any 

35 

36import lsst.daf.butler 

37import lsst.utils.tests 

38from lsst.ctrl.mpexec import SimplePipelineExecutor 

39from lsst.pipe.base import Struct, TaskDef, TaskMetadata, connectionTypes 

40from lsst.pipe.base.tests.no_dimensions import ( 

41 NoDimensionsTestConfig, 

42 NoDimensionsTestConnections, 

43 NoDimensionsTestTask, 

44) 

45from lsst.utils.introspection import get_full_type_name 

46 

47TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

48 

49 

class NoDimensionsTestConnections2(NoDimensionsTestConnections, dimensions=set()):
    """A connections class used for testing.

    Overrides the ``input`` connection so the task declares it consumes a
    ``TaskMetadataLike`` object, letting tests exercise storage-class
    conversion when the registry holds the dataset as a plain dict.
    """

    # Same dataset name as the base connection, but with a deliberately
    # different storage class to force conversion on get.
    input = connectionTypes.Input(
        name="input", doc="some dict-y input data for testing", storageClass="TaskMetadataLike"
    )

56 

57 

class NoDimensionsTestConfig2(NoDimensionsTestConfig, pipelineConnections=NoDimensionsTestConnections2):
    """A config used for testing.

    Identical to `NoDimensionsTestConfig` except that it binds the
    `NoDimensionsTestConnections2` connections class, whose ``input``
    connection uses the ``TaskMetadataLike`` storage class.
    """

60 

61 

class NoDimensionsMetadataTestConnections(NoDimensionsTestConnections, dimensions=set()):
    """Test connection class for metadata.

    Deliberately choose a storage class that does not match the metadata
    default TaskMetadata storage class.
    """

    # Consumes the metadata dataset written by the upstream task labeled "a"
    # (hence the dataset name "a_metadata"); requesting it as
    # StructuredDataDict rather than TaskMetadata forces a conversion on get.
    meta = connectionTypes.Input(
        name="a_metadata", doc="Metadata from previous task", storageClass="StructuredDataDict"
    )

72 

73 

class NoDimensionsMetadataTestConfig(
    NoDimensionsTestConfig, pipelineConnections=NoDimensionsMetadataTestConnections
):
    """A config used for testing the metadata.

    Binds `NoDimensionsMetadataTestConnections` so the task gains the
    ``meta`` input connection in addition to the base task's connections.
    """

78 

79 

class NoDimensionsMetadataTestTask(NoDimensionsTestTask):
    """A simple pipeline task that can take a metadata as input."""

    ConfigClass = NoDimensionsMetadataTestConfig
    _DefaultName = "noDimensionsMetadataTest"

    def run(self, input: dict[str, int], meta: dict[str, Any]) -> Struct:
        """Run the task, adding the configured key-value pair to the input
        argument and returning it as the output.

        Parameters
        ----------
        input : `dict`
            Dictionary to update and return.
        meta : `dict`
            Metadata from the upstream task; only logged here so tests can
            check the type it arrived as.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct with a single ``output`` attribute.
        """
        self.log.info("Run metadata method given data of type: %s", get_full_type_name(input))
        output = input.copy()
        output[self.config.key] = self.config.value

        self.log.info("Received task metadata (%s): %s", get_full_type_name(meta), meta)

        # Can change the return type via configuration.
        if "TaskMetadata" in self.config.outputSC:
            output = TaskMetadata.from_dict(output)
        elif isinstance(output, TaskMetadata):
            # The input arrived as a TaskMetadata (storage-class conversion)
            # but the configuration asks for a dict, so convert back.
            output = output.to_dict()
        self.log.info("Run method returns data of type: %s", get_full_type_name(output))
        return Struct(output=output)

114 

115 

class SimplePipelineExecutorTests(lsst.utils.tests.TestCase):
    """Test the SimplePipelineExecutor API with a trivial task."""

    def setUp(self) -> None:
        """Create a temporary butler repo holding a single dict dataset."""
        self.path = tempfile.mkdtemp()
        # standalone parameter forces the returned config to also include
        # the information from the search paths.
        config = lsst.daf.butler.Butler.makeRepo(
            self.path, standalone=True, searchPaths=[os.path.join(TESTDIR, "config")]
        )
        self.butler = SimplePipelineExecutor.prep_butler(config, [], "fake")
        # Register the pipeline's overall-input dataset type by hand; the
        # executor only registers output/intermediate types for us.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "input",
                dimensions=self.butler.dimensions.empty,
                storageClass="StructuredDataDict",
            )
        )
        self.butler.put({"zero": 0}, "input")

    def tearDown(self) -> None:
        """Remove the temporary repo; ignore errors from partial teardown."""
        shutil.rmtree(self.path, ignore_errors=True)

    def test_from_task_class(self) -> None:
        """Test executing a single quantum with an executor created by the
        `from_task_class` factory method, and the
        `SimplePipelineExecutor.as_generator` method.
        """
        executor = SimplePipelineExecutor.from_task_class(NoDimensionsTestTask, butler=self.butler)
        # Single-element unpack doubles as an assertion that exactly one
        # quantum was generated.
        (quantum,) = executor.as_generator(register_dataset_types=True)
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1})

    def _configure_pipeline(self, config_a_cls, config_b_cls, storageClass_a=None, storageClass_b=None):
        """Configure a pipeline with from_pipeline.

        Builds a two-task chain "a" -> "b" joined via the "intermediate"
        dataset.  ``storageClass_a``/``storageClass_b`` optionally override
        each task's ``outputSC`` config field to force storage-class
        conversions; task "b" writes ``two=2`` so its output is
        distinguishable from "a"'s.
        """
        config_a = config_a_cls()
        config_a.connections.output = "intermediate"
        if storageClass_a:
            config_a.outputSC = storageClass_a
        config_b = config_b_cls()
        config_b.connections.input = "intermediate"
        if storageClass_b:
            config_b.outputSC = storageClass_b
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)
        return executor

    def _test_logs(self, log_output, input_type_a, output_type_a, input_type_b, output_type_b) -> None:
        """Check the expected input types received by tasks A and B.

        Note that these are the types as seen from the perspective of the task,
        so they must be consistent with the task's connections, but may not be
        consistent with the registry dataset types.
        """
        all_logs = "\n".join(log_output)
        self.assertIn(f"lsst.a:Run method given data of type: {input_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method given data of type: {input_type_b}", all_logs)
        self.assertIn(f"lsst.a:Run method returns data of type: {output_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method returns data of type: {output_type_b}", all_logs)

    def test_from_pipeline(self) -> None:
        """Test executing a two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline` factory
        method, and the `SimplePipelineExecutor.run` method.
        """
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # No storage-class overrides anywhere, so dicts flow end to end.
        self._test_logs(cm.output, "dict", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_intermediates_differ(self) -> None:
        """Run pipeline but intermediates definition in registry differs."""
        # Pre-define the "intermediate" storage class to be something that is
        # like a dict but is not a dict.  This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "intermediate",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_b="TaskMetadataLike",
        )
        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # A dict is given to task a without change.
        # A returns a dict because it has not been told to do anything else.
        # That does not match the storage class so it will be converted
        # on put.
        # b is given a dict, because that's what its connection asks for.
        # b returns a TaskMetadata because that's how we configured it, and
        # since its output wasn't registered in advance, it will have been
        # registered as TaskMetadata and will now be received as TaskMetadata.
        self._test_logs(cm.output, "dict", "dict", "dict", "lsst.pipe.base.TaskMetadata")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), TaskMetadata.from_dict({"zero": 0, "one": 1}))
        self.assertEqual(self.butler.get("output"), TaskMetadata.from_dict({"zero": 0, "one": 1, "two": 2}))

    def test_from_pipeline_output_differ(self) -> None:
        """Run pipeline but output definition in registry differs."""
        # Pre-define the "output" storage class to be something that is
        # like a dict but is not a dict.  This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_a="TaskMetadataLike",
        )
        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # a has been told to return a TaskMetadata but this will convert to
        # dict on read by b.
        # b returns a dict and that is converted to TaskMetadata on put.
        self._test_logs(cm.output, "dict", "lsst.pipe.base.TaskMetadata", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), TaskMetadata.from_dict({"zero": 0, "one": 1}))
        self.assertEqual(self.butler.get("output"), TaskMetadata.from_dict({"zero": 0, "one": 1, "two": 2}))

    def test_from_pipeline_input_differ(self) -> None:
        """Run pipeline but input definition in registry differs."""
        # This config declares that the pipeline takes a TaskMetadata
        # as input but registry already thinks it has a StructureDataDict.
        executor = self._configure_pipeline(NoDimensionsTestConfig2, NoDimensionsTestTask.ConfigClass)

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # Conversion happens on get, so only task a sees a TaskMetadata.
        self._test_logs(cm.output, "lsst.pipe.base.TaskMetadata", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_inconsistent_dataset_types(self) -> None:
        """Generate the QG (by initializing the executor), then register the
        dataset type with a different storage class than the QG should have
        predicted, to make sure execution fails as it should.
        """
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        # Incompatible output dataset type.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="StructuredDataList",
            )
        )

        with self.assertRaisesRegex(
            ValueError, "StructuredDataDict.*inconsistent with registry definition.*StructuredDataList"
        ):
            executor.run(register_dataset_types=True, save_versions=False)

    def test_from_pipeline_metadata(self) -> None:
        """Test two tasks where the output uses metadata from input."""
        # Must configure a special pipeline for this test.
        config_a = NoDimensionsTestTask.ConfigClass()
        config_a.connections.output = "intermediate"
        config_b = NoDimensionsMetadataTestTask.ConfigClass()
        config_b.connections.input = "intermediate"
        config_b.key = "two"
        config_b.value = 2
        task_defs = [
            TaskDef(label="a", taskClass=NoDimensionsTestTask, config=config_a),
            TaskDef(label="b", taskClass=NoDimensionsMetadataTestTask, config=config_b),
        ]
        executor = SimplePipelineExecutor.from_pipeline(task_defs, butler=self.butler)

        with self.assertLogs("test_simple_pipeline_executor", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # The meta connection asked for StructuredDataDict, so the metadata
        # must have arrived in the run method as a plain dict.
        self.assertIn(f"Received task metadata ({get_full_type_name(dict)})", "".join(cm.output))

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_file(self) -> None:
        """Test executing a two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline_filename`
        factory method, and the `SimplePipelineExecutor.run` method.
        """
        filename = os.path.join(self.path, "pipeline.yaml")
        with open(filename, "w") as f:
            f.write(
                """
                description: test
                tasks:
                    a:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.output: "intermediate"
                    b:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.input: "intermediate"
                            key: "two"
                            value: 2
                """
            )
        executor = SimplePipelineExecutor.from_pipeline_filename(filename, butler=self.butler)
        quanta = executor.run(register_dataset_types=True, save_versions=False)
        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

346 

347 

class MemoryTester(lsst.utils.tests.MemoryTestCase):
    """Generic tests for file leaks.

    The body is intentionally empty; the inherited `MemoryTestCase` supplies
    the leak-detection tests.
    """

350 

351 

def setup_module(module):
    """Set up the module for pytest.

    Parameters
    ----------
    module : `types.ModuleType`
        The module being set up; unused, but required by the pytest
        ``setup_module`` hook signature.
    """
    lsst.utils.tests.init()

355 

356 

if __name__ == "__main__":
    # Mirror setup_module() so running this file directly behaves the same
    # as running it under pytest.
    lsst.utils.tests.init()
    unittest.main()