# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import shutil
import tempfile
import unittest
from typing import Any

import lsst.daf.butler
import lsst.utils.tests
from lsst.ctrl.mpexec import SimplePipelineExecutor
from lsst.pipe.base import PipelineGraph, Struct, TaskMetadata, connectionTypes
from lsst.pipe.base.pipeline_graph import IncompatibleDatasetTypeError
from lsst.pipe.base.tests.no_dimensions import (
    NoDimensionsTestConfig,
    NoDimensionsTestConnections,
    NoDimensionsTestTask,
)
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


class NoDimensionsTestConnections2(NoDimensionsTestConnections, dimensions=set()):
    """A connections class used for testing."""

    input = connectionTypes.Input(
        name="input", doc="some dict-y input data for testing", storageClass="TaskMetadataLike"
    )


class NoDimensionsTestConfig2(NoDimensionsTestConfig, pipelineConnections=NoDimensionsTestConnections2):
    """A config used for testing."""


class NoDimensionsMetadataTestConnections(NoDimensionsTestConnections, dimensions=set()):
    """Test connection class for metadata.

    Deliberately choose a storage class that does not match the metadata
    default TaskMetadata storage class.
    """

    meta = connectionTypes.Input(
        name="a_metadata", doc="Metadata from previous task", storageClass="StructuredDataDict"
    )


class NoDimensionsMetadataTestConfig(
    NoDimensionsTestConfig, pipelineConnections=NoDimensionsMetadataTestConnections
):
    """A config used for testing the metadata."""


class NoDimensionsMetadataTestTask(NoDimensionsTestTask):
    """A simple pipeline task that can take task metadata as input."""

    ConfigClass = NoDimensionsMetadataTestConfig
    _DefaultName = "noDimensionsMetadataTest"

    def run(self, input: dict[str, int], meta: dict[str, Any]) -> Struct:
        """Run the task, adding the configured key-value pair to the input
        argument and returning it as the output.

        Parameters
        ----------
        input : `dict`
            Dictionary to update and return.
        meta : `dict`
            Metadata to add.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct with a single ``output`` attribute.
        """
        self.log.info("Run metadata method given data of type: %s", get_full_type_name(input))
        output = input.copy()
        output[self.config.key] = self.config.value

        self.log.info("Received task metadata (%s): %s", get_full_type_name(meta), meta)

        # The return type can be changed via configuration.
        if "TaskMetadata" in self.config.outputSC:
            output = TaskMetadata.from_dict(output)
        elif type(output) is TaskMetadata:
            # Want the output to be a dict.
            output = output.to_dict()
        self.log.info("Run method returns data of type: %s", get_full_type_name(output))
        return Struct(output=output)

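# Note for readers (inferred from ``run`` above and the test assertions below,
# rather than exercised at module scope): the ``outputSC`` config field
# controls the type a task returns. For example,
#
#     config = NoDimensionsTestTask.ConfigClass()
#     config.outputSC = "TaskMetadataLike"
#
# makes the task return a `~lsst.pipe.base.TaskMetadata` built from its dict
# result instead of a plain `dict`.
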
class SimplePipelineExecutorTests(lsst.utils.tests.TestCase):
    """Test the SimplePipelineExecutor API with a trivial task."""

    def setUp(self):
        self.path = tempfile.mkdtemp()
        # standalone parameter forces the returned config to also include
        # the information from the search paths.
        config = lsst.daf.butler.Butler.makeRepo(
            self.path, standalone=True, searchPaths=[os.path.join(TESTDIR, "config")]
        )
        self.butler = SimplePipelineExecutor.prep_butler(config, [], "fake")
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "input",
                dimensions=self.butler.dimensions.empty,
                storageClass="StructuredDataDict",
            )
        )
        self.butler.put({"zero": 0}, "input")

    def tearDown(self):
        shutil.rmtree(self.path, ignore_errors=True)

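    # Every test below starts from the repository prepared in setUp: a butler
    # created with SimplePipelineExecutor.prep_butler and containing a single
    # "input" dataset with value {"zero": 0}, registered with the
    # StructuredDataDict storage class.
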
    def test_from_task_class(self):
        """Test executing a single quantum with an executor created by the
        `from_task_class` factory method, and the
        `SimplePipelineExecutor.as_generator` method.
        """
        executor = SimplePipelineExecutor.from_task_class(NoDimensionsTestTask, butler=self.butler)
        (quantum,) = executor.as_generator(register_dataset_types=True)
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1})

    def _configure_pipeline(self, config_a_cls, config_b_cls, storageClass_a=None, storageClass_b=None):
        """Configure a two-task pipeline and return an executor built with
        `SimplePipelineExecutor.from_pipeline_graph`.
        """
        config_a = config_a_cls()
        config_a.connections.output = "intermediate"
        if storageClass_a:
            config_a.outputSC = storageClass_a
        config_b = config_b_cls()
        config_b.connections.input = "intermediate"
        if storageClass_b:
            config_b.outputSC = storageClass_b
        config_b.key = "two"
        config_b.value = 2
        pipeline_graph = PipelineGraph()
        pipeline_graph.add_task("a", NoDimensionsTestTask, config_a)
        pipeline_graph.add_task("b", NoDimensionsTestTask, config_b)
        executor = SimplePipelineExecutor.from_pipeline_graph(pipeline_graph, butler=self.butler)
        return executor

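    # The helper above wires up a two-task chain; schematically (dataset names
    # as asserted in the tests below):
    #
    #     "input" -> task "a" -> "intermediate" -> task "b" -> "output"
    #
    # Task "a" adds {"one": 1} with its default configuration and task "b" is
    # configured to add {"two": 2}.
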
    def _test_logs(self, log_output, input_type_a, output_type_a, input_type_b, output_type_b):
        """Check the input and output types logged by tasks A and B.

        Note that these are the types as seen from the perspective of the
        task, so they must be consistent with the task's connections, but may
        not be consistent with the registry dataset types.
        """
        all_logs = "\n".join(log_output)
        self.assertIn(f"lsst.a:Run method given data of type: {input_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method given data of type: {input_type_b}", all_logs)
        self.assertIn(f"lsst.a:Run method returns data of type: {output_type_a}", all_logs)
        self.assertIn(f"lsst.b:Run method returns data of type: {output_type_b}", all_logs)

    def test_from_pipeline(self):
        """Test executing two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline` factory
        method, and the `SimplePipelineExecutor.run` method.
        """
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        self._test_logs(cm.output, "dict", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_intermediates_differ(self):
        """Run pipeline but intermediates definition in registry differs."""
        # Pre-define the "intermediate" storage class to be something that is
        # like a dict but is not a dict. This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "intermediate",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_b="TaskMetadataLike",
        )
        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # A dict is given to task a without change.
        # A returns a dict because it has not been told to do anything else.
        # That does not match the storage class so it will be converted
        # on put.
        # b is given a dict, because that's what its connection asks for.
        # b returns a TaskMetadata because that's how we configured it, and
        # since its output wasn't registered in advance, it will have been
        # registered as TaskMetadata and will now be received as TaskMetadata.
        self._test_logs(cm.output, "dict", "dict", "dict", "lsst.pipe.base.TaskMetadata")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), TaskMetadata.from_dict({"zero": 0, "one": 1}))
        self.assertEqual(self.butler.get("output"), TaskMetadata.from_dict({"zero": 0, "one": 1, "two": 2}))

    def test_from_pipeline_output_differ(self):
        """Run pipeline but output definition in registry differs."""
        # Pre-define the "output" storage class to be something that is
        # like a dict but is not a dict. This will fail unless storage
        # class conversion is supported in put and get.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",
            )
        )
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass,
            NoDimensionsTestTask.ConfigClass,
            storageClass_a="TaskMetadataLike",
        )
        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        # a has been told to return a TaskMetadata but this will convert to
        # dict on read by b.
        # b returns a dict and that is converted to TaskMetadata on put.
        self._test_logs(cm.output, "dict", "lsst.pipe.base.TaskMetadata", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate").to_dict(), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output").to_dict(), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_input_differ(self):
        """Run pipeline but input definition in registry differs."""
        # This config declares that the pipeline takes a TaskMetadata
        # as input, but registry already thinks it has a StructuredDataDict.
        executor = self._configure_pipeline(NoDimensionsTestConfig2, NoDimensionsTestTask.ConfigClass)

        with self.assertLogs("lsst", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        self._test_logs(cm.output, "lsst.pipe.base.TaskMetadata", "dict", "dict", "dict")

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_incompatible(self):
        """Test that we cannot make a QG if the registry and pipeline have
        incompatible storage classes for a dataset type.
        """
        # Incompatible output dataset type.
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="StructuredDataList",
            )
        )
        with self.assertRaisesRegex(
            IncompatibleDatasetTypeError, "Incompatible definition.*StructuredDataDict.*StructuredDataList.*"
        ):
            self._configure_pipeline(NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass)

    def test_from_pipeline_registry_changed(self):
        """Run pipeline, but change registry dataset types between making the
        QG and executing it.

        This only fails with full-butler execution; we don't have a way to
        prevent it with QBB.
        """
        executor = self._configure_pipeline(
            NoDimensionsTestTask.ConfigClass, NoDimensionsTestTask.ConfigClass
        )
        self.butler.registry.registerDatasetType(
            lsst.daf.butler.DatasetType(
                "output",
                dimensions=self.butler.dimensions.empty,
                storageClass="TaskMetadataLike",  # even compatible is not okay
            )
        )
        with self.assertRaisesRegex(
            lsst.daf.butler.registry.ConflictingDefinitionError,
            ".*definition in registry has changed.*StructuredDataDict.*TaskMetadataLike.*",
        ):
            executor.run(register_dataset_types=True, save_versions=False)

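    # Taken together, the two tests above cover both failure modes: an
    # incompatible storage class is rejected with IncompatibleDatasetTypeError
    # when the quantum graph is built, while a registry definition that changes
    # after the graph is built (even to a compatible storage class) is rejected
    # with ConflictingDefinitionError at execution time.
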
    def test_from_pipeline_metadata(self):
        """Test two tasks where the output uses metadata from input."""
        # Must configure a special pipeline for this test.
        config_a = NoDimensionsTestTask.ConfigClass()
        config_a.connections.output = "intermediate"
        config_b = NoDimensionsMetadataTestTask.ConfigClass()
        config_b.connections.input = "intermediate"
        config_b.key = "two"
        config_b.value = 2
        pipeline_graph = PipelineGraph()
        pipeline_graph.add_task("a", NoDimensionsTestTask, config=config_a)
        pipeline_graph.add_task("b", NoDimensionsMetadataTestTask, config=config_b)
        executor = SimplePipelineExecutor.from_pipeline_graph(pipeline_graph, butler=self.butler)

        with self.assertLogs("test_simple_pipeline_executor", level="INFO") as cm:
            quanta = executor.run(register_dataset_types=True, save_versions=False)
        self.assertIn(f"Received task metadata ({get_full_type_name(dict)})", "".join(cm.output))

        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})

    def test_from_pipeline_file(self):
        """Test executing two quanta from different configurations of the
        same task, with an executor created by the `from_pipeline_filename`
        factory method, and the `SimplePipelineExecutor.run` method.
        """
        filename = os.path.join(self.path, "pipeline.yaml")
        with open(filename, "w") as f:
            f.write(
                """
                description: test
                tasks:
                    a:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.output: "intermediate"
                    b:
                        class: "lsst.pipe.base.tests.no_dimensions.NoDimensionsTestTask"
                        config:
                            connections.input: "intermediate"
                            key: "two"
                            value: 2
                """
            )

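        # The YAML above is meant to describe the same two-task pipeline that
        # _configure_pipeline builds programmatically: "a" writes
        # "intermediate", and "b" reads it and adds {"two": 2}.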
        executor = SimplePipelineExecutor.from_pipeline_filename(filename, butler=self.butler)
        quanta = executor.run(register_dataset_types=True, save_versions=False)
        self.assertEqual(len(quanta), 2)
        self.assertEqual(self.butler.get("intermediate"), {"zero": 0, "one": 1})
        self.assertEqual(self.butler.get("output"), {"zero": 0, "one": 1, "two": 2})


class MemoryTester(lsst.utils.tests.MemoryTestCase):
    """Generic tests for file leaks."""


def setup_module(module):
    """Set up the module for pytest.

    Parameters
    ----------
    module : `~types.ModuleType`
        Module to set up.
    """
    lsst.utils.tests.init()


if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()