Coverage for tests/test_htcondor_service.py: 28%

148 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-24 03:04 -0700

1# This file is part of ctrl_bps_htcondor. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <https://www.gnu.org/licenses/>. 

27 

28"""Unit tests for the HTCondor WMS service class and related functions.""" 

29 

30import logging 

31import pathlib 

32import tempfile 

33import unittest 

34 

35import htcondor 

36from lsst.ctrl.bps import WmsStates 

37from lsst.ctrl.bps.htcondor.htcondor_service import ( 

38 NodeStatus, 

39 _get_exit_code_summary, 

40 _htc_node_status_to_wms_state, 

41) 

42from lsst.ctrl.bps.htcondor.lssthtc import _tweak_log_info 

43 

44logger = logging.getLogger("lsst.ctrl.bps.htcondor") 

45 

46 

class GetExitCodeSummaryTestCase(unittest.TestCase):
    """Exercise ``_get_exit_code_summary`` with jobs in assorted states."""

    def setUp(self):
        """Create one job record per job status used by the tests.

        Records are keyed by job id and grouped under four labels
        ("foo", "bar", "baz", "qux").  Entry order is kept deliberately:
        the list comparisons made by the tests are order-sensitive.
        """
        status = htcondor.JobStatus
        self.jobs = {
            "1.0": {"JobStatus": status.IDLE, "bps_job_label": "foo"},
            "2.0": {"JobStatus": status.RUNNING, "bps_job_label": "foo"},
            "3.0": {"JobStatus": status.REMOVED, "bps_job_label": "foo"},
            "4.0": {
                "ExitCode": 0,
                "ExitBySignal": False,
                "JobStatus": status.COMPLETED,
                "bps_job_label": "bar",
            },
            "5.0": {
                "ExitCode": 1,
                "ExitBySignal": False,
                "JobStatus": status.COMPLETED,
                "bps_job_label": "bar",
            },
            "6.0": {
                "ExitBySignal": True,
                "ExitSignal": 11,
                "JobStatus": status.HELD,
                "bps_job_label": "baz",
            },
            "7.0": {
                "ExitBySignal": False,
                "ExitCode": 42,
                "JobStatus": status.HELD,
                "bps_job_label": "baz",
            },
            "8.0": {"JobStatus": status.TRANSFERRING_OUTPUT, "bps_job_label": "qux"},
            "9.0": {"JobStatus": status.SUSPENDED, "bps_job_label": "qux"},
        }

    def tearDown(self):
        """Nothing to release; the fixture is a plain dictionary."""

    def testMainScenario(self):
        """Check the per-label summary produced for the full fixture."""
        summary = _get_exit_code_summary(self.jobs)
        self.assertEqual(summary, {"foo": [], "bar": [1], "baz": [11, 42], "qux": []})

    def testUnknownStatus(self):
        """An unrecognized ``JobStatus`` value must be reported in the log."""
        jobs = {"1.0": {"JobStatus": -1, "bps_job_label": "foo"}}
        with self.assertLogs(logger=logger, level="DEBUG") as captured:
            _get_exit_code_summary(jobs)
        self.assertIn("lsst.ctrl.bps.htcondor", captured.records[0].name)
        first_message = captured.output[0]
        self.assertIn("Unknown", first_message)
        self.assertIn("JobStatus", first_message)

    def testUnknownKey(self):
        """A completed job lacking exit attributes must be reported in the log."""
        jobs = {
            "1.0": {
                "JobStatus": htcondor.JobStatus.COMPLETED,
                "UnknownKey": None,
                "bps_job_label": "foo",
            },
        }
        with self.assertLogs(logger=logger, level="DEBUG") as captured:
            _get_exit_code_summary(jobs)
        self.assertIn("lsst.ctrl.bps.htcondor", captured.records[0].name)
        first_message = captured.output[0]
        self.assertIn("Attribute", first_message)
        self.assertIn("not found", first_message)

132 

133 

class HtcNodeStatusToWmsStateTestCase(unittest.TestCase):
    """Test assigning WMS state based on HTCondor node status."""

    def setUp(self):
        """No fixtures are needed; each test builds its own job record."""

    def tearDown(self):
        """Nothing to clean up."""

    def _check(self, job, expected):
        """Assert that ``job`` is translated to the ``expected`` WMS state."""
        self.assertEqual(_htc_node_status_to_wms_state(job), expected)

    def testNotReady(self):
        self._check({"NodeStatus": NodeStatus.NOT_READY}, WmsStates.UNREADY)

    def testReady(self):
        self._check({"NodeStatus": NodeStatus.READY}, WmsStates.READY)

    def testPrerun(self):
        self._check({"NodeStatus": NodeStatus.PRERUN}, WmsStates.MISFIT)

    def testSubmittedHeld(self):
        submitted = {
            "NodeStatus": NodeStatus.SUBMITTED,
            "JobProcsHeld": 1,
            "StatusDetails": "",
            "JobProcsQueued": 0,
        }
        self._check(submitted, WmsStates.HELD)

    def testSubmittedRunning(self):
        submitted = {
            "NodeStatus": NodeStatus.SUBMITTED,
            "JobProcsHeld": 0,
            "StatusDetails": "not_idle",
            "JobProcsQueued": 0,
        }
        self._check(submitted, WmsStates.RUNNING)

    def testSubmittedPending(self):
        submitted = {
            "NodeStatus": NodeStatus.SUBMITTED,
            "JobProcsHeld": 0,
            "StatusDetails": "",
            "JobProcsQueued": 1,
        }
        self._check(submitted, WmsStates.PENDING)

    def testPostrun(self):
        self._check({"NodeStatus": NodeStatus.POSTRUN}, WmsStates.MISFIT)

    def testDone(self):
        self._check({"NodeStatus": NodeStatus.DONE}, WmsStates.SUCCEEDED)

    def testErrorDagmanSuccess(self):
        errored = {"NodeStatus": NodeStatus.ERROR, "StatusDetails": "DAGMAN error 0"}
        self._check(errored, WmsStates.SUCCEEDED)

    def testErrorDagmanFailure(self):
        errored = {"NodeStatus": NodeStatus.ERROR, "StatusDetails": "DAGMAN error 1"}
        self._check(errored, WmsStates.FAILED)

    def testFutile(self):
        self._check({"NodeStatus": NodeStatus.FUTILE}, WmsStates.PRUNED)

212 

213 

class TweakJobInfoTestCase(unittest.TestCase):
    """Test the function responsible for massaging job information."""

    def setUp(self):
        """Create a temporary log file and a minimal job record tied to it."""
        self.log_file = tempfile.NamedTemporaryFile(prefix="test_", suffix=".log")
        # Register the cleanup immediately: unittest skips tearDown() when
        # setUp() fails part-way (e.g. should Path.owner() raise below),
        # which would otherwise leave the temporary file open.
        self.addCleanup(self.log_file.close)
        self.log_name = pathlib.Path(self.log_file.name)
        self.job = {
            "Cluster": 1,
            "Proc": 0,
            "Iwd": str(self.log_name.parent),
            "Owner": self.log_name.owner(),
            "MyType": None,
            "TerminatedNormally": True,
        }

    def _assertStatusForEvent(self, event_type, expected_status):
        """Run ``_tweak_log_info`` for ``event_type`` and check ``JobStatus``."""
        job = self.job | {"MyType": event_type}
        _tweak_log_info(self.log_name, job)
        self.assertIn("JobStatus", job)
        self.assertEqual(job["JobStatus"], expected_status)

    def testDirectAssignments(self):
        """Id, working directory and owner fields are filled in on the job."""
        _tweak_log_info(self.log_name, self.job)
        self.assertEqual(self.job["ClusterId"], self.job["Cluster"])
        self.assertEqual(self.job["ProcId"], self.job["Proc"])
        self.assertEqual(self.job["Iwd"], str(self.log_name.parent))
        self.assertEqual(self.job["Owner"], self.log_name.owner())

    def testJobStatusAssignmentJobAbortedEvent(self):
        self._assertStatusForEvent("JobAbortedEvent", htcondor.JobStatus.REMOVED)

    def testJobStatusAssignmentExecuteEvent(self):
        self._assertStatusForEvent("ExecuteEvent", htcondor.JobStatus.RUNNING)

    def testJobStatusAssignmentSubmitEvent(self):
        self._assertStatusForEvent("SubmitEvent", htcondor.JobStatus.IDLE)

    def testJobStatusAssignmentJobHeldEvent(self):
        self._assertStatusForEvent("JobHeldEvent", htcondor.JobStatus.HELD)

    def testJobStatusAssignmentJobTerminatedEvent(self):
        self._assertStatusForEvent("JobTerminatedEvent", htcondor.JobStatus.COMPLETED)

    def testJobStatusAssignmentPostScriptTerminatedEvent(self):
        self._assertStatusForEvent("PostScriptTerminatedEvent", htcondor.JobStatus.COMPLETED)

    def testAddingExitStatusSuccess(self):
        """Exit details nested under ``ToE`` are copied to the top level."""
        job = self.job | {
            "MyType": "JobTerminatedEvent",
            "ToE": {"ExitBySignal": False, "ExitCode": 1},
        }
        _tweak_log_info(self.log_name, job)
        self.assertIn("ExitBySignal", job)
        self.assertIs(job["ExitBySignal"], False)
        self.assertIn("ExitCode", job)
        self.assertEqual(job["ExitCode"], 1)

    def testAddingExitStatusFailure(self):
        """A held job carrying no exit details triggers an error message."""
        job = self.job | {
            "MyType": "JobHeldEvent",
        }
        with self.assertLogs(logger=logger, level="ERROR") as cm:
            _tweak_log_info(self.log_name, job)
        self.assertIn("Could not determine exit status", cm.output[0])

    def testLoggingUnknownLogEvent(self):
        """An unrecognized event type is mentioned in the debug log."""
        job = self.job | {"MyType": "Foo"}
        with self.assertLogs(logger=logger, level="DEBUG") as cm:
            _tweak_log_info(self.log_name, job)
        # The second captured message is the one inspected; presumably the
        # function emits another debug message first -- confirm if this
        # index ever starts failing.
        self.assertIn("Unknown log event", cm.output[1])

    def testMissingKey(self):
        """A job record without ``Cluster`` makes the function raise KeyError."""
        # Work on a copy so the shared fixture is not modified in place.
        job = {key: value for key, value in self.job.items() if key != "Cluster"}
        with self.assertRaises(KeyError) as cm:
            _tweak_log_info(self.log_name, job)
        # cm.exception is only populated once the ``with`` block has exited.
        self.assertEqual(str(cm.exception), "'Cluster'")