Coverage for tests/test_htcondor_service.py: 28%

161 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-05-03 03:01 -0700

1# This file is part of ctrl_bps_htcondor. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <https://www.gnu.org/licenses/>. 

27 

28"""Unit tests for the HTCondor WMS service class and related functions.""" 

29 

30import logging 

31import pathlib 

32import tempfile 

33import unittest 

34 

35import htcondor 

36from lsst.ctrl.bps import WmsStates 

37from lsst.ctrl.bps.htcondor.htcondor_service import ( 

38 NodeStatus, 

39 _get_exit_code_summary, 

40 _htc_node_status_to_wms_state, 

41 _htc_status_to_wms_state, 

42) 

43from lsst.ctrl.bps.htcondor.lssthtc import _tweak_log_info 

44 

45logger = logging.getLogger("lsst.ctrl.bps.htcondor") 

46 

47 

class GetExitCodeSummaryTestCase(unittest.TestCase):
    """Test the function responsible for creating exit code summary."""

    def setUp(self):
        # Nine jobs spread over four labels ("foo", "bar", "baz", "qux").
        # Only the "bar" and "baz" jobs carry exit information.
        status = htcondor.JobStatus
        self.jobs = {
            "1.0": {"JobStatus": status.IDLE, "bps_job_label": "foo"},
            "2.0": {"JobStatus": status.RUNNING, "bps_job_label": "foo"},
            "3.0": {"JobStatus": status.REMOVED, "bps_job_label": "foo"},
            "4.0": {
                "ExitCode": 0,
                "ExitBySignal": False,
                "JobStatus": status.COMPLETED,
                "bps_job_label": "bar",
            },
            "5.0": {
                "ExitCode": 1,
                "ExitBySignal": False,
                "JobStatus": status.COMPLETED,
                "bps_job_label": "bar",
            },
            "6.0": {
                "ExitBySignal": True,
                "ExitSignal": 11,
                "JobStatus": status.HELD,
                "bps_job_label": "baz",
            },
            "7.0": {
                "ExitBySignal": False,
                "ExitCode": 42,
                "JobStatus": status.HELD,
                "bps_job_label": "baz",
            },
            "8.0": {"JobStatus": status.TRANSFERRING_OUTPUT, "bps_job_label": "qux"},
            "9.0": {"JobStatus": status.SUSPENDED, "bps_job_label": "qux"},
        }

    def testMainScenario(self):
        # Every label must appear in the summary; the test expects only the
        # non-zero exit code of "bar" and the signal/exit code of the held
        # "baz" jobs to be listed.
        want = {"foo": [], "bar": [1], "baz": [11, 42], "qux": []}
        self.assertEqual(_get_exit_code_summary(self.jobs), want)

    def testUnknownStatus(self):
        # A job status outside of the known values must be reported in the log.
        unknown = {"1.0": {"JobStatus": -1, "bps_job_label": "foo"}}
        with self.assertLogs(logger=logger, level="DEBUG") as captured:
            _get_exit_code_summary(unknown)
        self.assertIn("lsst.ctrl.bps.htcondor", captured.records[0].name)
        first_message = captured.output[0]
        self.assertIn("Unknown", first_message)
        self.assertIn("JobStatus", first_message)

    def testUnknownKey(self):
        # A completed job without any exit information must trigger
        # an "Attribute ... not found" log message.
        incomplete = {
            "1.0": {
                "JobStatus": htcondor.JobStatus.COMPLETED,
                "UnknownKey": None,
                "bps_job_label": "foo",
            }
        }
        with self.assertLogs(logger=logger, level="DEBUG") as captured:
            _get_exit_code_summary(incomplete)
        self.assertIn("lsst.ctrl.bps.htcondor", captured.records[0].name)
        first_message = captured.output[0]
        self.assertIn("Attribute", first_message)
        self.assertIn("not found", first_message)

133 

134 

class HtcNodeStatusToWmsStateTestCase(unittest.TestCase):
    """Test assigning WMS state based on HTCondor node status."""

    def _assertWmsState(self, job, expected):
        """Check that ``job`` is translated to the ``expected`` WMS state."""
        self.assertEqual(_htc_node_status_to_wms_state(job), expected)

    def testNotReady(self):
        self._assertWmsState({"NodeStatus": NodeStatus.NOT_READY}, WmsStates.UNREADY)

    def testReady(self):
        self._assertWmsState({"NodeStatus": NodeStatus.READY}, WmsStates.READY)

    def testPrerun(self):
        self._assertWmsState({"NodeStatus": NodeStatus.PRERUN}, WmsStates.MISFIT)

    # For SUBMITTED nodes the expected state depends on the remaining keys
    # ("JobProcsHeld", "StatusDetails", "JobProcsQueued").

    def testSubmittedHeld(self):
        held = {
            "NodeStatus": NodeStatus.SUBMITTED,
            "JobProcsHeld": 1,
            "StatusDetails": "",
            "JobProcsQueued": 0,
        }
        self._assertWmsState(held, WmsStates.HELD)

    def testSubmittedRunning(self):
        running = {
            "NodeStatus": NodeStatus.SUBMITTED,
            "JobProcsHeld": 0,
            "StatusDetails": "not_idle",
            "JobProcsQueued": 0,
        }
        self._assertWmsState(running, WmsStates.RUNNING)

    def testSubmittedPending(self):
        pending = {
            "NodeStatus": NodeStatus.SUBMITTED,
            "JobProcsHeld": 0,
            "StatusDetails": "",
            "JobProcsQueued": 1,
        }
        self._assertWmsState(pending, WmsStates.PENDING)

    def testPostrun(self):
        self._assertWmsState({"NodeStatus": NodeStatus.POSTRUN}, WmsStates.MISFIT)

    def testDone(self):
        self._assertWmsState({"NodeStatus": NodeStatus.DONE}, WmsStates.SUCCEEDED)

    # For ERROR nodes the expected state depends on the text in
    # "StatusDetails".

    def testErrorDagmanSuccess(self):
        job = {"NodeStatus": NodeStatus.ERROR, "StatusDetails": "DAGMAN error 0"}
        self._assertWmsState(job, WmsStates.SUCCEEDED)

    def testErrorDagmanFailure(self):
        job = {"NodeStatus": NodeStatus.ERROR, "StatusDetails": "DAGMAN error 1"}
        self._assertWmsState(job, WmsStates.FAILED)

    def testFutile(self):
        self._assertWmsState({"NodeStatus": NodeStatus.FUTILE}, WmsStates.PRUNED)

213 

214 

class TweakJobInfoTestCase(unittest.TestCase):
    """Test the function responsible for massaging job information."""

    def setUp(self):
        self.log_file = tempfile.NamedTemporaryFile(prefix="test_", suffix=".log")
        # Register the cleanup immediately: unlike tearDown(), cleanups are
        # still run when a later statement in setUp() raises (for example
        # the owner lookup below), so the temporary file cannot leak.
        self.addCleanup(self.log_file.close)
        self.log_name = pathlib.Path(self.log_file.name)
        self.job = {
            "Cluster": 1,
            "Proc": 0,
            "Iwd": str(self.log_name.parent),
            "Owner": self.log_name.owner(),
            "MyType": None,
            "TerminatedNormally": True,
        }

    def _assertJobStatus(self, event_type, expected_status):
        """Check the "JobStatus" assigned for a log event of a given type.

        Parameters
        ----------
        event_type : `str`
            Value to use for the "MyType" key of the job information.
        expected_status
            Job status the tweaked job information is expected to have.
        """
        job = self.job | {"MyType": event_type}
        _tweak_log_info(self.log_name, job)
        self.assertIn("JobStatus", job)
        self.assertEqual(job["JobStatus"], expected_status)

    def testDirectAssignments(self):
        # The function updates the dictionary it is given in place.
        _tweak_log_info(self.log_name, self.job)
        self.assertEqual(self.job["ClusterId"], self.job["Cluster"])
        self.assertEqual(self.job["ProcId"], self.job["Proc"])
        self.assertEqual(self.job["Iwd"], str(self.log_name.parent))
        self.assertEqual(self.job["Owner"], self.log_name.owner())

    def testJobStatusAssignmentJobAbortedEvent(self):
        self._assertJobStatus("JobAbortedEvent", htcondor.JobStatus.REMOVED)

    def testJobStatusAssignmentExecuteEvent(self):
        self._assertJobStatus("ExecuteEvent", htcondor.JobStatus.RUNNING)

    def testJobStatusAssignmentSubmitEvent(self):
        self._assertJobStatus("SubmitEvent", htcondor.JobStatus.IDLE)

    def testJobStatusAssignmentJobHeldEvent(self):
        self._assertJobStatus("JobHeldEvent", htcondor.JobStatus.HELD)

    def testJobStatusAssignmentJobTerminatedEvent(self):
        self._assertJobStatus("JobTerminatedEvent", htcondor.JobStatus.COMPLETED)

    def testJobStatusAssignmentPostScriptTerminatedEvent(self):
        self._assertJobStatus("PostScriptTerminatedEvent", htcondor.JobStatus.COMPLETED)

    def testAddingExitStatusSuccess(self):
        # The exit information nested under "ToE" is expected to show up
        # as top-level keys of the job information.
        job = self.job | {
            "MyType": "JobTerminatedEvent",
            "ToE": {"ExitBySignal": False, "ExitCode": 1},
        }
        _tweak_log_info(self.log_name, job)
        self.assertIn("ExitBySignal", job)
        self.assertIs(job["ExitBySignal"], False)
        self.assertIn("ExitCode", job)
        self.assertEqual(job["ExitCode"], 1)

    def testAddingExitStatusFailure(self):
        # A held job without any exit information must be reported as an error.
        job = self.job | {"MyType": "JobHeldEvent"}
        with self.assertLogs(logger=logger, level="ERROR") as cm:
            _tweak_log_info(self.log_name, job)
        self.assertIn("Could not determine exit status", cm.output[0])

    def testLoggingUnknownLogEvent(self):
        job = self.job | {"MyType": "Foo"}
        with self.assertLogs(logger=logger, level="DEBUG") as cm:
            _tweak_log_info(self.log_name, job)
        # NOTE(review): index 1 presumes another record is logged before the
        # "Unknown log event" message -- confirm against _tweak_log_info.
        self.assertIn("Unknown log event", cm.output[1])

    def testMissingKey(self):
        # Work on a copy so the shared fixture is not modified in place.
        job = {key: value for key, value in self.job.items() if key != "Cluster"}
        with self.assertRaises(KeyError) as cm:
            _tweak_log_info(self.log_name, job)
        self.assertEqual(str(cm.exception), "'Cluster'")

307 

308 

class HtcStatusToWmsStateTestCase(unittest.TestCase):
    """Test assigning WMS state based on HTCondor status."""

    def testJobStatus(self):
        # Only "JobStatus" is available.
        idle_job = {
            "ClusterId": 1,
            "JobStatus": htcondor.JobStatus.IDLE,
            "bps_job_label": "foo",
        }
        self.assertEqual(_htc_status_to_wms_state(idle_job), WmsStates.PENDING)

    def testNodeStatus(self):
        # Hold/Release test case
        node_job = {
            "ClusterId": 1,
            "JobStatus": 0,
            "NodeStatus": NodeStatus.SUBMITTED,
            "JobProcsHeld": 0,
            "StatusDetails": "",
            "JobProcsQueued": 1,
        }
        self.assertEqual(_htc_status_to_wms_state(node_job), WmsStates.PENDING)

    def testNeitherStatus(self):
        # Neither "JobStatus" nor "NodeStatus" is available.
        bare_job = {"ClusterId": 1}
        self.assertEqual(_htc_status_to_wms_state(bare_job), WmsStates.MISFIT)