Coverage for tests/test_htcondor_service.py: 28%
161 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-18 10:12 +0000
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-18 10:12 +0000
1# This file is part of ctrl_bps_htcondor.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <https://www.gnu.org/licenses/>.
28"""Unit tests for the HTCondor WMS service class and related functions."""
30import logging
31import pathlib
32import tempfile
33import unittest
35import htcondor
36from lsst.ctrl.bps import WmsStates
37from lsst.ctrl.bps.htcondor.htcondor_service import (
38 NodeStatus,
39 _get_exit_code_summary,
40 _htc_node_status_to_wms_state,
41 _htc_status_to_wms_state,
42)
43from lsst.ctrl.bps.htcondor.lssthtc import _tweak_log_info
# Logger handed to ``assertLogs`` by the test cases in this module; the tests
# expect the functions under test to emit their records under this name.
logger = logging.getLogger("lsst.ctrl.bps.htcondor")
class GetExitCodeSummaryTestCase(unittest.TestCase):
    """Test the function responsible for creating exit code summary.

    The fixture built in ``setUp`` holds nine job records spread over four
    job labels; the remaining tests feed single-record mappings to
    ``_get_exit_code_summary`` and inspect what it logs.
    """

    def setUp(self):
        """Build the job records shared by the tests."""
        status = htcondor.JobStatus
        self.jobs = {
            # Label "foo": records without any exit information.
            "1.0": {"JobStatus": status.IDLE, "bps_job_label": "foo"},
            "2.0": {"JobStatus": status.RUNNING, "bps_job_label": "foo"},
            "3.0": {"JobStatus": status.REMOVED, "bps_job_label": "foo"},
            # Label "bar": completed records, one with a zero and one with
            # a non-zero exit code.
            "4.0": {
                "ExitCode": 0,
                "ExitBySignal": False,
                "JobStatus": status.COMPLETED,
                "bps_job_label": "bar",
            },
            "5.0": {
                "ExitCode": 1,
                "ExitBySignal": False,
                "JobStatus": status.COMPLETED,
                "bps_job_label": "bar",
            },
            # Label "baz": held records, one carrying a signal and one
            # carrying an exit code.
            "6.0": {
                "ExitBySignal": True,
                "ExitSignal": 11,
                "JobStatus": status.HELD,
                "bps_job_label": "baz",
            },
            "7.0": {
                "ExitBySignal": False,
                "ExitCode": 42,
                "JobStatus": status.HELD,
                "bps_job_label": "baz",
            },
            # Label "qux": records without any exit information.
            "8.0": {
                "JobStatus": status.TRANSFERRING_OUTPUT,
                "bps_job_label": "qux",
            },
            "9.0": {"JobStatus": status.SUSPENDED, "bps_job_label": "qux"},
        }

    def tearDown(self):
        """Nothing to clean up."""

    def testMainScenario(self):
        # Every label must be present in the summary; only "bar" and "baz"
        # are expected to carry any codes.
        self.assertEqual(
            _get_exit_code_summary(self.jobs),
            {"foo": [], "bar": [1], "baz": [11, 42], "qux": []},
        )

    def testUnknownStatus(self):
        # -1 stands in for a JobStatus value the function is expected to
        # report as unknown.
        bogus = {"1.0": {"JobStatus": -1, "bps_job_label": "foo"}}
        with self.assertLogs(logger=logger, level="DEBUG") as captured:
            _get_exit_code_summary(bogus)
        first_record = captured.records[0]
        first_line = captured.output[0]
        self.assertIn("lsst.ctrl.bps.htcondor", first_record.name)
        self.assertIn("Unknown", first_line)
        self.assertIn("JobStatus", first_line)

    def testUnknownKey(self):
        # A completed record that lacks the exit attributes present on the
        # completed records of the main fixture.
        incomplete = {
            "1.0": {
                "JobStatus": htcondor.JobStatus.COMPLETED,
                "UnknownKey": None,
                "bps_job_label": "foo",
            }
        }
        with self.assertLogs(logger=logger, level="DEBUG") as captured:
            _get_exit_code_summary(incomplete)
        first_record = captured.records[0]
        first_line = captured.output[0]
        self.assertIn("lsst.ctrl.bps.htcondor", first_record.name)
        self.assertIn("Attribute", first_line)
        self.assertIn("not found", first_line)
class HtcNodeStatusToWmsStateTestCase(unittest.TestCase):
    """Test assigning WMS state based on HTCondor node status.

    Each test hands ``_htc_node_status_to_wms_state`` a minimal job record
    and checks the ``WmsStates`` member it returns.
    """

    def setUp(self):
        """No fixtures are required."""

    def tearDown(self):
        """Nothing to clean up."""

    def _assert_state(self, node, expected):
        """Check that ``node`` is translated to the ``expected`` state.

        Parameters
        ----------
        node : `dict`
            Job record passed to ``_htc_node_status_to_wms_state``.
        expected : `WmsStates`
            State the function is expected to return.
        """
        self.assertEqual(_htc_node_status_to_wms_state(node), expected)

    def testNotReady(self):
        self._assert_state({"NodeStatus": NodeStatus.NOT_READY}, WmsStates.UNREADY)

    def testReady(self):
        self._assert_state({"NodeStatus": NodeStatus.READY}, WmsStates.READY)

    def testPrerun(self):
        self._assert_state({"NodeStatus": NodeStatus.PRERUN}, WmsStates.MISFIT)

    def testSubmittedHeld(self):
        # Submitted node with one held process.
        node = {
            "NodeStatus": NodeStatus.SUBMITTED,
            "JobProcsHeld": 1,
            "StatusDetails": "",
            "JobProcsQueued": 0,
        }
        self._assert_state(node, WmsStates.HELD)

    def testSubmittedRunning(self):
        # Submitted node, nothing held, status details set to "not_idle".
        node = {
            "NodeStatus": NodeStatus.SUBMITTED,
            "JobProcsHeld": 0,
            "StatusDetails": "not_idle",
            "JobProcsQueued": 0,
        }
        self._assert_state(node, WmsStates.RUNNING)

    def testSubmittedPending(self):
        # Submitted node, nothing held, one process queued.
        node = {
            "NodeStatus": NodeStatus.SUBMITTED,
            "JobProcsHeld": 0,
            "StatusDetails": "",
            "JobProcsQueued": 1,
        }
        self._assert_state(node, WmsStates.PENDING)

    def testPostrun(self):
        self._assert_state({"NodeStatus": NodeStatus.POSTRUN}, WmsStates.MISFIT)

    def testDone(self):
        self._assert_state({"NodeStatus": NodeStatus.DONE}, WmsStates.SUCCEEDED)

    def testErrorDagmanSuccess(self):
        # An ERROR node whose details read "DAGMAN error 0" is expected to
        # count as a success.
        node = {"NodeStatus": NodeStatus.ERROR, "StatusDetails": "DAGMAN error 0"}
        self._assert_state(node, WmsStates.SUCCEEDED)

    def testErrorDagmanFailure(self):
        # Same as above, but with "DAGMAN error 1" the node is expected to
        # count as a failure.
        node = {"NodeStatus": NodeStatus.ERROR, "StatusDetails": "DAGMAN error 1"}
        self._assert_state(node, WmsStates.FAILED)

    def testFutile(self):
        self._assert_state({"NodeStatus": NodeStatus.FUTILE}, WmsStates.PRUNED)
class TweakJobInfoTestCase(unittest.TestCase):
    """Test the function responsible for massaging job information.

    Every test calls ``_tweak_log_info`` with the path of a temporary log
    file and a job record derived from the fixture built in ``setUp``, then
    inspects either the updated record or the messages that were logged.
    """

    def setUp(self):
        """Create a temporary log file and a baseline job record."""
        # The file is only needed for its path (parent directory and owner);
        # nothing is written to it by the tests themselves.
        self.log_file = tempfile.NamedTemporaryFile(prefix="test_", suffix=".log")
        self.log_name = pathlib.Path(self.log_file.name)
        self.job = {
            "Cluster": 1,
            "Proc": 0,
            "Iwd": str(self.log_name.parent),
            "Owner": self.log_name.owner(),
            "MyType": None,
            "TerminatedNormally": True,
        }

    def tearDown(self):
        """Close the temporary log file."""
        self.log_file.close()

    def testDirectAssignments(self):
        _tweak_log_info(self.log_name, self.job)
        self.assertEqual(self.job["ClusterId"], self.job["Cluster"])
        self.assertEqual(self.job["ProcId"], self.job["Proc"])
        self.assertEqual(self.job["Iwd"], str(self.log_name.parent))
        self.assertEqual(self.job["Owner"], self.log_name.owner())

    def testJobStatusAssignmentJobAbortedEvent(self):
        job = self.job | {"MyType": "JobAbortedEvent"}
        _tweak_log_info(self.log_name, job)
        # assertIn reports both the key and the container on failure.
        self.assertIn("JobStatus", job)
        self.assertEqual(job["JobStatus"], htcondor.JobStatus.REMOVED)

    def testJobStatusAssignmentExecuteEvent(self):
        job = self.job | {"MyType": "ExecuteEvent"}
        _tweak_log_info(self.log_name, job)
        self.assertIn("JobStatus", job)
        self.assertEqual(job["JobStatus"], htcondor.JobStatus.RUNNING)

    def testJobStatusAssignmentSubmitEvent(self):
        job = self.job | {"MyType": "SubmitEvent"}
        _tweak_log_info(self.log_name, job)
        self.assertIn("JobStatus", job)
        self.assertEqual(job["JobStatus"], htcondor.JobStatus.IDLE)

    def testJobStatusAssignmentJobHeldEvent(self):
        job = self.job | {"MyType": "JobHeldEvent"}
        _tweak_log_info(self.log_name, job)
        self.assertIn("JobStatus", job)
        self.assertEqual(job["JobStatus"], htcondor.JobStatus.HELD)

    def testJobStatusAssignmentJobTerminatedEvent(self):
        job = self.job | {"MyType": "JobTerminatedEvent"}
        _tweak_log_info(self.log_name, job)
        self.assertIn("JobStatus", job)
        self.assertEqual(job["JobStatus"], htcondor.JobStatus.COMPLETED)

    def testJobStatusAssignmentPostScriptTerminatedEvent(self):
        job = self.job | {"MyType": "PostScriptTerminatedEvent"}
        _tweak_log_info(self.log_name, job)
        self.assertIn("JobStatus", job)
        self.assertEqual(job["JobStatus"], htcondor.JobStatus.COMPLETED)

    def testAddingExitStatusSuccess(self):
        # The exit information is supplied nested under "ToE" and is expected
        # to show up at the top level of the record afterwards.
        job = self.job | {
            "MyType": "JobTerminatedEvent",
            "ToE": {"ExitBySignal": False, "ExitCode": 1},
        }
        _tweak_log_info(self.log_name, job)
        self.assertIn("ExitBySignal", job)
        self.assertIs(job["ExitBySignal"], False)
        self.assertIn("ExitCode", job)
        self.assertEqual(job["ExitCode"], 1)

    def testAddingExitStatusFailure(self):
        # A held job with no exit information at all.
        job = self.job | {
            "MyType": "JobHeldEvent",
        }
        with self.assertLogs(logger=logger, level="ERROR") as cm:
            _tweak_log_info(self.log_name, job)
        self.assertIn("Could not determine exit status", cm.output[0])

    def testLoggingUnknownLogEvent(self):
        job = self.job | {"MyType": "Foo"}
        with self.assertLogs(logger=logger, level="DEBUG") as cm:
            _tweak_log_info(self.log_name, job)
        # Search every captured line instead of a fixed index so the test
        # does not depend on how many other DEBUG records are emitted, and
        # fails with an assertion (not an IndexError) when none matches.
        self.assertTrue(
            any("Unknown log event" in line for line in cm.output),
            msg=f"'Unknown log event' not found in {cm.output}",
        )

    def testMissingKey(self):
        # Build a copy without "Cluster" rather than deleting from the shared
        # fixture, consistent with how the other tests derive their records.
        job = {key: value for key, value in self.job.items() if key != "Cluster"}
        with self.assertRaises(KeyError) as cm:
            _tweak_log_info(self.log_name, job)
        self.assertEqual(str(cm.exception), "'Cluster'")
class HtcStatusToWmsStateTestCase(unittest.TestCase):
    """Test assigning WMS state based on HTCondor status.

    The three tests cover a record carrying only ``JobStatus``, a record
    carrying both ``JobStatus`` and ``NodeStatus``, and a record carrying
    neither.
    """

    def testJobStatus(self):
        record = {
            "ClusterId": 1,
            "JobStatus": htcondor.JobStatus.IDLE,
            "bps_job_label": "foo",
        }
        self.assertEqual(_htc_status_to_wms_state(record), WmsStates.PENDING)

    def testNodeStatus(self):
        # Hold/Release test case
        record = {
            "ClusterId": 1,
            "JobStatus": 0,
            "NodeStatus": NodeStatus.SUBMITTED,
            "JobProcsHeld": 0,
            "StatusDetails": "",
            "JobProcsQueued": 1,
        }
        self.assertEqual(_htc_status_to_wms_state(record), WmsStates.PENDING)

    def testNeitherStatus(self):
        # With no status attribute at all the record is expected to be
        # classified as a misfit.
        record = {"ClusterId": 1}
        self.assertEqual(_htc_status_to_wms_state(record), WmsStates.MISFIT)