Coverage for tests/test_htcondor_service.py: 28%
148 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-01 11:07 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-01 11:07 +0000
1# This file is part of ctrl_bps_htcondor.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <https://www.gnu.org/licenses/>.
28"""Unit tests for the HTCondor WMS service class and related functions."""
30import logging
31import pathlib
32import tempfile
33import unittest
35import htcondor
36from lsst.ctrl.bps import WmsStates
37from lsst.ctrl.bps.htcondor.htcondor_service import (
38 NodeStatus,
39 _get_exit_code_summary,
40 _htc_node_status_to_wms_state,
41)
42from lsst.ctrl.bps.htcondor.lssthtc import _tweak_log_info
# Module-level handle on the "lsst.ctrl.bps.htcondor" logger; the tests below
# pass it to assertLogs() to capture messages emitted under that name.
logger = logging.getLogger("lsst.ctrl.bps.htcondor")
class GetExitCodeSummaryTestCase(unittest.TestCase):
    """Exercise ``_get_exit_code_summary`` with typical and malformed jobs."""

    def setUp(self):
        """Build a job table keyed by job id and spread over four labels."""
        status = htcondor.JobStatus
        self.jobs = {
            # Label "foo": idle, running and removed jobs without exit data.
            "1.0": {"JobStatus": status.IDLE, "bps_job_label": "foo"},
            "2.0": {"JobStatus": status.RUNNING, "bps_job_label": "foo"},
            "3.0": {"JobStatus": status.REMOVED, "bps_job_label": "foo"},
            # Label "bar": completed jobs, one with exit code 0, one with 1.
            "4.0": {
                "ExitCode": 0,
                "ExitBySignal": False,
                "JobStatus": status.COMPLETED,
                "bps_job_label": "bar",
            },
            "5.0": {
                "ExitCode": 1,
                "ExitBySignal": False,
                "JobStatus": status.COMPLETED,
                "bps_job_label": "bar",
            },
            # Label "baz": held jobs, one ended by signal 11, one with
            # exit code 42.
            "6.0": {
                "ExitBySignal": True,
                "ExitSignal": 11,
                "JobStatus": status.HELD,
                "bps_job_label": "baz",
            },
            "7.0": {
                "ExitBySignal": False,
                "ExitCode": 42,
                "JobStatus": status.HELD,
                "bps_job_label": "baz",
            },
            # Label "qux": transferring-output and suspended jobs without
            # exit data.
            "8.0": {"JobStatus": status.TRANSFERRING_OUTPUT, "bps_job_label": "qux"},
            "9.0": {"JobStatus": status.SUSPENDED, "bps_job_label": "qux"},
        }

    def tearDown(self):
        """Nothing to release; the fixture is a plain dictionary."""

    def testMainScenario(self):
        # The summary is expected to hold 1 for "bar" (but not 0), 11 and 42
        # for "baz", and empty lists for "foo" and "qux".
        self.assertEqual(
            _get_exit_code_summary(self.jobs),
            {"foo": [], "bar": [1], "baz": [11, 42], "qux": []},
        )

    def testUnknownStatus(self):
        # -1 is used here as a job status value the function should not
        # recognize; the first captured message must say so.
        jobs = {"1.0": {"JobStatus": -1, "bps_job_label": "foo"}}
        with self.assertLogs(logger=logger, level="DEBUG") as cm:
            _get_exit_code_summary(jobs)
        first_message = cm.output[0]
        self.assertIn("lsst.ctrl.bps.htcondor", cm.records[0].name)
        self.assertIn("Unknown", first_message)
        self.assertIn("JobStatus", first_message)

    def testUnknownKey(self):
        # A completed job that carries none of the exit-related keys used in
        # the main fixture; the first captured message must report an
        # attribute that was not found.
        jobs = {
            "1.0": {
                "JobStatus": htcondor.JobStatus.COMPLETED,
                "UnknownKey": None,
                "bps_job_label": "foo",
            }
        }
        with self.assertLogs(logger=logger, level="DEBUG") as cm:
            _get_exit_code_summary(jobs)
        first_message = cm.output[0]
        self.assertIn("lsst.ctrl.bps.htcondor", cm.records[0].name)
        self.assertIn("Attribute", first_message)
        self.assertIn("not found", first_message)
class HtcNodeStatusToWmsStateTestCase(unittest.TestCase):
    """Test assigning WMS state based on HTCondor node status."""

    def setUp(self):
        """No shared fixture; every test builds its own node record."""

    def tearDown(self):
        """Nothing to clean up after a test."""

    def testNotReady(self):
        state = _htc_node_status_to_wms_state({"NodeStatus": NodeStatus.NOT_READY})
        self.assertEqual(state, WmsStates.UNREADY)

    def testReady(self):
        state = _htc_node_status_to_wms_state({"NodeStatus": NodeStatus.READY})
        self.assertEqual(state, WmsStates.READY)

    def testPrerun(self):
        state = _htc_node_status_to_wms_state({"NodeStatus": NodeStatus.PRERUN})
        self.assertEqual(state, WmsStates.MISFIT)

    def testSubmittedHeld(self):
        # Submitted node reporting one held proc.
        node = {
            "NodeStatus": NodeStatus.SUBMITTED,
            "JobProcsHeld": 1,
            "StatusDetails": "",
            "JobProcsQueued": 0,
        }
        self.assertEqual(_htc_node_status_to_wms_state(node), WmsStates.HELD)

    def testSubmittedRunning(self):
        # Submitted node with no held procs and status details "not_idle".
        node = {
            "NodeStatus": NodeStatus.SUBMITTED,
            "JobProcsHeld": 0,
            "StatusDetails": "not_idle",
            "JobProcsQueued": 0,
        }
        self.assertEqual(_htc_node_status_to_wms_state(node), WmsStates.RUNNING)

    def testSubmittedPending(self):
        # Submitted node with no held procs, empty details and one queued proc.
        node = {
            "NodeStatus": NodeStatus.SUBMITTED,
            "JobProcsHeld": 0,
            "StatusDetails": "",
            "JobProcsQueued": 1,
        }
        self.assertEqual(_htc_node_status_to_wms_state(node), WmsStates.PENDING)

    def testPostrun(self):
        state = _htc_node_status_to_wms_state({"NodeStatus": NodeStatus.POSTRUN})
        self.assertEqual(state, WmsStates.MISFIT)

    def testDone(self):
        state = _htc_node_status_to_wms_state({"NodeStatus": NodeStatus.DONE})
        self.assertEqual(state, WmsStates.SUCCEEDED)

    def testErrorDagmanSuccess(self):
        # An ERROR node whose details read "DAGMAN error 0" still maps to
        # SUCCEEDED.
        node = {"NodeStatus": NodeStatus.ERROR, "StatusDetails": "DAGMAN error 0"}
        self.assertEqual(_htc_node_status_to_wms_state(node), WmsStates.SUCCEEDED)

    def testErrorDagmanFailure(self):
        # An ERROR node whose details read "DAGMAN error 1" maps to FAILED.
        node = {"NodeStatus": NodeStatus.ERROR, "StatusDetails": "DAGMAN error 1"}
        self.assertEqual(_htc_node_status_to_wms_state(node), WmsStates.FAILED)

    def testFutile(self):
        state = _htc_node_status_to_wms_state({"NodeStatus": NodeStatus.FUTILE})
        self.assertEqual(state, WmsStates.PRUNED)
class TweakJobInfoTestCase(unittest.TestCase):
    """Test the function responsible for massaging job information."""

    def setUp(self):
        # A real temporary file supplies the log path handed to
        # _tweak_log_info() in every test.
        self.log_file = tempfile.NamedTemporaryFile(prefix="test_", suffix=".log")
        # Register the close immediately: tearDown() is not run when setUp()
        # raises, and Path.owner() below can raise (e.g. when the file's uid
        # has no entry in the user database), which would leak the open file.
        self.addCleanup(self.log_file.close)
        self.log_name = pathlib.Path(self.log_file.name)
        self.job = {
            "Cluster": 1,
            "Proc": 0,
            "Iwd": str(self.log_name.parent),
            "Owner": self.log_name.owner(),
            "MyType": None,
            "TerminatedNormally": True,
        }

    def _check_job_status(self, event_type, expected_status):
        # Shared body of the JobStatus tests: tweak a copy of the base job
        # whose "MyType" is ``event_type`` and verify that "JobStatus" was
        # added with the value ``expected_status``.
        job = self.job | {"MyType": event_type}
        _tweak_log_info(self.log_name, job)
        self.assertIn("JobStatus", job)
        self.assertEqual(job["JobStatus"], expected_status)

    def testDirectAssignments(self):
        # The call updates the job dictionary in place; the id fields must
        # mirror "Cluster"/"Proc" and the directory and owner must match the
        # log file.
        _tweak_log_info(self.log_name, self.job)
        self.assertEqual(self.job["ClusterId"], self.job["Cluster"])
        self.assertEqual(self.job["ProcId"], self.job["Proc"])
        self.assertEqual(self.job["Iwd"], str(self.log_name.parent))
        self.assertEqual(self.job["Owner"], self.log_name.owner())

    def testJobStatusAssignmentJobAbortedEvent(self):
        self._check_job_status("JobAbortedEvent", htcondor.JobStatus.REMOVED)

    def testJobStatusAssignmentExecuteEvent(self):
        self._check_job_status("ExecuteEvent", htcondor.JobStatus.RUNNING)

    def testJobStatusAssignmentSubmitEvent(self):
        self._check_job_status("SubmitEvent", htcondor.JobStatus.IDLE)

    def testJobStatusAssignmentJobHeldEvent(self):
        self._check_job_status("JobHeldEvent", htcondor.JobStatus.HELD)

    def testJobStatusAssignmentJobTerminatedEvent(self):
        self._check_job_status("JobTerminatedEvent", htcondor.JobStatus.COMPLETED)

    def testJobStatusAssignmentPostScriptTerminatedEvent(self):
        self._check_job_status("PostScriptTerminatedEvent", htcondor.JobStatus.COMPLETED)

    def testAddingExitStatusSuccess(self):
        # Exit information nested under "ToE" must end up as top-level
        # "ExitBySignal" and "ExitCode" entries of the job.
        job = self.job | {
            "MyType": "JobTerminatedEvent",
            "ToE": {"ExitBySignal": False, "ExitCode": 1},
        }
        _tweak_log_info(self.log_name, job)
        self.assertIn("ExitBySignal", job)
        self.assertIs(job["ExitBySignal"], False)
        self.assertIn("ExitCode", job)
        self.assertEqual(job["ExitCode"], 1)

    def testAddingExitStatusFailure(self):
        # A held job carrying no exit information must trigger an error
        # message.
        job = self.job | {
            "MyType": "JobHeldEvent",
        }
        with self.assertLogs(logger=logger, level="ERROR") as cm:
            _tweak_log_info(self.log_name, job)
        self.assertIn("Could not determine exit status", cm.output[0])

    def testLoggingUnknownLogEvent(self):
        job = self.job | {"MyType": "Foo"}
        with self.assertLogs(logger=logger, level="DEBUG") as cm:
            _tweak_log_info(self.log_name, job)
        # NOTE(review): index 1 presumes exactly one other message is logged
        # before the "Unknown log event" one -- confirm against
        # _tweak_log_info() if this assertion starts failing.
        self.assertIn("Unknown log event", cm.output[1])

    def testMissingKey(self):
        # Work on a copy so the shared fixture is not modified by this test.
        job = dict(self.job)
        del job["Cluster"]
        with self.assertRaises(KeyError) as cm:
            _tweak_log_info(self.log_name, job)
        # The exception is only available once the context manager has exited.
        self.assertEqual(str(cm.exception), "'Cluster'")