Coverage for python/lsst/ctrl/bps/htcondor/handlers.py: 29%
101 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-10 03:42 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-10 03:42 -0700
1# This file is part of ctrl_bps_htcondor.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <https://www.gnu.org/licenses/>.
28"""Definitions of handlers of HTCondor job ClassAds."""
30__all__ = [
31 "HTC_JOB_AD_HANDLERS",
32 "Chain",
33 "Handler",
34 "JobCompletedWithExecTicketHandler",
35 "JobCompletedWithoutExecTicketHandler",
36 "JobHeldByOtherHandler",
37 "JobHeldBySignalHandler",
38 "JobHeldByUserHandler",
39]
42import abc
43import logging
44import re
45from collections.abc import Sequence
46from typing import Any
48_LOG = logging.getLogger(__name__)
51class Handler(abc.ABC):
52 """Abstract base class defining Handler interface."""
54 @abc.abstractmethod
55 def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
56 """Handle a ClassAd.
58 Parameters
59 ----------
60 ad : `dict[`str`, Any]`
61 The dictionary representing ClassAd that need to be processed.
63 Returns
64 -------
65 ad : `dict[`str`, Any]` | None
66 The dictionary representing ClassAd after processing and ``None``
67 if the handler was not able to process the ad.
69 Notes
70 -----
71 To optimize the memory usage, the implementation of this method may
72 modify the ClassAd in place. In such a case, the ClassAd returned by
73 the method will be the same object that was passed to it as
74 the argument, but including any modifications that were made.
75 """
class Chain(Sequence):
    """Sequence of handlers that are tried one after another.

    Parameters
    ----------
    handlers : `Sequence` [`Handler`], optional
        Handlers used to initialize the chain, in the order in which they
        should be tried. If `None` (default), the chain starts empty.

    Raises
    ------
    TypeError
        Raised if any element of ``handlers`` is not a ``Handler``.
    """

    def __init__(self, handlers: Sequence[Handler] | None = None) -> None:
        self._handlers: list[Handler] = []
        if handlers is not None:
            # Go through append() so that every element gets type-checked.
            for handler in handlers:
                self.append(handler)

    def __getitem__(self, index: int) -> Handler:
        return self._handlers[index]

    def __len__(self) -> int:
        return len(self._handlers)

    def append(self, handler: Handler) -> None:
        """Append a handler to the chain.

        Parameters
        ----------
        handler : `Handler`
            The handler that needs to be added to the chain.

        Raises
        ------
        TypeError
            Raised if the passed object is not a ``Handler``.
        """
        if not isinstance(handler, Handler):
            # Report the bare type name; interpolating the type object itself
            # produced nested quotes such as "'<class 'int'>'".
            raise TypeError(
                f"append() argument must be a 'Handler', not a '{type(handler).__name__}'"
            )
        self._handlers.append(handler)

    def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
        """Handle a ClassAd.

        The handlers are tried in order; the result of the first one that
        returns something other than `None` is returned and the remaining
        handlers are not called.

        Parameters
        ----------
        ad : `dict` [`str`, `Any`]
            The dictionary representing the ClassAd that needs to be handled.

        Returns
        -------
        ad : `dict` [`str`, `Any`] or `None`
            The ClassAd returned by the first handler able to process it,
            `None` if no handler in the chain could.

        Notes
        -----
        An exception raised by a handler is logged as a warning and treated
        as "not handled": the chain moves on to the next handler instead of
        propagating the error.
        """
        for handler in self:
            try:
                new_ad = handler.handle(ad)
            except Exception as e:
                # Deliberately best-effort: one failing handler must not
                # prevent the remaining ones from having a go at the ad.
                _LOG.warning(
                    "Handler '%s' raised an exception while processing the ad: %s. "
                    "Proceeding to the next handler (if any).",
                    type(handler).__name__,
                    repr(e),
                )
                continue
            if new_ad is not None:
                return new_ad
        return None
class JobCompletedWithExecTicketHandler(Handler):
    """Handler of ClassAds for completed jobs with the ticket of execution.

    Usually, the entry in the event log for a completed job contains the ticket
    of execution -- a record describing how and when the job was terminated.
    If it exists, this handler will use it to add the attributes describing
    job's exit status.
    """

    def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
        """Copy the exit status from the ticket of execution into the ad.

        Parameters
        ----------
        ad : `dict` [`str`, `Any`]
            The ClassAd to process. It is modified in place.

        Returns
        -------
        ad : `dict` [`str`, `Any`] or `None`
            The same ClassAd with ``ExitBySignal`` and either ``ExitSignal``
            or ``ExitCode`` set; `None` if the ad is not a terminated event
            or has no ``ToE`` entry.

        Raises
        ------
        KeyError
            Raised if a required attribute is missing from the ad or from
            its ticket of execution. The ad is left unmodified in that case.
        """
        if not ad["MyType"].endswith("TerminatedEvent"):
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': job not completed",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None
        if "ToE" not in ad:
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': "
                "ticket of execution missing",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None

        # Read everything first and write afterwards: if the ticket lacks
        # a key, the ad must not be left half-updated for other handlers.
        toe = ad["ToE"]
        exit_by_signal = toe["ExitBySignal"]
        if exit_by_signal:
            key, value = "ExitSignal", toe["ExitSignal"]
        else:
            key, value = "ExitCode", toe["ExitCode"]
        ad["ExitBySignal"] = exit_by_signal
        ad[key] = value
        return ad
class JobCompletedWithoutExecTicketHandler(Handler):
    """Handler of ClassAds for completed jobs w/o the ticket of execution.

    The entry in the event log for some completed jobs (e.g. jobs that run
    ``condor_dagman``) does *not* contain the ticket of execution -- a record
    describing how and when the job was terminated. This handler will try
    to use other existing attributes to add the ones describing job's exit
    status.
    """

    def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
        """Derive the exit status from the termination attributes of the ad.

        Parameters
        ----------
        ad : `dict` [`str`, `Any`]
            The ClassAd to process. It is modified in place.

        Returns
        -------
        ad : `dict` [`str`, `Any`] or `None`
            The same ClassAd with ``ExitBySignal`` (the negation of
            ``TerminatedNormally``) and either ``ExitSignal`` (from
            ``TerminatedBySignal``) or ``ExitCode`` (from ``ReturnValue``)
            set; `None` if the ad is not a terminated event or it does
            contain a ``ToE`` entry.

        Raises
        ------
        KeyError
            Raised if a required attribute is missing from the ad. The ad
            is left unmodified in that case.
        """
        if not ad["MyType"].endswith("TerminatedEvent"):
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': job not completed",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None
        if "ToE" in ad:
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': "
                "ticket of execution found",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None

        # Read everything first and write afterwards: if an attribute is
        # missing, the ad must not be left half-updated for other handlers.
        exit_by_signal = not ad["TerminatedNormally"]
        if exit_by_signal:
            key, value = "ExitSignal", ad["TerminatedBySignal"]
        else:
            key, value = "ExitCode", ad["ReturnValue"]
        ad["ExitBySignal"] = exit_by_signal
        ad[key] = value
        return ad
class JobHeldByOtherHandler(Handler):
    """Handler of ClassAds for jobs put on hold.

    It covers every hold reason code except 1 and 3, the two codes the
    user-hold and signal-hold handlers of this module act upon.
    """

    def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
        """Report the hold reason code as the exit code of the job.

        Parameters
        ----------
        ad : `dict` [`str`, `Any`]
            The ClassAd to process. It is modified in place.

        Returns
        -------
        ad : `dict` [`str`, `Any`] or `None`
            The same ClassAd with ``ExitBySignal`` set to `False` and
            ``ExitCode`` set to the value of ``HoldReasonCode``; `None` if
            the ad is not a held event or its hold reason code is 1 or 3.
        """
        job_is_held = ad["MyType"].endswith("HeldEvent")
        if not job_is_held:
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': job not held",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None

        hold_code = ad["HoldReasonCode"]
        if hold_code in {1, 3}:
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': "
                "invalid hold reason code: HoldReasonCode = %s",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
                hold_code,
            )
            return None

        ad["ExitBySignal"] = False
        ad["ExitCode"] = hold_code
        return ad
class JobHeldBySignalHandler(Handler):
    """Handler of ClassAds for jobs put on hold by signals."""

    def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
        """Extract the signal number from the hold reason of the job.

        Parameters
        ----------
        ad : `dict` [`str`, `Any`]
            The ClassAd to process. It is modified in place.

        Returns
        -------
        ad : `dict` [`str`, `Any`] or `None`
            The same ClassAd with ``ExitBySignal`` set to `True` and
            ``ExitSignal`` set to the signal number (an `int`) found in
            ``HoldReason``; `None` if the ad is not a held event, its
            ``HoldReasonCode`` is not 3, or ``HoldReason`` does not contain
            text of the form ``signal <number>``.
        """
        if not ad["MyType"].endswith("HeldEvent"):
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': job not held",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None

        if ad["HoldReasonCode"] != 3:
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': "
                "job not held by a signal: HoldReasonCode = %s, HoldReason = %s",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
                ad["HoldReasonCode"],
                ad["HoldReason"],
            )
            return None

        match = re.search(r"signal (\d+)", ad["HoldReason"])
        if match is None:
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': "
                "signal not found: HoldReason = %s",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
                ad["HoldReason"],
            )
            return None

        ad["ExitBySignal"] = True
        # The regex capture is a string; store a number so the value has the
        # same type as the integer exit values set by the other hold handlers.
        # NOTE(review): ExitSignal used to be a `str` here -- confirm that no
        # caller relies on that.
        ad["ExitSignal"] = int(match.group(1))
        return ad
class JobHeldByUserHandler(Handler):
    """Handler of ClassAds for jobs put on hold by the user."""

    def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
        """Mark a job held with hold reason code 1 as exited with code 0.

        Parameters
        ----------
        ad : `dict` [`str`, `Any`]
            The ClassAd to process. It is modified in place.

        Returns
        -------
        ad : `dict` [`str`, `Any`] or `None`
            The same ClassAd with ``ExitBySignal`` set to `False` and
            ``ExitCode`` set to 0; `None` if the ad is not a held event or
            its ``HoldReasonCode`` is not 1.
        """
        job_is_held = ad["MyType"].endswith("HeldEvent")
        if not job_is_held:
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': job not held",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None

        if ad["HoldReasonCode"] != 1:
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': "
                "job not held by the user: HoldReasonCode = %s, HoldReason = %s",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
                ad["HoldReasonCode"],
                ad["HoldReason"],
            )
            return None

        ad.update({"ExitBySignal": False, "ExitCode": 0})
        return ad
# Default handlers, in the order in which the chain tries them. The chain
# stops at the first handler that returns a ClassAd, so the handlers for
# the specific hold reasons (user, signal) come before the catch-all one
# for held jobs.
_handlers = [
    JobHeldByUserHandler(),
    JobHeldBySignalHandler(),
    JobHeldByOtherHandler(),
    JobCompletedWithExecTicketHandler(),
    JobCompletedWithoutExecTicketHandler(),
]
# Ready-to-use chain of the handlers listed above.
HTC_JOB_AD_HANDLERS = Chain(handlers=_handlers)