Coverage for python/lsst/ctrl/bps/htcondor/handlers.py: 29%

101 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-25 17:53 +0000

1# This file is part of ctrl_bps_htcondor. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <https://www.gnu.org/licenses/>. 

27 

28"""Definitions of handlers of HTCondor job ClassAds.""" 

29 

# Names exported by ``from <module> import *``.
__all__ = [
    "HTC_JOB_AD_HANDLERS",
    "Chain",
    "Handler",
    "JobCompletedWithExecTicketHandler",
    "JobCompletedWithoutExecTicketHandler",
    "JobHeldByOtherHandler",
    "JobHeldBySignalHandler",
    "JobHeldByUserHandler",
]

40 

41 

42import abc 

43import logging 

44import re 

45from collections.abc import Sequence 

46from typing import Any 

47 

48_LOG = logging.getLogger(__name__) 

49 

50 

51class Handler(abc.ABC): 

52 """Abstract base class defining Handler interface.""" 

53 

54 @abc.abstractmethod 

55 def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None: 

56 """Handle a ClassAd. 

57 

58 Parameters 

59 ---------- 

60 ad : `dict[`str`, Any]` 

61 The dictionary representing ClassAd that need to be processed. 

62 

63 Returns 

64 ------- 

65 ad : `dict[`str`, Any]` | None 

66 The dictionary representing ClassAd after processing and ``None`` 

67 if the handler was not able to process the ad. 

68 

69 Notes 

70 ----- 

71 To optimize the memory usage, the implementation of this method may 

72 modify the ClassAd in place. In such a case, the ClassAd returned by 

73 the method will be the same object that was passed to it as 

74 the argument, but including any modifications that were made. 

75 """ 

76 

77 

class Chain(Sequence):
    """Ordered collection of handlers that are tried one after another.

    Parameters
    ----------
    handlers : `Sequence` [`Handler`], optional
        Handlers used to initialize the chain, in the order they should be
        tried. If `None` (default), the chain starts empty.

    Raises
    ------
    TypeError
        Raised if any of the provided objects is not a ``Handler``.
    """

    def __init__(self, handlers: Sequence[Handler] | None = None) -> None:
        self._handlers: list[Handler] = []
        if handlers is not None:
            for handler in handlers:
                # Go through append() so each element gets type-checked.
                self.append(handler)

    def __getitem__(self, index: int) -> Handler:
        return self._handlers[index]

    def __len__(self) -> int:
        return len(self._handlers)

    def append(self, handler: Handler) -> None:
        """Append a handler to the chain.

        Parameters
        ----------
        handler : `Handler`
            The handler that needs to be added to the chain.

        Raises
        ------
        TypeError
            Raised if the passed object is not a ``Handler``.
        """
        if not isinstance(handler, Handler):
            # Report the bare class name; interpolating the type object
            # itself yields confusing nested quotes ("'<class 'int'>'").
            raise TypeError(f"append() argument must be a 'Handler', not a '{type(handler).__name__}'")
        self._handlers.append(handler)

    def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
        """Handle a ClassAd.

        The handlers are tried in order. A handler that raises is logged
        and skipped; the first handler returning something other than
        `None` ends the search.

        Parameters
        ----------
        ad : `dict` [`str`, `Any`]
            The dictionary representing the ClassAd that needs to be
            handled.

        Returns
        -------
        ad : `dict` [`str`, `Any`] or `None`
            The ClassAd returned by the first handler in the chain that was
            able to process it, `None` if no handler could.
        """
        for handler in self:
            try:
                new_ad = handler.handle(ad)
            except Exception as e:
                # Best effort: one misbehaving handler must not prevent
                # the remaining ones from getting their chance.
                _LOG.warning(
                    "Handler '%s' raised an exception while processing the ad: %s. "
                    "Proceeding to the next handler (if any).",
                    type(handler).__name__,
                    repr(e),
                )
                continue
            if new_ad is not None:
                return new_ad
        return None

145 

146 

class JobCompletedWithExecTicketHandler(Handler):
    """Handler of ClassAds for completed jobs with the ticket of execution.

    Usually, the entry in the event log for a completed job contains the
    ticket of execution -- a record describing how and when the job was
    terminated. If it exists, this handler will use it to add the attributes
    describing job's exit status.
    """

    def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
        """Copy the exit status from the ticket of execution into the ad.

        Parameters
        ----------
        ad : `dict` [`str`, `Any`]
            The ClassAd to process. It is modified in place.

        Returns
        -------
        ad : `dict` [`str`, `Any`] or `None`
            The same dictionary with ``ExitBySignal`` and either
            ``ExitSignal`` or ``ExitCode`` set, or `None` if the value of
            ``MyType`` does not end with ``TerminatedEvent`` or the ad has
            no ``ToE`` entry.
        """
        if not ad["MyType"].endswith("TerminatedEvent"):
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': job not completed",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None
        if "ToE" not in ad:
            # Same "Handler '%s':" prefix as every other refusal message so
            # the log can be filtered uniformly.
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': ticket of execution missing",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None

        toe = ad["ToE"]
        ad["ExitBySignal"] = toe["ExitBySignal"]
        if ad["ExitBySignal"]:
            ad["ExitSignal"] = toe["ExitSignal"]
        else:
            ad["ExitCode"] = toe["ExitCode"]
        return ad

181 

182 

class JobCompletedWithoutExecTicketHandler(Handler):
    """Handler of ClassAds for completed jobs w/o the ticket of execution.

    The entry in the event log for some completed jobs (e.g. jobs that run
    ``condor_dagman``) do *not* contain the ticket of execution -- a record
    describing how and when the job was terminated. This handler will try
    to use other existing attributes to add the ones describing job's exit
    status.
    """

    def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
        """Derive the exit status from the termination attributes of the ad.

        Parameters
        ----------
        ad : `dict` [`str`, `Any`]
            The ClassAd to process. It is modified in place.

        Returns
        -------
        ad : `dict` [`str`, `Any`] or `None`
            The same dictionary with ``ExitBySignal`` (the negation of
            ``TerminatedNormally``) and either ``ExitSignal`` (from
            ``TerminatedBySignal``) or ``ExitCode`` (from ``ReturnValue``)
            set, or `None` if the value of ``MyType`` does not end with
            ``TerminatedEvent`` or the ad has a ``ToE`` entry.
        """
        if not ad["MyType"].endswith("TerminatedEvent"):
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': job not completed",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None
        if "ToE" in ad:
            # Same "Handler '%s':" prefix as every other refusal message so
            # the log can be filtered uniformly.
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': ticket of execution found",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None

        ad["ExitBySignal"] = not ad["TerminatedNormally"]
        if ad["ExitBySignal"]:
            ad["ExitSignal"] = ad["TerminatedBySignal"]
        else:
            ad["ExitCode"] = ad["ReturnValue"]
        return ad

217 

218 

class JobHeldByOtherHandler(Handler):
    """Handler of ClassAds for held jobs with a hold reason code other than
    1 or 3.
    """

    def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
        """Report the hold reason code as the exit code of the job.

        Parameters
        ----------
        ad : `dict` [`str`, `Any`]
            The ClassAd to process. It is modified in place.

        Returns
        -------
        ad : `dict` [`str`, `Any`] or `None`
            The same dictionary with ``ExitBySignal`` set to `False` and
            ``ExitCode`` set to the value of ``HoldReasonCode``, or `None`
            if the value of ``MyType`` does not end with ``HeldEvent`` or
            the hold reason code is 1 or 3.
        """
        name = self.__class__.__name__
        if not ad["MyType"].endswith("HeldEvent"):
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': job not held",
                name,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None

        # Codes 1 and 3 are refused here; this handler only deals with the
        # remaining ones.
        if ad["HoldReasonCode"] in {1, 3}:
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': "
                "invalid hold reason code: HoldReasonCode = %s",
                name,
                ad["ClusterId"],
                ad["ProcId"],
                ad["HoldReasonCode"],
            )
            return None

        ad["ExitBySignal"] = False
        ad["ExitCode"] = ad["HoldReasonCode"]
        return ad

245 

246 

class JobHeldBySignalHandler(Handler):
    """Handler of ClassAds for jobs put on hold by signals."""

    def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
        """Extract the signal number from the hold reason of the job.

        Parameters
        ----------
        ad : `dict` [`str`, `Any`]
            The ClassAd to process. It is modified in place.

        Returns
        -------
        ad : `dict` [`str`, `Any`] or `None`
            The same dictionary with ``ExitBySignal`` set to `True` and
            ``ExitSignal`` set to the signal number (an `int`) found in
            ``HoldReason``, or `None` if the value of ``MyType`` does not
            end with ``HeldEvent``, the hold reason code is not 3, or
            ``HoldReason`` does not contain text of the form
            ``signal <number>``.
        """
        if not ad["MyType"].endswith("HeldEvent"):
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': job not held",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None
        if ad["HoldReasonCode"] != 3:
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': "
                "job not held by a signal: HoldReasonCode = %s, HoldReason = %s",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
                ad["HoldReasonCode"],
                ad["HoldReason"],
            )
            return None

        match = re.search(r"signal (\d+)", ad["HoldReason"])
        if match is None:
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': "
                "signal not found: HoldReason = %s",
                self.__class__.__name__,
                ad["ClusterId"],
                ad["ProcId"],
                ad["HoldReason"],
            )
            return None

        ad["ExitBySignal"] = True
        # The regex group is a string; convert it to a number. The
        # completed-job handlers copy ``ExitSignal`` straight from the ad,
        # presumably as an integer -- TODO confirm against callers.
        ad["ExitSignal"] = int(match.group(1))
        return ad

286 

287 

class JobHeldByUserHandler(Handler):
    """Handler of ClassAds for jobs put on hold by the user."""

    def handle(self, ad: dict[str, Any]) -> dict[str, Any] | None:
        """Mark a job held with hold reason code 1 as exited with code 0.

        Parameters
        ----------
        ad : `dict` [`str`, `Any`]
            The ClassAd to process. It is modified in place.

        Returns
        -------
        ad : `dict` [`str`, `Any`] or `None`
            The same dictionary with ``ExitBySignal`` set to `False` and
            ``ExitCode`` set to 0, or `None` if the value of ``MyType``
            does not end with ``HeldEvent`` or the hold reason code is
            not 1.
        """
        name = self.__class__.__name__
        if not ad["MyType"].endswith("HeldEvent"):
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': job not held",
                name,
                ad["ClusterId"],
                ad["ProcId"],
            )
            return None

        if ad["HoldReasonCode"] != 1:
            _LOG.debug(
                "Handler '%s': refusing to process the ad for the job '%s.%s': "
                "job not held by the user: HoldReasonCode = %s, HoldReason = %s",
                name,
                ad["ClusterId"],
                ad["ProcId"],
                ad["HoldReasonCode"],
                ad["HoldReason"],
            )
            return None

        ad["ExitBySignal"] = False
        ad["ExitCode"] = 0
        return ad

315 

316 

# The chain stops at the first handler that returns a processed ad, so the
# handlers are tried exactly in the order listed here.
_handlers = [
    JobHeldByUserHandler(),
    JobHeldBySignalHandler(),
    JobHeldByOtherHandler(),
    JobCompletedWithExecTicketHandler(),
    JobCompletedWithoutExecTicketHandler(),
]

# Ready-to-use chain of all the handlers defined in this module.
HTC_JOB_AD_HANDLERS = Chain(_handlers)