Coverage for python/lsst/ctrl/mpexec/reports.py: 77%

61 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-11 09:04 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ExecutionStatus", "Report", "QuantumReport"] 

25 

26import enum 

27import sys 

28 

29from lsst.daf.butler import DataCoordinate, DataId, DataIdValue 

30from lsst.utils.introspection import get_full_type_name 

31from pydantic import BaseModel, validator 

32 

33 

def _serializeDataId(dataId: DataId) -> dict[str, DataIdValue]:
    """Turn a data ID into the mapping stored in report models.

    Parameters
    ----------
    dataId : `DataId`
        Data ID to convert.

    Returns
    -------
    mapping : `dict` [`str`, `DataIdValue`]
        Result of ``dataId.byName()`` when ``dataId`` is a `DataCoordinate`;
        any other input is handed back unchanged (not copied).
    """
    # Anything that is not a DataCoordinate is passed through as-is.
    if not isinstance(dataId, DataCoordinate):
        return dataId  # type: ignore
    return dataId.byName()

39 

40 

@enum.unique
class ExecutionStatus(enum.Enum):
    """Possible values for job execution status.

    Status `FAILURE` is set if one or more tasks failed. Status `TIMEOUT` is
    set if there are no failures but one or more tasks timed out. Timeouts can
    only be detected in multi-process mode, child task is killed on timeout
    and usually should have non-zero exit code.

    Each member's value is the lower-case form of its name. The class is
    decorated with `enum.unique` so that a newly added member can never
    silently become an alias of an existing one.
    """

    # Default status of the report models defined in this module.
    SUCCESS = "success"

    # One or more tasks failed.
    FAILURE = "failure"

    # No failures, but one or more tasks timed out.
    TIMEOUT = "timeout"

    # NOTE(review): the conditions under which this status is reported are
    # not visible in this module -- confirm against the code that sets it.
    SKIPPED = "skipped"

54 

55 

class ExceptionInfo(BaseModel):
    """Information about exception."""

    className: str
    """Name of the exception class, as produced by `get_full_type_name`
    when the instance is built with `from_exception`.
    """

    message: str
    """Exception message; `from_exception` stores ``str(exception)`` here.

    This field is a required string and cannot be `None`. Reports that carry
    no exception details leave their ``exceptionInfo`` attribute set to
    `None` instead of creating an instance with an empty message.
    """

    @classmethod
    def from_exception(cls, exception: Exception) -> ExceptionInfo:
        """Construct instance from an exception.

        Parameters
        ----------
        exception : `Exception`
            Exception to describe.

        Returns
        -------
        info : `ExceptionInfo`
            Instance holding the type name and string form of ``exception``.
        """
        return cls(className=get_full_type_name(exception), message=str(exception))

71 

72 

class QuantumReport(BaseModel):
    """Task execution report for a single Quantum."""

    status: ExecutionStatus = ExecutionStatus.SUCCESS
    """Outcome of the execution, a member of the `ExecutionStatus` enum."""

    dataId: dict[str, DataIdValue]
    """Data ID of the Quantum."""

    taskLabel: str | None
    """Label of the task that executed this Quantum."""

    exitCode: int | None = None
    """Exit code of the sub-process that executed the Quantum; `None` when
    the Quantum was executed in-process. A negative value means the process
    was killed by a signal.
    """

    exceptionInfo: ExceptionInfo | None = None
    """Details of the raised exception, if there was one."""

    def __init__(
        self,
        dataId: DataId,
        taskLabel: str,
        status: ExecutionStatus = ExecutionStatus.SUCCESS,
        exitCode: int | None = None,
        exceptionInfo: ExceptionInfo | None = None,
    ):
        # The data ID goes through _serializeDataId before it reaches the
        # model; every other argument is forwarded untouched.
        fieldValues = dict(
            status=status,
            dataId=_serializeDataId(dataId),
            taskLabel=taskLabel,
            exitCode=exitCode,
            exceptionInfo=exceptionInfo,
        )
        super().__init__(**fieldValues)

    @classmethod
    def from_exception(
        cls,
        exception: Exception,
        dataId: DataId,
        taskLabel: str,
    ) -> QuantumReport:
        """Build a `FAILURE` report describing ``exception``.

        Parameters
        ----------
        exception : `Exception`
            Exception to record in the report.
        dataId : `DataId`
            Data ID of the Quantum.
        taskLabel : `str`
            Label of the task executing the Quantum.

        Returns
        -------
        report : `QuantumReport`
            Report with `ExecutionStatus.FAILURE` status and exception
            information filled in.
        """
        failureInfo = ExceptionInfo.from_exception(exception)
        return cls(
            dataId=dataId,
            taskLabel=taskLabel,
            status=ExecutionStatus.FAILURE,
            exceptionInfo=failureInfo,
        )

    @classmethod
    def from_exit_code(
        cls,
        exitCode: int,
        dataId: DataId,
        taskLabel: str,
    ) -> QuantumReport:
        """Build a report whose status is derived from an exit code.

        Parameters
        ----------
        exitCode : `int`
            Exit code to record; zero maps to `ExecutionStatus.SUCCESS`,
            anything else to `ExecutionStatus.FAILURE`.
        dataId : `DataId`
            Data ID of the Quantum.
        taskLabel : `str`
            Label of the task executing the Quantum.

        Returns
        -------
        report : `QuantumReport`
            Report carrying the exit code and the matching status.
        """
        if exitCode == 0:
            outcome = ExecutionStatus.SUCCESS
        else:
            outcome = ExecutionStatus.FAILURE
        return cls(
            dataId=dataId,
            taskLabel=taskLabel,
            status=outcome,
            exitCode=exitCode,
        )

142 

143 

class Report(BaseModel):
    """Execution report for the whole job with one or few quanta."""

    status: ExecutionStatus = ExecutionStatus.SUCCESS
    """Job status."""

    cmdLine: list[str] | None = None
    """Command line for the whole job. When not given (or given as `None`)
    it is filled in with a copy of `sys.argv`.
    """

    exitCode: int | None = None
    """Job exit code, this obviously cannot be set in pipetask."""

    exceptionInfo: ExceptionInfo | None = None
    """Exception information if exception was raised."""

    quantaReports: list[QuantumReport] = []
    """List of per-quantum reports, ordering is not specified. Some or all
    quanta may not produce a report.
    """

    @validator("cmdLine", always=True)
    def _set_cmdLine(cls, v: list[str] | None) -> list[str]:  # noqa: N805
        """Default the command line to the arguments of this process.

        Parameters
        ----------
        v : `list` [`str`] or `None`
            Value supplied for ``cmdLine``.

        Returns
        -------
        cmdLine : `list` [`str`]
            ``v`` unchanged when it is not `None`, otherwise a new list with
            the contents of `sys.argv`.
        """
        if v is None:
            # Store a copy rather than sys.argv itself, so that in-place
            # edits of the report's command line cannot alter the
            # process-wide argument list (and vice versa).
            v = list(sys.argv)
        return v

    def set_exception(self, exception: Exception) -> None:
        """Update exception information from an exception object.

        Parameters
        ----------
        exception : `Exception`
            Exception whose details replace the current ``exceptionInfo``.
        """
        self.exceptionInfo = ExceptionInfo.from_exception(exception)