Coverage for python/lsst/ctrl/mpexec/log_capture.py: 22% (94 statements).

Report generated by coverage.py v7.2.3 at 2023-04-19 11:22 +0000.

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["LogCapture"] 

25 

26import logging 

27import os 

28import shutil 

29import tempfile 

30from collections.abc import Iterator 

31from contextlib import contextmanager 

32from logging import FileHandler 

33 

34from lsst.daf.butler import Butler, FileDataset, LimitedButler, Quantum 

35from lsst.daf.butler.core.logging import ButlerLogRecordHandler, ButlerLogRecords, ButlerMDC, JsonLogFormatter 

36from lsst.pipe.base import InvalidQuantumError, TaskDef 

37 

# Module-level logger for diagnostics emitted by this module itself
# (e.g. the "could not ingest" fallback message below).
_LOG = logging.getLogger(__name__)

39 

40 

class _LogCaptureFlag:
    """Mutable toggle handed to users of `LogCapture.capture_logging`.

    While the context manager is active, callers may set ``store`` to
    `False` to skip saving the captured log records to butler; it
    defaults to `True` (records are saved).
    """

    store: bool = True

45 

46 

class LogCapture:
    """Class handling capture of logging messages and their export to butler.

    Parameters
    ----------
    butler : `~lsst.daf.butler.LimitedButler`
        Data butler with limited API.
    full_butler : `~lsst.daf.butler.Butler` or `None`
        Data butler with full API, or `None` if full Butler is not available.
        If not none, then this must be the same instance as ``butler``.
    """

    stream_json_logs = True
    """If True each log record is written to a temporary file and ingested
    when quantum completes. If False the records are accumulated in memory
    and stored in butler on quantum completion. If full butler is not available
    then temporary file is not used."""

    def __init__(
        self,
        butler: LimitedButler,
        full_butler: Butler | None,
    ) -> None:
        self.butler = butler
        self.full_butler = full_butler

    @classmethod
    def from_limited(cls, butler: LimitedButler) -> LogCapture:
        """Make an instance from a limited butler.

        With no full butler available, log records are accumulated in
        memory and stored via ``put`` (file ingest is not possible).
        """
        return cls(butler, None)

    @classmethod
    def from_full(cls, butler: Butler) -> LogCapture:
        """Make an instance from a full butler.

        The same instance serves as both the limited and the full butler,
        which enables streaming log records to a file and ingesting it.
        """
        return cls(butler, butler)

    @contextmanager
    def capture_logging(self, taskDef: TaskDef, quantum: Quantum) -> Iterator[_LogCaptureFlag]:
        """Configure logging system to capture logs for execution of this task.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.

        Notes
        -----
        Expected to be used as a context manager to ensure that logging
        records are inserted into the butler once the quantum has been
        executed:

        .. code-block:: py

            with self.capture_logging(taskDef, quantum):
                # Run quantum and capture logs.

        This method can also setup logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.

        The yielded `_LogCaptureFlag` lets the caller disable the save by
        setting its ``store`` attribute to `False` before the context exits.
        """
        # include quantum dataId and task label into MDC
        mdc = {"LABEL": taskDef.label, "RUN": ""}
        if quantum.dataId:
            mdc["LABEL"] += f":{quantum.dataId}"
        if self.full_butler is not None:
            mdc["RUN"] = self.full_butler.run or ""
        ctx = _LogCaptureFlag()

        # Add a handler to the root logger to capture execution log output.
        if taskDef.logOutputDatasetName is not None:
            # Either accumulate into ButlerLogRecords or stream JSON records to
            # file and ingest that (ingest is possible only with full butler).
            if self.stream_json_logs and self.full_butler is not None:
                # Create the log file in a temporary directory rather than
                # creating a temporary file. This is necessary because
                # temporary files are created with restrictive permissions
                # and during file ingest these permissions persist in the
                # datastore. Using a temp directory allows us to create
                # a file with umask default permissions.
                tmpdir = tempfile.mkdtemp(prefix="butler-temp-logs-")

                # Construct a file to receive the log records and "touch" it.
                log_file = os.path.join(tmpdir, f"butler-log-{taskDef.label}.json")
                with open(log_file, "w"):
                    pass
                log_handler_file = FileHandler(log_file)
                log_handler_file.setFormatter(JsonLogFormatter())
                # NOTE: this mutates global logging state; the handler is
                # removed in the finally block below.
                logging.getLogger().addHandler(log_handler_file)

                try:
                    with ButlerMDC.set_mdc(mdc):
                        yield ctx
                finally:
                    # Ensure that the logs are stored in butler.
                    # Remove and close the handler first so the file is
                    # complete and flushed before it is ingested.
                    logging.getLogger().removeHandler(log_handler_file)
                    log_handler_file.close()
                    if ctx.store:
                        self._ingest_log_records(quantum, taskDef.logOutputDatasetName, log_file)
                    # Ingest uses transfer="move", so by now the file is
                    # either moved away or already deleted; this only cleans
                    # up the (now empty) temporary directory.
                    shutil.rmtree(tmpdir, ignore_errors=True)

            else:
                # Accumulate records in memory; works with limited butler.
                log_handler_memory = ButlerLogRecordHandler()
                logging.getLogger().addHandler(log_handler_memory)

                try:
                    with ButlerMDC.set_mdc(mdc):
                        yield ctx
                finally:
                    # Ensure that the logs are stored in butler.
                    # Remove the handler before the put so no further
                    # records are appended while storing.
                    logging.getLogger().removeHandler(log_handler_memory)
                    if ctx.store:
                        self._store_log_records(quantum, taskDef.logOutputDatasetName, log_handler_memory)
                    # Release the accumulated records regardless of whether
                    # they were stored.
                    log_handler_memory.records.clear()

        else:
            # No log output dataset configured; only set the MDC context.
            with ButlerMDC.set_mdc(mdc):
                yield ctx

    def _store_log_records(
        self, quantum: Quantum, dataset_type: str, log_handler: ButlerLogRecordHandler
    ) -> None:
        """Store in-memory log records in butler via ``put``.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum whose outputs contain the log dataset reference.
        dataset_type : `str`
            Name of the log output dataset type.
        log_handler : `ButlerLogRecordHandler`
            Handler holding the accumulated records to store.

        Raises
        ------
        InvalidQuantumError
            If the quantum outputs lack the log dataset type, or (limited
            butler only) the reference is unresolved.
        """
        # DatasetRef has to be in the Quantum outputs, can lookup by name.
        # NOTE(review): the list-unpacking below raises ValueError, not
        # LookupError, if more than one ref is present — confirm the
        # single-ref invariant holds for log outputs.
        try:
            [ref] = quantum.outputs[dataset_type]
        except LookupError as exc:
            raise InvalidQuantumError(
                f"Quantum outputs is missing log output dataset type {dataset_type};"
                " this could happen due to inconsistent options between QuantumGraph generation"
                " and execution"
            ) from exc

        if self.full_butler is None:
            # If full butler is not available then we need fully
            # resolved reference for limited butler.
            if ref.id is None:
                raise InvalidQuantumError(
                    f"Quantum contains unresolved reference for task log output dataset type {dataset_type}."
                )
            self.butler.put(log_handler.records, ref)
        else:
            self.full_butler.put(log_handler.records, ref)

    def _ingest_log_records(self, quantum: Quantum, dataset_type: str, filename: str) -> None:
        """Ingest a JSON-stream log file into the full butler.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum whose outputs contain the log dataset reference.
        dataset_type : `str`
            Name of the log output dataset type.
        filename : `str`
            Path of the file containing JSON log records; it is consumed
            (moved on ingest, otherwise removed) by this method.

        Raises
        ------
        InvalidQuantumError
            If the quantum outputs lack the log dataset type.
        """
        # If we are logging to an external file we must always try to
        # close it.
        assert self.full_butler is not None, "Expected to have full butler for ingest"
        ingested = False
        try:
            # DatasetRef has to be in the Quantum outputs, can lookup by name.
            try:
                [ref] = quantum.outputs[dataset_type]
            except LookupError as exc:
                raise InvalidQuantumError(
                    f"Quantum outputs is missing log output dataset type {dataset_type};"
                    " this could happen due to inconsistent options between QuantumGraph generation"
                    " and execution"
                ) from exc

            # Need to ingest this file directly into butler.
            dataset = FileDataset(path=filename, refs=ref)
            try:
                self.full_butler.ingest(dataset, transfer="move")
                ingested = True
            except NotImplementedError:
                # Some datastores can't receive files (e.g. in-memory datastore
                # when testing), we store empty list for those just to have a
                # dataset. Alternative is to read the file as a
                # ButlerLogRecords object and put it.
                _LOG.info(
                    "Log records could not be stored in this butler because the"
                    " datastore can not ingest files, empty record list is stored instead."
                )
                records = ButlerLogRecords.from_records([])
                self.full_butler.put(records, ref)
        finally:
            # remove file if it is not ingested
            if not ingested:
                try:
                    os.remove(filename)
                except OSError:
                    # Best-effort cleanup; the temp directory is removed by
                    # the caller anyway.
                    pass

227 pass