Coverage for python/lsst/ctrl/mpexec/log_capture.py: 28%

88 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-23 10:58 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["LogCapture"] 

31 

32import logging 

33import os 

34import shutil 

35import tempfile 

36from collections.abc import Iterator 

37from contextlib import contextmanager, suppress 

38from logging import FileHandler 

39 

40from lsst.daf.butler import Butler, FileDataset, LimitedButler, Quantum 

41from lsst.daf.butler.logging import ButlerLogRecordHandler, ButlerLogRecords, ButlerMDC, JsonLogFormatter 

42from lsst.pipe.base import InvalidQuantumError, TaskDef 

43 

44_LOG = logging.getLogger(__name__) 

45 

46 

class _LogCaptureFlag:
    """Mutable switch telling `LogCapture` whether captured log records
    should be saved to the butler (saving is enabled by default).
    """

    # Class-level default; callers flip the instance attribute to disable.
    store: bool = True

51 

52 

class LogCapture:
    """Class handling capture of logging messages and their export to butler.

    Parameters
    ----------
    butler : `~lsst.daf.butler.LimitedButler`
        Data butler with limited API.
    full_butler : `~lsst.daf.butler.Butler` or `None`
        Data butler with full API, or `None` if full Butler is not available.
        If not none, then this must be the same instance as ``butler``.
    """

    stream_json_logs = True
    """If True each log record is written to a temporary file and ingested
    when quantum completes. If False the records are accumulated in memory
    and stored in butler on quantum completion. If full butler is not available
    then temporary file is not used."""

    def __init__(
        self,
        butler: LimitedButler,
        full_butler: Butler | None,
    ):
        self.butler = butler
        self.full_butler = full_butler

    @classmethod
    def from_limited(cls, butler: LimitedButler) -> LogCapture:
        """Make an instance from a limited butler; file ingest is not
        possible, so log records are accumulated in memory.
        """
        return cls(butler, None)

    @classmethod
    def from_full(cls, butler: Butler) -> LogCapture:
        """Make an instance from a full butler, enabling file-based
        streaming and ingest of log records.
        """
        return cls(butler, butler)

    @contextmanager
    def capture_logging(self, taskDef: TaskDef, quantum: Quantum) -> Iterator[_LogCaptureFlag]:
        """Configure logging system to capture logs for execution of this task.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.

        Notes
        -----
        Expected to be used as a context manager to ensure that logging
        records are inserted into the butler once the quantum has been
        executed:

        .. code-block:: py

            with self.capture_logging(taskDef, quantum):
                # Run quantum and capture logs.

        This method can also setup logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.
        """
        # include quantum dataId and task label into MDC
        mdc = {"LABEL": taskDef.label, "RUN": ""}
        if quantum.dataId:
            mdc["LABEL"] += f":{quantum.dataId}"
        if self.full_butler is not None:
            mdc["RUN"] = self.full_butler.run or ""
        ctx = _LogCaptureFlag()

        # Add a handler to the root logger to capture execution log output.
        if taskDef.logOutputDatasetName is not None:
            # Either accumulate into ButlerLogRecords or stream JSON records to
            # file and ingest that (ingest is possible only with full butler).
            if self.stream_json_logs and self.full_butler is not None:
                # Create the log file in a temporary directory rather than
                # creating a temporary file. This is necessary because
                # temporary files are created with restrictive permissions
                # and during file ingest these permissions persist in the
                # datastore. Using a temp directory allows us to create
                # a file with umask default permissions.
                tmpdir = tempfile.mkdtemp(prefix="butler-temp-logs-")

                # Construct a file to receive the log records and "touch" it.
                log_file = os.path.join(tmpdir, f"butler-log-{taskDef.label}.json")
                with open(log_file, "w"):
                    pass
                log_handler_file = FileHandler(log_file)
                log_handler_file.setFormatter(JsonLogFormatter())
                logging.getLogger().addHandler(log_handler_file)

                try:
                    with ButlerMDC.set_mdc(mdc):
                        yield ctx
                finally:
                    # Ensure that the logs are stored in butler.
                    logging.getLogger().removeHandler(log_handler_file)
                    log_handler_file.close()
                    if ctx.store:
                        self._ingest_log_records(quantum, taskDef.logOutputDatasetName, log_file)
                    # Always remove the temp directory, ingested or not.
                    shutil.rmtree(tmpdir, ignore_errors=True)

            else:
                log_handler_memory = ButlerLogRecordHandler()
                logging.getLogger().addHandler(log_handler_memory)

                try:
                    with ButlerMDC.set_mdc(mdc):
                        yield ctx
                finally:
                    # Ensure that the logs are stored in butler.
                    logging.getLogger().removeHandler(log_handler_memory)
                    if ctx.store:
                        self._store_log_records(quantum, taskDef.logOutputDatasetName, log_handler_memory)
                    # Drop accumulated records to free memory either way.
                    log_handler_memory.records.clear()

        else:
            # No log output dataset configured; only attach MDC context.
            with ButlerMDC.set_mdc(mdc):
                yield ctx

    def _get_log_dataset_ref(self, quantum: Quantum, dataset_type: str):
        """Return the single DatasetRef for the log output dataset type.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum whose outputs are searched.
        dataset_type : `str`
            Name of the log output dataset type.

        Raises
        ------
        InvalidQuantumError
            Raised if the quantum outputs do not contain this dataset type;
            this could happen due to inconsistent options between
            QuantumGraph generation and execution.
        """
        # DatasetRef has to be in the Quantum outputs, can lookup by name.
        try:
            [ref] = quantum.outputs[dataset_type]
        except LookupError as exc:
            raise InvalidQuantumError(
                f"Quantum outputs is missing log output dataset type {dataset_type};"
                " this could happen due to inconsistent options between QuantumGraph generation"
                " and execution"
            ) from exc
        return ref

    def _store_log_records(
        self, quantum: Quantum, dataset_type: str, log_handler: ButlerLogRecordHandler
    ) -> None:
        """Store in-memory accumulated log records in butler."""
        ref = self._get_log_dataset_ref(quantum, dataset_type)
        self.butler.put(log_handler.records, ref)

    def _ingest_log_records(self, quantum: Quantum, dataset_type: str, filename: str) -> None:
        """Ingest the streamed JSON log file as the quantum's log dataset.

        On successful ingest the file is moved into the datastore; if the
        file was not ingested for any reason it is removed.
        """
        # If we are logging to an external file we must always try to
        # close it.
        assert self.full_butler is not None, "Expected to have full butler for ingest"
        ingested = False
        try:
            ref = self._get_log_dataset_ref(quantum, dataset_type)

            # Need to ingest this file directly into butler.
            dataset = FileDataset(path=filename, refs=ref)
            try:
                self.full_butler.ingest(dataset, transfer="move")
                ingested = True
            except NotImplementedError:
                # Some datastores can't receive files (e.g. in-memory datastore
                # when testing), we store empty list for those just to have a
                # dataset. Alternative is to read the file as a
                # ButlerLogRecords object and put it.
                _LOG.info(
                    "Log records could not be stored in this butler because the"
                    " datastore can not ingest files, empty record list is stored instead."
                )
                records = ButlerLogRecords.from_records([])
                self.full_butler.put(records, ref)
        finally:
            # remove file if it is not ingested
            if not ingested:
                with suppress(OSError):
                    os.remove(filename)
222 os.remove(filename)