Coverage for python/lsst/ctrl/mpexec/log_capture.py: 24%
90 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-05 09:15 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["LogCapture"]
26import logging
27import os
28import shutil
29import tempfile
30from collections.abc import Iterator
31from contextlib import contextmanager
32from logging import FileHandler
34from lsst.daf.butler import Butler, FileDataset, LimitedButler, Quantum
35from lsst.daf.butler.core.logging import ButlerLogRecordHandler, ButlerLogRecords, ButlerMDC, JsonLogFormatter
36from lsst.pipe.base import InvalidQuantumError, TaskDef
# Module-level logger for diagnostics emitted by this module itself.
_LOG = logging.getLogger(__name__)
class _LogCaptureFlag:
    """Mutable flag object handed to ``capture_logging`` callers so they can
    disable saving of captured log records to butler before the context exits.
    """

    def __init__(self) -> None:
        # Saving is enabled by default; callers set this to False to skip
        # the store/ingest step on context-manager exit.
        self.store = True
class LogCapture:
    """Class handling capture of logging messages and their export to butler.

    Parameters
    ----------
    butler : `~lsst.daf.butler.LimitedButler`
        Data butler with limited API.
    full_butler : `~lsst.daf.butler.Butler` or `None`
        Data butler with full API, or `None` if full Butler is not available.
        If not `None`, then this must be the same instance as ``butler``.
    """

    stream_json_logs = True
    """If True each log record is written to a temporary file and ingested
    when quantum completes. If False the records are accumulated in memory
    and stored in butler on quantum completion. If full butler is not available
    then temporary file is not used."""

    def __init__(
        self,
        butler: LimitedButler,
        full_butler: Butler | None,
    ) -> None:
        self.butler = butler
        self.full_butler = full_butler

    @classmethod
    def from_limited(cls, butler: LimitedButler) -> LogCapture:
        """Make an instance from a limited butler; log records can only be
        accumulated in memory and stored with ``put``, never file-ingested.
        """
        return cls(butler, None)

    @classmethod
    def from_full(cls, butler: Butler) -> LogCapture:
        """Make an instance from a full butler, which additionally allows
        streaming log records to a file that is ingested on completion.
        """
        return cls(butler, butler)

    @contextmanager
    def capture_logging(self, taskDef: TaskDef, quantum: Quantum) -> Iterator[_LogCaptureFlag]:
        """Configure logging system to capture logs for execution of this task.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.

        Notes
        -----
        Expected to be used as a context manager to ensure that logging
        records are inserted into the butler once the quantum has been
        executed:

        .. code-block:: py

            with self.capture_logging(taskDef, quantum):
                # Run quantum and capture logs.

        This method can also set up logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.
        """
        # include quantum dataId and task label into MDC
        mdc = {"LABEL": taskDef.label, "RUN": ""}
        if quantum.dataId:
            mdc["LABEL"] += f":{quantum.dataId}"
        if self.full_butler is not None:
            mdc["RUN"] = self.full_butler.run or ""
        # Flag yielded to the caller; setting ``ctx.store = False`` inside
        # the ``with`` body suppresses saving of the captured records.
        ctx = _LogCaptureFlag()

        # Add a handler to the root logger to capture execution log output.
        # NOTE(review): handlers are attached to the *root* logger, so all
        # records emitted process-wide during the quantum are captured.
        if taskDef.logOutputDatasetName is not None:
            # Either accumulate into ButlerLogRecords or stream JSON records to
            # file and ingest that (ingest is possible only with full butler).
            if self.stream_json_logs and self.full_butler is not None:
                # Create the log file in a temporary directory rather than
                # creating a temporary file. This is necessary because
                # temporary files are created with restrictive permissions
                # and during file ingest these permissions persist in the
                # datastore. Using a temp directory allows us to create
                # a file with umask default permissions.
                tmpdir = tempfile.mkdtemp(prefix="butler-temp-logs-")

                # Construct a file to receive the log records and "touch" it.
                log_file = os.path.join(tmpdir, f"butler-log-{taskDef.label}.json")
                with open(log_file, "w"):
                    pass
                # NOTE(review): if handler construction below raises before
                # the try/finally is entered, ``tmpdir`` is leaked — confirm
                # whether this is acceptable or should be guarded.
                log_handler_file = FileHandler(log_file)
                log_handler_file.setFormatter(JsonLogFormatter())
                logging.getLogger().addHandler(log_handler_file)

                try:
                    with ButlerMDC.set_mdc(mdc):
                        yield ctx
                finally:
                    # Ensure that the logs are stored in butler.
                    # Order matters: detach and close the handler first so
                    # the file is complete before ingest, then remove the
                    # temp directory regardless of whether ingest happened.
                    logging.getLogger().removeHandler(log_handler_file)
                    log_handler_file.close()
                    if ctx.store:
                        self._ingest_log_records(quantum, taskDef.logOutputDatasetName, log_file)
                    shutil.rmtree(tmpdir, ignore_errors=True)

            else:
                # Accumulate records in memory; works with a limited butler.
                log_handler_memory = ButlerLogRecordHandler()
                logging.getLogger().addHandler(log_handler_memory)

                try:
                    with ButlerMDC.set_mdc(mdc):
                        yield ctx
                finally:
                    # Ensure that the logs are stored in butler.
                    logging.getLogger().removeHandler(log_handler_memory)
                    if ctx.store:
                        self._store_log_records(quantum, taskDef.logOutputDatasetName, log_handler_memory)
                        # Release accumulated records once stored.
                        log_handler_memory.records.clear()

        else:
            # No log output dataset configured; only set up the MDC.
            with ButlerMDC.set_mdc(mdc):
                yield ctx

    def _store_log_records(
        self, quantum: Quantum, dataset_type: str, log_handler: ButlerLogRecordHandler
    ) -> None:
        """Store in-memory log records into butler via ``put``.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum whose outputs contain the log dataset reference.
        dataset_type : `str`
            Name of the log output dataset type to look up in the quantum
            outputs.
        log_handler : `ButlerLogRecordHandler`
            Handler holding the accumulated log records.

        Raises
        ------
        InvalidQuantumError
            Raised if the quantum outputs do not contain ``dataset_type``.
        """
        # DatasetRef has to be in the Quantum outputs, can lookup by name.
        try:
            [ref] = quantum.outputs[dataset_type]
        except LookupError as exc:
            raise InvalidQuantumError(
                f"Quantum outputs is missing log output dataset type {dataset_type};"
                " this could happen due to inconsistent options between QuantumGraph generation"
                " and execution"
            ) from exc

        self.butler.put(log_handler.records, ref)

    def _ingest_log_records(self, quantum: Quantum, dataset_type: str, filename: str) -> None:
        """Ingest the streamed JSON log file into the full butler.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum whose outputs contain the log dataset reference.
        dataset_type : `str`
            Name of the log output dataset type to look up in the quantum
            outputs.
        filename : `str`
            Path of the JSON log file; moved into the datastore on
            successful ingest, otherwise removed.

        Raises
        ------
        InvalidQuantumError
            Raised if the quantum outputs do not contain ``dataset_type``.
        """
        # If we are logging to an external file we must always try to
        # close it.
        assert self.full_butler is not None, "Expected to have full butler for ingest"
        ingested = False
        try:
            # DatasetRef has to be in the Quantum outputs, can lookup by name.
            try:
                [ref] = quantum.outputs[dataset_type]
            except LookupError as exc:
                raise InvalidQuantumError(
                    f"Quantum outputs is missing log output dataset type {dataset_type};"
                    " this could happen due to inconsistent options between QuantumGraph generation"
                    " and execution"
                ) from exc

            # Need to ingest this file directly into butler.
            dataset = FileDataset(path=filename, refs=ref)
            try:
                # transfer="move" consumes the file on success.
                self.full_butler.ingest(dataset, transfer="move")
                ingested = True
            except NotImplementedError:
                # Some datastores can't receive files (e.g. in-memory datastore
                # when testing), we store empty list for those just to have a
                # dataset. Alternative is to read the file as a
                # ButlerLogRecords object and put it.
                _LOG.info(
                    "Log records could not be stored in this butler because the"
                    " datastore can not ingest files, empty record list is stored instead."
                )
                records = ButlerLogRecords.from_records([])
                self.full_butler.put(records, ref)
        finally:
            # remove file if it is not ingested
            if not ingested:
                try:
                    os.remove(filename)
                except OSError:
                    pass