Coverage for python/lsst/ctrl/mpexec/log_capture.py: 28% (88 statements)
# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["LogCapture"]

import logging
import os
import shutil
import tempfile
from collections.abc import Iterator
from contextlib import contextmanager, suppress
from logging import FileHandler

from lsst.daf.butler import Butler, FileDataset, LimitedButler, Quantum
from lsst.daf.butler.logging import ButlerLogRecordHandler, ButlerLogRecords, ButlerMDC, JsonLogFormatter
from lsst.pipe.base import InvalidQuantumError, TaskDef

_LOG = logging.getLogger(__name__)


class _LogCaptureFlag:
    """Simple flag to enable/disable log-to-butler saving."""

    store: bool = True


class LogCapture:
    """Class handling capture of logging messages and their export to butler.

    Parameters
    ----------
    butler : `~lsst.daf.butler.LimitedButler`
        Data butler with limited API.
    full_butler : `~lsst.daf.butler.Butler` or `None`
        Data butler with full API, or `None` if a full butler is not
        available. If not `None`, this must be the same instance as
        ``butler``.
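
    Notes
    -----
    A minimal construction sketch; ``butler`` is assumed to be an
    already-initialized `~lsst.daf.butler.Butler` (full) or
    `~lsst.daf.butler.LimitedButler` (names are illustrative):

    .. code-block:: py

        # With a full butler, streamed JSON log files can be ingested.
        capture = LogCapture.from_full(butler)

        # With only a limited butler, records are kept in memory and put.
        capture = LogCapture.from_limited(butler)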
63 """
65 stream_json_logs = True
66 """If True each log record is written to a temporary file and ingested
67 when quantum completes. If False the records are accumulated in memory
68 and stored in butler on quantum completion. If full butler is not available
69 then temporary file is not used."""

    def __init__(
        self,
        butler: LimitedButler,
        full_butler: Butler | None,
    ):
        self.butler = butler
        self.full_butler = full_butler

    @classmethod
    def from_limited(cls, butler: LimitedButler) -> LogCapture:
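        """Construct a `LogCapture` that has no full butler.

        Parameters
        ----------
        butler : `~lsst.daf.butler.LimitedButler`
            Data butler with limited API; captured records are stored with
            ``put`` rather than ingested as files.

        Returns
        -------
        log_capture : `LogCapture`
            New instance.
        """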
        return cls(butler, None)

    @classmethod
    def from_full(cls, butler: Butler) -> LogCapture:
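        """Construct a `LogCapture` backed by a full butler.

        Parameters
        ----------
        butler : `~lsst.daf.butler.Butler`
            Data butler with full API; it is used as both the limited and the
            full butler, which allows streamed log files to be ingested.

        Returns
        -------
        log_capture : `LogCapture`
            New instance.
        """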
        return cls(butler, butler)

    @contextmanager
    def capture_logging(self, taskDef: TaskDef, quantum: Quantum) -> Iterator[_LogCaptureFlag]:
89 """Configure logging system to capture logs for execution of this task.
91 Parameters
92 ----------
93 taskDef : `lsst.pipe.base.TaskDef`
94 The task definition.
95 quantum : `~lsst.daf.butler.Quantum`
96 Single Quantum instance.
98 Notes
99 -----
100 Expected to be used as a context manager to ensure that logging
101 records are inserted into the butler once the quantum has been
102 executed:
104 .. code-block:: py
            with self.capture_logging(taskDef, quantum):
                # Run quantum and capture logs.
                ...
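
        The object yielded by the context manager can be used to disable
        storing of the captured records; in this sketch ``skip_log_output``
        is a hypothetical caller-side condition:

        .. code-block:: py

            with self.capture_logging(taskDef, quantum) as ctx:
                if skip_log_output:
                    ctx.store = False  # captured logs will not be saved
                # Run quantum and capture logs.
                ...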

        This method can also set up logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.
        """
        # include quantum dataId and task label into MDC
        mdc = {"LABEL": taskDef.label, "RUN": ""}
        if quantum.dataId:
            mdc["LABEL"] += f":{quantum.dataId}"
        if self.full_butler is not None:
            mdc["RUN"] = self.full_butler.run or ""
        ctx = _LogCaptureFlag()

        # Add a handler to the root logger to capture execution log output.
        if taskDef.logOutputDatasetName is not None:
            # Either accumulate into ButlerLogRecords or stream JSON records to
            # file and ingest that (ingest is possible only with full butler).
            if self.stream_json_logs and self.full_butler is not None:
                # Create the log file in a temporary directory rather than
                # creating a temporary file. This is necessary because
                # temporary files are created with restrictive permissions
                # and during file ingest these permissions persist in the
                # datastore. Using a temp directory allows us to create
                # a file with umask default permissions.
                tmpdir = tempfile.mkdtemp(prefix="butler-temp-logs-")

                # Construct a file to receive the log records and "touch" it.
                log_file = os.path.join(tmpdir, f"butler-log-{taskDef.label}.json")
                with open(log_file, "w"):
                    pass
                log_handler_file = FileHandler(log_file)
                log_handler_file.setFormatter(JsonLogFormatter())
                logging.getLogger().addHandler(log_handler_file)

                try:
                    with ButlerMDC.set_mdc(mdc):
                        yield ctx
                finally:
                    # Ensure that the logs are stored in butler.
                    logging.getLogger().removeHandler(log_handler_file)
                    log_handler_file.close()
                    if ctx.store:
                        self._ingest_log_records(quantum, taskDef.logOutputDatasetName, log_file)
                    shutil.rmtree(tmpdir, ignore_errors=True)

            else:
                log_handler_memory = ButlerLogRecordHandler()
                logging.getLogger().addHandler(log_handler_memory)

                try:
                    with ButlerMDC.set_mdc(mdc):
                        yield ctx
                finally:
                    # Ensure that the logs are stored in butler.
                    logging.getLogger().removeHandler(log_handler_memory)
                    if ctx.store:
                        self._store_log_records(quantum, taskDef.logOutputDatasetName, log_handler_memory)
                    log_handler_memory.records.clear()

        else:
            with ButlerMDC.set_mdc(mdc):
                yield ctx

    def _store_log_records(
        self, quantum: Quantum, dataset_type: str, log_handler: ButlerLogRecordHandler
    ) -> None:
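        """Store accumulated log records in the butler.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum whose outputs contain the log dataset reference.
        dataset_type : `str`
            Name of the log output dataset type.
        log_handler : `~lsst.daf.butler.logging.ButlerLogRecordHandler`
            Handler holding the captured log records.
        """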
        # DatasetRef has to be in the Quantum outputs; we can look it up by
        # name.
        try:
            [ref] = quantum.outputs[dataset_type]
        except LookupError as exc:
            raise InvalidQuantumError(
                f"Quantum outputs is missing log output dataset type {dataset_type};"
                " this could happen due to inconsistent options between QuantumGraph generation"
                " and execution"
            ) from exc

        self.butler.put(log_handler.records, ref)

    def _ingest_log_records(self, quantum: Quantum, dataset_type: str, filename: str) -> None:
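        """Ingest a JSON log file into the full butler.

        If the datastore cannot ingest files, an empty record list is stored
        instead; the file is removed if it was not ingested.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum whose outputs contain the log dataset reference.
        dataset_type : `str`
            Name of the log output dataset type.
        filename : `str`
            Path to the temporary file containing JSON log records.
        """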
        # If we are logging to an external file we must always try to
        # close it.
        assert self.full_butler is not None, "Expected to have full butler for ingest"
        ingested = False
        try:
            # DatasetRef has to be in the Quantum outputs; we can look it up
            # by name.
            try:
                [ref] = quantum.outputs[dataset_type]
            except LookupError as exc:
                raise InvalidQuantumError(
                    f"Quantum outputs is missing log output dataset type {dataset_type};"
                    " this could happen due to inconsistent options between QuantumGraph generation"
                    " and execution"
                ) from exc

            # Need to ingest this file directly into butler.
            dataset = FileDataset(path=filename, refs=ref)
            try:
                self.full_butler.ingest(dataset, transfer="move")
                ingested = True
            except NotImplementedError:
                # Some datastores can't receive files (e.g. the in-memory
                # datastore used in testing); we store an empty list for those
                # just to have a dataset. The alternative is to read the file
                # as a ButlerLogRecords object and put it.
                _LOG.info(
                    "Log records could not be stored in this butler because the"
                    " datastore can not ingest files, empty record list is stored instead."
                )
                records = ButlerLogRecords.from_records([])
                self.full_butler.put(records, ref)
        finally:
            # Remove the file if it was not ingested.
            if not ingested:
                with suppress(OSError):
                    os.remove(filename)