Coverage for python/lsst/ctrl/mpexec/log_capture.py: 28%
95 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-25 10:28 -0700
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ["LogCapture"]
32import logging
33import os
34import shutil
35import tempfile
36import warnings
37from collections.abc import Iterator
38from contextlib import contextmanager, suppress
39from logging import FileHandler
41from lsst.daf.butler import Butler, FileDataset, LimitedButler, Quantum
42from lsst.daf.butler.logging import ButlerLogRecordHandler, ButlerLogRecords, ButlerMDC, JsonLogFormatter
43from lsst.pipe.base import InvalidQuantumError, TaskDef
44from lsst.pipe.base.pipeline_graph import TaskNode
45from lsst.utils.introspection import find_outside_stacklevel
47_LOG = logging.getLogger(__name__)
class _LogCaptureFlag:
    """Mutable switch controlling whether captured logs are saved to butler.

    An instance is yielded by `LogCapture.capture_logging`; callers may set
    ``store`` to `False` before the context manager exits to skip the
    log-to-butler transfer.
    """

    # Class-level default; flipped per-instance by callers when needed.
    store: bool = True
class LogCapture:
    """Class handling capture of logging messages and their export to butler.

    Parameters
    ----------
    butler : `~lsst.daf.butler.LimitedButler`
        Data butler with limited API.
    full_butler : `~lsst.daf.butler.Butler` or `None`
        Data butler with full API, or `None` if full Butler is not available.
        If not none, then this must be the same instance as ``butler``.
    """

    stream_json_logs = True
    """If True each log record is written to a temporary file and ingested
    when quantum completes. If False the records are accumulated in memory
    and stored in butler on quantum completion. If full butler is not available
    then temporary file is not used."""

    def __init__(
        self,
        butler: LimitedButler,
        full_butler: Butler | None,
    ):
        self.butler = butler
        self.full_butler = full_butler

    @classmethod
    def from_limited(cls, butler: LimitedButler) -> LogCapture:
        """Make an instance for a limited butler; logs can only be
        accumulated in memory and stored with ``put``, never ingested.
        """
        return cls(butler, None)

    @classmethod
    def from_full(cls, butler: Butler) -> LogCapture:
        """Make an instance for a full butler, which additionally supports
        streaming logs to a file and ingesting that file.
        """
        return cls(butler, butler)

    @contextmanager
    def capture_logging(
        self, task_node: TaskDef | TaskNode, /, quantum: Quantum
    ) -> Iterator[_LogCaptureFlag]:
        """Configure logging system to capture logs for execution of this task.

        Parameters
        ----------
        task_node : `lsst.pipe.base.TaskDef` or \
                `~lsst.pipe.base.pipeline_graph.TaskNode`
            The task definition. Support for `~lsst.pipe.base.TaskDef` is
            deprecated and will be removed after v27.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.

        Notes
        -----
        Expected to be used as a context manager to ensure that logging
        records are inserted into the butler once the quantum has been
        executed:

        .. code-block:: py

            with self.capture_logging(task_node, quantum):
                # Run quantum and capture logs.

        Ths method can also setup logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.
        """
        # Include quantum dataId and task label into MDC.
        mdc = {"LABEL": task_node.label, "RUN": ""}
        if quantum.dataId:
            mdc["LABEL"] += f":{quantum.dataId}"
        if self.full_butler is not None:
            mdc["RUN"] = self.full_butler.run or ""
        ctx = _LogCaptureFlag()

        if isinstance(task_node, TaskDef):
            # TODO: remove this block and associated docs and annotations on
            # DM-40443.
            log_dataset_name = task_node.logOutputDatasetName
            warnings.warn(
                "Passing TaskDef instances to LogCapture is deprecated and will not be supported after v27.",
                FutureWarning,
                stacklevel=find_outside_stacklevel("lsst.ctrl.mpexec"),
            )
        else:
            log_dataset_name = (
                task_node.log_output.dataset_type_name if task_node.log_output is not None else None
            )

        if log_dataset_name is None:
            # No log output dataset configured; only MDC setup is needed.
            with ButlerMDC.set_mdc(mdc):
                yield ctx
        elif self.stream_json_logs and self.full_butler is not None:
            # Stream JSON records to file and ingest that file (ingest is
            # possible only with full butler).
            yield from self._capture_to_file(mdc, ctx, quantum, log_dataset_name, task_node.label)
        else:
            # Accumulate records in memory and store them on completion.
            yield from self._capture_in_memory(mdc, ctx, quantum, log_dataset_name)

    def _capture_to_file(
        self,
        mdc: dict[str, str],
        ctx: _LogCaptureFlag,
        quantum: Quantum,
        log_dataset_name: str,
        label: str,
    ) -> Iterator[_LogCaptureFlag]:
        """Capture root-logger output into a JSON file and ingest it into the
        full butler on exit (helper for `capture_logging`).
        """
        # Create the log file in a temporary directory rather than
        # creating a temporary file. This is necessary because
        # temporary files are created with restrictive permissions
        # and during file ingest these permissions persist in the
        # datastore. Using a temp directory allows us to create
        # a file with umask default permissions.
        tmpdir = tempfile.mkdtemp(prefix="butler-temp-logs-")
        try:
            # Construct a file to receive the log records and "touch" it.
            log_file = os.path.join(tmpdir, f"butler-log-{label}.json")
            with open(log_file, "w"):
                pass
            log_handler_file = FileHandler(log_file)
            log_handler_file.setFormatter(JsonLogFormatter())
            logging.getLogger().addHandler(log_handler_file)

            try:
                with ButlerMDC.set_mdc(mdc):
                    yield ctx
            finally:
                # Ensure that the logs are stored in butler.
                logging.getLogger().removeHandler(log_handler_file)
                log_handler_file.close()
                if ctx.store:
                    self._ingest_log_records(quantum, log_dataset_name, log_file)
        finally:
            # Remove the directory even when handler setup raised before the
            # inner try was entered (previously the directory leaked then).
            shutil.rmtree(tmpdir, ignore_errors=True)

    def _capture_in_memory(
        self,
        mdc: dict[str, str],
        ctx: _LogCaptureFlag,
        quantum: Quantum,
        log_dataset_name: str,
    ) -> Iterator[_LogCaptureFlag]:
        """Capture root-logger output into memory and store it with
        ``butler.put`` on exit (helper for `capture_logging`).
        """
        log_handler_memory = ButlerLogRecordHandler()
        logging.getLogger().addHandler(log_handler_memory)

        try:
            with ButlerMDC.set_mdc(mdc):
                yield ctx
        finally:
            # Ensure that the logs are stored in butler.
            logging.getLogger().removeHandler(log_handler_memory)
            if ctx.store:
                self._store_log_records(quantum, log_dataset_name, log_handler_memory)
            log_handler_memory.records.clear()

    def _resolve_log_ref(self, quantum: Quantum, dataset_type: str):
        """Return the single output `~lsst.daf.butler.DatasetRef` for the log
        dataset type.

        Raises
        ------
        lsst.pipe.base.InvalidQuantumError
            Raised if the dataset type is absent from the quantum outputs.
        """
        # DatasetRef has to be in the Quantum outputs, can lookup by name.
        try:
            [ref] = quantum.outputs[dataset_type]
        except LookupError as exc:
            raise InvalidQuantumError(
                f"Quantum outputs is missing log output dataset type {dataset_type};"
                " this could happen due to inconsistent options between QuantumGraph generation"
                " and execution"
            ) from exc
        return ref

    def _store_log_records(
        self, quantum: Quantum, dataset_type: str, log_handler: ButlerLogRecordHandler
    ) -> None:
        """Store accumulated log records directly via ``butler.put``."""
        ref = self._resolve_log_ref(quantum, dataset_type)
        self.butler.put(log_handler.records, ref)

    def _ingest_log_records(self, quantum: Quantum, dataset_type: str, filename: str) -> None:
        """Ingest the JSON log file into the full butler, removing the file
        whenever ingest did not consume it.
        """
        # If we are logging to an external file we must always try to
        # close it.
        assert self.full_butler is not None, "Expected to have full butler for ingest"
        ingested = False
        try:
            ref = self._resolve_log_ref(quantum, dataset_type)

            # Need to ingest this file directly into butler.
            dataset = FileDataset(path=filename, refs=ref)
            try:
                self.full_butler.ingest(dataset, transfer="move")
                ingested = True
            except NotImplementedError:
                # Some datastores can't receive files (e.g. in-memory datastore
                # when testing), we store empty list for those just to have a
                # dataset. Alternative is to read the file as a
                # ButlerLogRecords object and put it.
                _LOG.info(
                    "Log records could not be stored in this butler because the"
                    " datastore can not ingest files, empty record list is stored instead."
                )
                records = ButlerLogRecords.from_records([])
                self.full_butler.put(records, ref)
        finally:
            # Remove file if it is not ingested.
            if not ingested:
                with suppress(OSError):
                    os.remove(filename)