Coverage for python/lsst/summit/extras/logUtils.py: 13% (128 statements)

# This file is part of summit_extras.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import logging
import math

import lsst.daf.butler as dafButler

__all__ = ["LogBrowser"]

_LOG = logging.getLogger(__name__)


class LogBrowser:
    """A convenience class for helping identify different failure modes within
    a processing collection.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler. Must contain the collection to be examined.
    taskName : `str`
        The name of the task, e.g. ``isr``, ``characterizeImage``, etc.
    collection : `str`
        The processing collection to use.
    where : `str`, optional
        A dataId search string formatted appropriately (i.e. similarly to a
        SQL WHERE clause) for a where clause in
        ``butler.registry.queryDatasets``, e.g.::

            where = ("instrument='{}' AND skymap='{}' AND "
                     "visit IN (0..100)".format("LATISS", "latiss_v1"))
    bind : `~collections.abc.Mapping`, optional
        Mapping containing literal values to be injected into the ``where``
        expression, keyed by the identifiers they replace (note that the name
        of a bind key cannot be the same as any butler dimension name), e.g.::

            where = "exposure IN (exposures)"
            bind = {"exposures": exposure_list}

    Notes
    -----
    Many tasks throw errors with run-specific values embedded in them, meaning
    the ``doFailZoology`` function doesn't collapse them down to a single
    failure case as one would like. If this is the case, take the first part
    of the message that is common among the ones you would like to be classed
    together, and add it to the class property ``SPECIAL_ZOO_CASES`` to
    declare a new type of error animal.

    Example usage::

        logBrowser = LogBrowser(butler, taskName, collection, where=where,
                                bind=bind)
        fail = 'TaskError: Fatal astrometry failure detected: mean on-sky distance'
        logBrowser.SPECIAL_ZOO_CASES.append(fail)
        logBrowser.doFailZoology()
    """

    IGNORE_LOGS_FROM = [
        # butler.datastores is verbose by default and not interesting to most
        "lsst.daf.butler.datastores",
    ]
    SPECIAL_ZOO_CASES = [
        "with gufunc signature (n?,k),(k,m?)->(n?,m?)",
    ]

    def __init__(
        self,
        butler: dafButler.Butler,
        taskName: str,
        collection: str,
        where: str = "",
        bind: dict | None = None,
    ):
        self.taskName = taskName
        self.collection = collection
        self.where = where
        self.bind = bind

        self.log = _LOG.getChild("logBrowser")
        self.butler = butler

        if self.bind is not None:
            for key in self.bind.keys():
                if key not in self.where:
                    self.log.warning(
                        f"Key '{key}' in bind is not in the where string provided: "
                        f"'{self.where}', so no binding will take effect."
                    )

        self.dataRefs = self._getDataRefs()
        self.logs = self._loadLogs(self.dataRefs)

    def _getDataRefs(self) -> list[dafButler.DatasetRef]:
        """Get the dataRefs for the specified task and collection.

        Returns
        -------
        dataRefs : `list` [`lsst.daf.butler.DatasetRef`]
        """
        results = self.butler.registry.queryDatasets(
            f"{self.taskName}_log",
            collections=self.collection,
            findFirst=True,
            where=self.where,
            bind=self.bind,
        )
        results = list(set(results))  # deduplicate the query results
        self.log.info(f"Found {len(results)} datasets in collection for task {self.taskName}")
        return sorted(results)

    def _loadLogs(
        self, dataRefs: list[dafButler.DatasetRef]
    ) -> dict[dafButler.DatasetRef, dafButler.ButlerLogRecords]:
        """Load all the logs for the dataRefs.

        Returns
        -------
        logs : `dict` {`lsst.daf.butler.DatasetRef`:
                `lsst.daf.butler.ButlerLogRecords`}
            A dict of all the logs, keyed by their dataRef.
        """
        logs = {}
        for i, dataRef in enumerate(dataRefs):
            if (i + 1) % 100 == 0:  # periodic progress report for large collections
                self.log.info(f"Loaded {i + 1} logs...")
            log = self.butler.get(dataRef)
            logs[dataRef] = log
        return logs

    def getPassingDataIds(self) -> list[dafButler.DataCoordinate]:
        """Get the dataIds for all passes within the collection for the task.

        Returns
        -------
        dataIds : `list` [`lsst.daf.butler.dimensions.DataCoordinate`]
        """
        fails = self._getFailDataRefs()
        passes = [r.dataId for r in self.dataRefs if r not in fails]
        return passes

    def getFailingDataIds(self) -> list[dafButler.DataCoordinate]:
        """Get the dataIds for all fails within the collection for the task.

        Returns
        -------
        dataIds : `list` [`lsst.daf.butler.dimensions.DataCoordinate`]
        """
        fails = self._getFailDataRefs()
        return [r.dataId for r in fails]

    def printPasses(self) -> None:
        """Print out all the passing dataIds."""
        passes = self.getPassingDataIds()
        for dataId in passes:
            print(dataId)

    def printFails(self) -> None:
        """Print out all the failing dataIds."""
        fails = self.getFailingDataIds()
        for dataId in fails:
            print(dataId)

    def countFails(self) -> None:
        """Print a count of all the failing dataIds."""
        print(f"{len(self._getFailDataRefs())} failing cases found")

    def countPasses(self) -> None:
        """Print a count of all the passing dataIds."""
        print(f"{len(self.getPassingDataIds())} passing cases found")

    def _getFailDataRefs(self) -> list[dafButler.DatasetRef]:
        """Get a list of all the failing dataRefs.

        Note that these are dataset references to the logs, so they are not
        the failures themselves, but the logs containing the failure
        messages; the items of interest for the failures are their dataIds.
        This is why ``_getFailDataRefs()`` is a private method while
        ``getFailingDataIds()`` is the public API.

        Returns
        -------
        logs : `list` [`lsst.daf.butler.DatasetRef`]
            A list of all the failing dataRefs.
        """
        fails = []
        for dataRef, log in self.logs.items():
            # Dereferencing a log with [] gives the individual lines in it,
            # each containing a level, message, etc. The final task failure
            # message always comes in the last line of the log and contains
            # the string 'failed', as this is the pipeline executor reporting
            # on success/fail along with the time and id.
            if log[-1].message.find("failed") != -1:
                fails.append(dataRef)
        return fails

    def _printLineIf(self, logLine: dafButler.logging.ButlerLogRecord) -> None:
        """Print the line if the name of the logger isn't in IGNORE_LOGS_FROM.

        Parameters
        ----------
        logLine : `lsst.daf.butler.logging.ButlerLogRecord`
            The log line to print the message from.
        """
        skip = False
        for skipTask in self.IGNORE_LOGS_FROM:
            if logLine.name.find(skipTask) != -1:
                skip = True
                break
        if not skip:
            self._printFormattedLine(logLine)

    @staticmethod
    def _printFormattedLine(logLine: dafButler.logging.ButlerLogRecord) -> None:
        """Print the line, formatted as it would be for a normal task.

        Parameters
        ----------
        logLine : `lsst.daf.butler.logging.ButlerLogRecord`
            The log line to print the message from.
        """
        print(f"{logLine.levelname} {logLine.name}: {logLine.message}")

    def printFailLogs(self, full: bool = False) -> None:
        """Print the logs of all failing task instances.

        Parameters
        ----------
        full : `bool`, optional
            Prints the full log if true, otherwise just prints the last line
            containing the exception message. This defaults to False because
            logs can be very long when printed in full, and printing them all
            in full can run to many thousands of lines.
        """
        fails = self._getFailDataRefs()
        for dataRef in fails:
            print(f"\n{dataRef.dataId}:")
            log = self.logs[dataRef]
            if full:  # print the whole thing
                for line in log:
                    self._printLineIf(line)
            else:
                # Print the last line from the Exception onwards if found,
                # falling back to printing the whole message just in case.
                msg = log[-1].message
                parts = msg.split("Exception ")
                if len(parts) == 2:
                    print(parts[1])
                else:
                    print(msg)

    def doFailZoology(self, giveExampleId: bool = False) -> None:
        """Print all the different types of error, with a count of how many
        of each type occurred.

        Parameters
        ----------
        giveExampleId : `bool`, optional
            If true, for each type of error seen, print an example dataId.
            This can be useful if you want to rerun a single image from the
            command line to debug a particular type of failure mode.
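
        For example, the printed zoology might look like the following, with
        one line per distinct error species (the counts and the error-message
        prefixes shown here are purely illustrative)::

            7 instances of TaskError: Fatal astrometry failure detected: ...
            1 instance  of ValueError: ...with gufunc signature (n?,k),(k,m?)->(n?,m?)...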
276 """
277 zoo = {}
278 examples = {}
279 fails = self._getFailDataRefs()
280 for dataRef in fails:
281 log = self.logs[dataRef]
282 msg = log[-1].message # log[-1].message is the text of the last line of the log
283 parts = msg.split("Exception ")
284 if len(parts) != 2: # pretty sure all fails contain one and only one 'Exception' but be safe
285 self.log.warning(f"Surprise parsing log for {dataRef.dataId}")
286 continue
287 else:
288 error = parts[1]
289 for error_string in self.SPECIAL_ZOO_CASES:
290 if error.find(error_string) != -1:
291 error = error.split(error_string)[0] + error_string + "..."
292 if error not in zoo:
293 zoo[error] = 1
294 if giveExampleId:
295 examples[error] = dataRef.dataId
296 else:
297 zoo[error] += 1
299 pad = 0 # don't pad when giving examples, it looks weird
300 if not giveExampleId:
301 if zoo.values():
302 maxCount = max([v for v in zoo.values()])
303 pad = math.ceil(math.log10(maxCount)) # number of digits in the largest count
305 for error in sorted(zoo.keys()):
306 count = zoo[error]
307 print(f"{count:{pad}} instance{'s' if count > 1 else ' '} of {error}")
308 if giveExampleId:
309 print(f"example dataId: {examples[error]}\n")

    def printSingleLog(self, dataId: dict | dafButler.DataCoordinate, full: bool = True) -> None:
        """Convenience function for printing a single log by its dataId.

        Useful because `doFailZoology()` gives you example dataIds, but
        printing all the logs and searching for that id is not practical.

        Parameters
        ----------
        dataId : `dict` or `lsst.daf.butler.dimensions.DataCoordinate`
            The dataId.
        full : `bool`, optional
            Print the log in full, or just the exception?
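
        For example, with a dataId taken from the output of
        ``doFailZoology(giveExampleId=True)`` (the key/value pair here is
        purely illustrative)::

            logBrowser.printSingleLog({"instrument": "LATISS", "exposure": 12345})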
323 """
324 dRefs = [d for d in self.dataRefs if d.dataId == dataId]
325 if len(dRefs) != 1:
326 raise ValueError(f"Found {len(dRefs)} for {dataId}, expected exactly 1.")
327 dataRef = dRefs[0]
329 print(f"\n{dataRef.dataId}:")
330 log = self.logs[dataRef]
331 if full:
332 for line in log:
333 self._printLineIf(line)
334 else:
335 msg = log[-1].message # log[-1].message is the text of the last line of the log
336 parts = msg.split("Exception ")
337 if len(parts) == 2:
338 print(parts[1])
339 else:
340 print(msg)
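

# ---------------------------------------------------------------------------
# A minimal usage sketch, mirroring the example in the class docstring. The
# repo path, collection name, and where string below are hypothetical
# placeholders and must be adapted to your site before this will run against
# a real butler repo.
if __name__ == "__main__":
    butler = dafButler.Butler("/repo/main")  # hypothetical repo path
    logBrowser = LogBrowser(
        butler,
        taskName="characterizeImage",
        collection="u/username/my-processing-run",  # hypothetical collection
        where="instrument='LATISS' AND visit IN (0..100)",
    )
    logBrowser.countFails()  # print how many failing cases were found
    # Group the distinct failure messages into error "species" and print an
    # example dataId for each, so a single failing case can be rerun to debug.
    logBrowser.doFailZoology(giveExampleId=True)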