Coverage for python/lsst/pipe/base/execution_reports.py: 27%
133 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-18 09:56 +0000
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
# Public API of this module: the three report classes and the data-ID
# lookup helper.
__all__ = (
    "QuantumGraphExecutionReport",
    "TaskExecutionReport",
    "DatasetTypeExecutionReport",
    "lookup_quantum_data_id",
)
30import dataclasses
31import itertools
32import logging
33import uuid
34from collections.abc import Iterable, Mapping
35from typing import Any
37import networkx
38import yaml
39from lsst.daf.butler import Butler, DataCoordinate, DatasetRef, Quantum
40from lsst.resources import ResourcePathExpression
42from .graph import QuantumGraph
@dataclasses.dataclass
class DatasetTypeExecutionReport:
    """A report on the number of produced datasets as well as the status of
    missing datasets based on metadata.

    A `DatasetTypeExecutionReport` is created for each
    `~lsst.daf.butler.DatasetType` in a `TaskExecutionReport`.
    """

    failed: set[DatasetRef] = dataclasses.field(default_factory=set)
    """Datasets not produced because their quanta failed directly in this
    run (`set`).
    """

    not_produced: set[DatasetRef] = dataclasses.field(default_factory=set)
    """Missing datasets which were not produced by successful quanta.
    """

    blocked: set[DatasetRef] = dataclasses.field(default_factory=set)
    """Datasets not produced due to an upstream failure (`set`).
    """

    n_produced: int = 0
    """Count of datasets produced (`int`).
    """

    def to_summary_dict(self) -> dict[str, Any]:
        r"""Summarize the DatasetTypeExecutionReport in a dictionary.

        Returns
        -------
        summary_dict : `dict`
            A count of the datasets with each outcome; the number of
            produced, ``failed``, ``not_produced``, and ``blocked``
            `~lsst.daf.butler.DatasetType`\ s.
            See above for attribute descriptions.
        """
        # Sizes of the three "missing" buckets; produced is already a count.
        missing_counts = {
            name: len(getattr(self, name)) for name in ("failed", "not_produced", "blocked")
        }
        return {"produced": self.n_produced, **missing_counts}
@dataclasses.dataclass
class TaskExecutionReport:
    """A report on the status and content of a task in an executed quantum
    graph.

    Use task metadata to identify and inspect failures and report on output
    datasets.

    See Also
    --------
    QuantumGraphExecutionReport : Quantum graph report.
    DatasetTypeExecutionReport : DatasetType report.
    """

    failed: dict[uuid.UUID, DatasetRef] = dataclasses.field(default_factory=dict)
    """A mapping from quantum data ID to log dataset reference for quanta that
    failed directly in this run (`dict`).
    """

    n_succeeded: int = 0
    """A count of successful quanta.

    This may include quanta that did not produce any datasets; ie, raised
    `NoWorkFound`.
    """

    blocked: dict[uuid.UUID, DataCoordinate] = dataclasses.field(default_factory=dict)
    """A mapping of data IDs of quanta that were not attempted due to an
    upstream failure (`dict`).
    """

    output_datasets: dict[str, DatasetTypeExecutionReport] = dataclasses.field(default_factory=dict)
    """Missing and produced outputs of each `~lsst.daf.butler.DatasetType`
    (`dict`).
    """

    def inspect_quantum(
        self,
        quantum_id: uuid.UUID,
        quantum: Quantum,
        status_graph: networkx.DiGraph,
        refs: Mapping[str, Mapping[uuid.UUID, DatasetRef]],
        metadata_name: str,
        log_name: str,
    ) -> None:
        """Inspect a quantum of a quantum graph and ascertain the status of
        each associated data product.

        Parameters
        ----------
        quantum_id : `uuid.UUID`
            Unique identifier for the quantum to inspect.
        quantum : `Quantum`
            The specific node of the quantum graph to be inspected.
        status_graph : `networkx.DiGraph`
            The quantum graph produced by
            `QuantumGraphExecutionReport.make_reports` which steps through the
            quantum graph of a run and logs the status of each quantum.
        refs : `~collections.abc.Mapping` [ `str`,\
            `~collections.abc.Mapping` [ `uuid.UUID`,\
            `~lsst.daf.butler.DatasetRef` ] ]
            The DatasetRefs of each of the DatasetTypes produced by the task.
            Includes initialization, intermediate and output data products.
        metadata_name : `str`
            The metadata dataset name for the node.
        log_name : `str`
            The name of the log files for the node.

        See Also
        --------
        QuantumGraphExecutionReport.make_reports : Make reports.
        """
        # Each quantum has exactly one metadata and one log output; the
        # destructuring asserts that.
        (metadata_ref,) = quantum.outputs[metadata_name]
        (log_ref,) = quantum.outputs[log_name]
        blocked = False
        # Absence of the metadata dataset from the run's actual outputs is
        # how a non-successful quantum is detected here.
        if metadata_ref.id not in refs[metadata_name]:
            # Walk two levels of predecessors (quantum -> input dataset ->
            # producing quantum) to see whether any upstream quantum failed.
            if any(
                status_graph.nodes[upstream_quantum_id]["failed"]
                for upstream_dataset_id in status_graph.predecessors(quantum_id)
                for upstream_quantum_id in status_graph.predecessors(upstream_dataset_id)
            ):
                assert quantum.dataId is not None
                self.blocked[quantum_id] = quantum.dataId
                blocked = True
            else:
                self.failed[quantum_id] = log_ref
                # note: log_ref may or may not actually exist
            # Both blocked and directly-failed quanta are marked "failed" in
            # the status graph so the blockage propagates downstream.
            failed = True
        else:
            failed = False
            self.n_succeeded += 1
        status_graph.nodes[quantum_id]["failed"] = failed

        # Now, loop over the datasets to make a DatasetTypeExecutionReport.
        for output_ref in itertools.chain.from_iterable(quantum.outputs.values()):
            # Metadata/log datasets are bookkeeping, not science outputs.
            if output_ref == metadata_ref or output_ref == log_ref:
                continue
            if (dataset_type_report := self.output_datasets.get(output_ref.datasetType.name)) is None:
                dataset_type_report = DatasetTypeExecutionReport()
                self.output_datasets[output_ref.datasetType.name] = dataset_type_report
            if output_ref.id not in refs[output_ref.datasetType.name]:
                if failed:
                    if blocked:
                        dataset_type_report.blocked.add(output_ref)
                    else:
                        dataset_type_report.failed.add(output_ref)
                else:
                    # Quantum succeeded but did not write this dataset
                    # (e.g. a NoWorkFound-style partial output).
                    dataset_type_report.not_produced.add(output_ref)
            else:
                dataset_type_report.n_produced += 1

    def to_summary_dict(
        self, butler: Butler, do_store_logs: bool = True, human_readable: bool = False
    ) -> dict[str, Any]:
        """Summarize the results of the TaskExecutionReport in a dictionary.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            The Butler used for this report.
        do_store_logs : `bool`
            Store the logs in the summary dictionary.
        human_readable : `bool`
            Store more human-readable information to be printed out to the
            command-line.

        Returns
        -------
        summary_dict : `dict`
            A dictionary containing:

            - outputs: A dictionary summarizing the
              DatasetTypeExecutionReport for each DatasetType associated with
              the task
            - failed_quanta: A dictionary of quanta which failed and their
              dataIDs by quantum graph node id
            - n_quanta_blocked: The number of quanta which failed due to
              upstream failures.
            - n_succeeded: The number of quanta which succeeded.

            And possibly, if human-readable is passed:

            - errors: A dictionary of data ids associated with each error
              message. If `human-readable` and `do_store_logs`, this is stored
              here. Otherwise, if `do_store_logs`, it is stored in
              `failed_quanta` keyed by the quantum graph node id.
        """
        failed_quanta = {}
        failed_data_ids = []
        errors = []
        for node_id, log_ref in self.failed.items():
            data_id = dict(log_ref.dataId.required)
            quantum_info: dict[str, Any] = {"data_id": data_id}
            if do_store_logs:
                try:
                    log = butler.get(log_ref)
                except LookupError:
                    # Log dataset is not even registered.
                    quantum_info["error"] = []
                except FileNotFoundError:
                    # Registered but the artifact is missing on disk.
                    quantum_info["error"] = None
                else:
                    # Keep only ERROR-and-above messages from the log.
                    quantum_info["error"] = [
                        record.message for record in log if record.levelno >= logging.ERROR
                    ]
            if human_readable:
                failed_data_ids.append(data_id)
                if do_store_logs:
                    errors.append(quantum_info)
            else:
                failed_quanta[str(node_id)] = quantum_info
        result = {
            "outputs": {name: r.to_summary_dict() for name, r in self.output_datasets.items()},
            "n_quanta_blocked": len(self.blocked),
            "n_succeeded": self.n_succeeded,
        }
        if human_readable:
            result["failed_quanta"] = failed_data_ids
            result["errors"] = errors
        else:
            result["failed_quanta"] = failed_quanta
        return result

    def __str__(self) -> str:
        """Return a count of the failed and blocked tasks in the
        TaskExecutionReport.
        """
        return f"failed: {len(self.failed)}\nblocked: {len(self.blocked)}\n"
@dataclasses.dataclass
class QuantumGraphExecutionReport:
    """A report on the execution of a quantum graph.

    Report the detailed status of each failure; whether tasks were not run,
    data is missing from upstream failures, or specific errors occurred during
    task execution (and report the errors). Contains a count of expected,
    produced DatasetTypes for each task. This report can be output as a
    dictionary or a yaml file.

    Attributes
    ----------
    tasks : `dict`
        A dictionary of TaskExecutionReports by task label.

    See Also
    --------
    TaskExecutionReport : A task report.
    DatasetTypeExecutionReport : A dataset type report.
    """

    tasks: dict[str, TaskExecutionReport] = dataclasses.field(default_factory=dict)
    """A dictionary of TaskExecutionReports by task label (`dict`)."""

    def to_summary_dict(
        self, butler: Butler, do_store_logs: bool = True, human_readable: bool = False
    ) -> dict[str, Any]:
        """Summarize the results of the `QuantumGraphExecutionReport` in a
        dictionary.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            The Butler used for this report.
        do_store_logs : `bool`
            Store the logs in the summary dictionary.
        human_readable : `bool`
            Store more human-readable information to be printed out to the
            command-line.

        Returns
        -------
        summary_dict : `dict`
            A dictionary containing a summary of a `TaskExecutionReport` for
            each task in the quantum graph.
        """
        return {
            task: report.to_summary_dict(butler, do_store_logs=do_store_logs, human_readable=human_readable)
            for task, report in self.tasks.items()
        }

    def write_summary_yaml(self, butler: Butler, filename: str, do_store_logs: bool = True) -> None:
        """Take the dictionary from
        `QuantumGraphExecutionReport.to_summary_dict` and store its contents in
        a yaml file.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            The Butler used for this report.
        filename : `str`
            The name to be used for the summary yaml file.
        do_store_logs : `bool`
            Store the logs in the summary dictionary.
        """
        with open(filename, "w") as stream:
            yaml.safe_dump(self.to_summary_dict(butler, do_store_logs=do_store_logs), stream)

    @classmethod
    def make_reports(
        cls,
        butler: Butler,
        graph: QuantumGraph | ResourcePathExpression,
    ) -> QuantumGraphExecutionReport:
        """Make a `QuantumGraphExecutionReport`.

        Step through the quantum graph associated with a run, creating a
        `networkx.DiGraph` called status_graph to annotate the status of each
        quantum node. For each task in the quantum graph, use
        `TaskExecutionReport.inspect_quantum` to make a `TaskExecutionReport`
        based on the status of each node. Return a `TaskExecutionReport` for
        each task in the quantum graph.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            The Butler used for this report. This should match the Butler used
            for the run associated with the executed quantum graph.
        graph : `QuantumGraph` | `ResourcePathExpression`
            Either the associated quantum graph object or the uri of the
            location of said quantum graph.

        Returns
        -------
        report: `QuantumGraphExecutionReport`
            The `TaskExecutionReport` for each task in the quantum graph.
        """
        refs = {}  # type: dict[str, Any]
        status_graph = networkx.DiGraph()
        if not isinstance(graph, QuantumGraph):
            qg = QuantumGraph.loadUri(graph)
        else:
            qg = graph
        assert qg.metadata is not None, "Saved QGs always have metadata."
        collection = qg.metadata["output_run"]
        report = cls()
        # Query the butler once per produced dataset type, building an
        # id -> ref index of what actually exists in the output run.
        for dataset_type_node in qg.pipeline_graph.dataset_types.values():
            # Skip overall-input dataset types; only produced outputs are
            # relevant to execution status.
            if qg.pipeline_graph.producer_of(dataset_type_node.name) is None:
                continue
            refs[dataset_type_node.name] = {
                ref.id: ref
                for ref in butler.registry.queryDatasets(
                    dataset_type_node.name, collections=collection, findFirst=False
                )
            }
        # First pass: build the bipartite status graph (quantum -> output
        # dataset, input dataset -> quantum) over all tasks, so upstream
        # failures can be traced across task boundaries.
        for task_node in qg.pipeline_graph.tasks.values():
            for quantum_id, quantum in qg.get_task_quanta(task_node.label).items():
                status_graph.add_node(quantum_id)
                for ref in itertools.chain.from_iterable(quantum.outputs.values()):
                    status_graph.add_edge(quantum_id, ref.id)
                for ref in itertools.chain.from_iterable(quantum.inputs.values()):
                    status_graph.add_edge(ref.id, quantum_id)

        # Second pass: inspect each quantum.  Tasks are assumed to be
        # iterated in pipeline (topological) order so upstream statuses are
        # annotated before downstream quanta are inspected.
        for task_node in qg.pipeline_graph.tasks.values():
            task_report = TaskExecutionReport()
            if task_node.log_output is None:
                raise RuntimeError("QG must have log outputs to use execution reports.")
            for quantum_id, quantum in qg.get_task_quanta(task_node.label).items():
                task_report.inspect_quantum(
                    quantum_id,
                    quantum,
                    status_graph,
                    refs,
                    metadata_name=task_node.metadata_output.dataset_type_name,
                    log_name=task_node.log_output.dataset_type_name,
                )
            report.tasks[task_node.label] = task_report
        return report

    def __str__(self) -> str:
        # One "label:failed/blocked counts" line per task.
        return "\n".join(f"{tasklabel}:{report}" for tasklabel, report in self.tasks.items())
def lookup_quantum_data_id(
    graph_uri: ResourcePathExpression, nodes: Iterable[uuid.UUID]
) -> list[DataCoordinate | None]:
    """Look up a dataId from a quantum graph and a list of quantum graph
    nodeIDs.

    Parameters
    ----------
    graph_uri : `ResourcePathExpression`
        URI of the quantum graph of the run.
    nodes : `~collections.abc.Iterable` [ `uuid.UUID` ]
        Quantum graph nodeID.

    Returns
    -------
    data_ids : `list` [ `lsst.daf.butler.DataCoordinate` ]
        A list of human-readable dataIDs which map to the nodeIDs on the
        quantum graph at graph_uri.
    """
    # Load just the requested nodes rather than the whole graph.
    qg = QuantumGraph.loadUri(graph_uri, nodes=nodes)
    data_ids = []
    for node in nodes:
        data_ids.append(qg.getQuantumNodeByNodeId(node).quantum.dataId)
    return data_ids