# python/lsst/pipe/base/execution_reports.py
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = (
    "QuantumGraphExecutionReport",
    "TaskExecutionReport",
    "DatasetTypeExecutionReport",
    "lookup_quantum_data_id",
)

import dataclasses
import itertools
import logging
import uuid
from collections.abc import Iterable, Mapping
from typing import Any

import networkx
import yaml
from lsst.daf.butler import Butler, DataCoordinate, DatasetRef
from lsst.resources import ResourcePathExpression

from .graph import QuantumGraph, QuantumNode
from .pipeline import PipelineDatasetTypes


@dataclasses.dataclass
class DatasetTypeExecutionReport:
    """A report on the number of produced datasets as well as the status of
    missing datasets based on metadata.

    A `DatasetTypeExecutionReport` is created for each
    `~lsst.daf.butler.DatasetType` in a `TaskExecutionReport`.
    """

    failed: set[DatasetRef] = dataclasses.field(default_factory=set)
    """Datasets not produced because their quanta failed directly in this
    run (`set`).
    """

    not_produced: set[DatasetRef] = dataclasses.field(default_factory=set)
    """Missing datasets whose quanta ran successfully but did not produce
    them (`set`).
    """

    blocked: set[DatasetRef] = dataclasses.field(default_factory=set)
    """Datasets not produced due to an upstream failure (`set`).
    """

    n_produced: int = 0
    """Count of datasets produced (`int`).
    """

    def to_summary_dict(self) -> dict[str, Any]:
        r"""Summarize the DatasetTypeExecutionReport in a dictionary.

        Returns
        -------
        summary_dict : `dict`
            A count of the datasets with each outcome; the number of
            ``produced``, ``failed``, ``not_produced``, and ``blocked``
            `~lsst.daf.butler.DatasetType`\ s.
            See above for attribute descriptions.
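
        Examples
        --------
        A minimal sketch: a report with only ``n_produced`` set, so every
        other count is zero.

        >>> DatasetTypeExecutionReport(n_produced=3).to_summary_dict()
        {'produced': 3, 'failed': 0, 'not_produced': 0, 'blocked': 0}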
82 """
83 return {
84 "produced": self.n_produced,
85 "failed": len(self.failed),
86 "not_produced": len(self.not_produced),
87 "blocked": len(self.blocked),
88 }


@dataclasses.dataclass
class TaskExecutionReport:
    """A report on the status and content of a task in an executed quantum
    graph.

    Use task metadata to identify and inspect failures and report on output
    datasets.

    See Also
    --------
    QuantumGraphExecutionReport : Quantum graph report.
    DatasetTypeExecutionReport : DatasetType report.
    """

    failed: dict[uuid.UUID, DatasetRef] = dataclasses.field(default_factory=dict)
    """A mapping from quantum node ID to the log dataset reference of quanta
    that failed directly in this run (`dict`).
    """

    n_succeeded: int = 0
    """A count of successful quanta (`int`).

    This may include quanta that did not produce any datasets; i.e., those
    that raised `NoWorkFound`.
    """

    blocked: dict[uuid.UUID, DataCoordinate] = dataclasses.field(default_factory=dict)
    """A mapping from quantum node ID to data ID for quanta that were not
    attempted due to an upstream failure (`dict`).
    """

    output_datasets: dict[str, DatasetTypeExecutionReport] = dataclasses.field(default_factory=dict)
    """Missing and produced outputs of each `~lsst.daf.butler.DatasetType`
    (`dict`).
    """

    def inspect_quantum(
        self,
        quantum_node: QuantumNode,
        status_graph: networkx.DiGraph,
        refs: Mapping[str, Mapping[uuid.UUID, DatasetRef]],
        metadata_name: str,
        log_name: str,
    ) -> None:
        """Inspect a quantum of a quantum graph and ascertain the status of
        each associated data product.

        Parameters
        ----------
        quantum_node : `QuantumNode`
            The specific node of the quantum graph to be inspected.
        status_graph : `networkx.DiGraph`
            The status graph produced by
            `QuantumGraphExecutionReport.make_reports`, which records the
            status of each quantum as the run's quantum graph is traversed.
        refs : `~collections.abc.Mapping` [ `str`,\
                `~collections.abc.Mapping` [ `uuid.UUID`,\
                `~lsst.daf.butler.DatasetRef` ] ]
            The DatasetRefs of each of the DatasetTypes produced by the task.
            Includes initialization, intermediate, and output data products.
        metadata_name : `str`
            The metadata dataset name for the node.
        log_name : `str`
            The name of the log files for the node.

        See Also
        --------
        QuantumGraphExecutionReport.make_reports : Make reports.
        """
        quantum = quantum_node.quantum
        (metadata_ref,) = quantum.outputs[metadata_name]
        (log_ref,) = quantum.outputs[log_name]
        blocked = False
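        # The task metadata dataset is written only when a quantum finishes
        # successfully, so a missing metadata dataset marks this quantum as
        # either failed or blocked.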
        if metadata_ref.id not in refs[metadata_name]:
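            # status_graph is bipartite (quantum -> dataset -> quantum), so
            # the quanta upstream of this one are two predecessor hops away.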
            if any(
                status_graph.nodes[upstream_quantum_id]["failed"]
                for upstream_dataset_id in status_graph.predecessors(quantum_node.nodeId)
                for upstream_quantum_id in status_graph.predecessors(upstream_dataset_id)
            ):
                assert quantum.dataId is not None
                self.blocked[quantum_node.nodeId] = quantum.dataId
                blocked = True
            else:
                self.failed[quantum_node.nodeId] = log_ref
                # note: log_ref may or may not actually exist
            failed = True
        else:
            failed = False
            self.n_succeeded += 1
        status_graph.nodes[quantum_node.nodeId]["failed"] = failed

        # Now, loop over the datasets to make a DatasetTypeExecutionReport.
        for output_ref in itertools.chain.from_iterable(quantum.outputs.values()):
            if output_ref == metadata_ref or output_ref == log_ref:
                continue
            if (dataset_type_report := self.output_datasets.get(output_ref.datasetType.name)) is None:
                dataset_type_report = DatasetTypeExecutionReport()
                self.output_datasets[output_ref.datasetType.name] = dataset_type_report
            if output_ref.id not in refs[output_ref.datasetType.name]:
                if failed:
                    if blocked:
                        dataset_type_report.blocked.add(output_ref)
                    else:
                        dataset_type_report.failed.add(output_ref)
                else:
                    dataset_type_report.not_produced.add(output_ref)
            else:
                dataset_type_report.n_produced += 1

    def to_summary_dict(self, butler: Butler, do_store_logs: bool = True) -> dict[str, Any]:
        """Summarize the results of the TaskExecutionReport in a dictionary.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            The Butler used for this report.
        do_store_logs : `bool`
            Store the logs in the summary dictionary.

        Returns
        -------
        summary_dict : `dict`
            A dictionary containing:

            - outputs: A dictionary summarizing the
              DatasetTypeExecutionReport for each DatasetType associated with
              the task.
            - failed_quanta: A dictionary of the quanta which failed, keyed
              by quantum graph node ID, giving the data ID and error messages
              of each.
            - n_quanta_blocked: The number of quanta which were not attempted
              due to upstream failures.
            - n_succeeded: The number of quanta which succeeded.
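
            A hypothetical summary (the names and counts below are
            illustrative, not from a real run)::

                {
                    "outputs": {"some_output": {"produced": 10, "failed": 1,
                                                "not_produced": 0, "blocked": 2}},
                    "failed_quanta": {"<node-uuid>": {"data_id": {"visit": 1},
                                                      "error": ["..."]}},
                    "n_quanta_blocked": 2,
                    "n_succeeded": 10,
                }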
223 """
        failed_quanta = {}
        for node_id, log_ref in self.failed.items():
            quantum_info: dict[str, Any] = {"data_id": dict(log_ref.dataId.required)}
            if do_store_logs:
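                # Distinguish a log that could not be looked up at all
                # (empty error list) from one whose file is missing (None).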
                try:
                    log = butler.get(log_ref)
                except LookupError:
                    quantum_info["error"] = []
                except FileNotFoundError:
                    quantum_info["error"] = None
                else:
                    quantum_info["error"] = [
                        record.message for record in log if record.levelno >= logging.ERROR
                    ]
            failed_quanta[str(node_id)] = quantum_info
        return {
            "outputs": {name: r.to_summary_dict() for name, r in self.output_datasets.items()},
            "failed_quanta": failed_quanta,
            "n_quanta_blocked": len(self.blocked),
            "n_succeeded": self.n_succeeded,
        }

    def __str__(self) -> str:
        """Return a count of the failed and blocked quanta in the
        TaskExecutionReport.
        """
        return f"failed: {len(self.failed)}\nblocked: {len(self.blocked)}\n"


@dataclasses.dataclass
class QuantumGraphExecutionReport:
    """A report on the execution of a quantum graph.

    Report the detailed status of each failure: whether tasks were not run,
    data was missing due to upstream failures, or specific errors occurred
    during task execution (and report those errors). Contains a count of the
    expected and produced datasets of each `~lsst.daf.butler.DatasetType` for
    each task. This report can be output as a dictionary or a YAML file.

    Attributes
    ----------
    tasks : `dict`
        A dictionary of TaskExecutionReports by task label.

    See Also
    --------
    TaskExecutionReport : A task report.
    DatasetTypeExecutionReport : A dataset type report.
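
    Examples
    --------
    A hypothetical invocation; ``butler`` and the graph path below are
    placeholders::

        report = QuantumGraphExecutionReport.make_reports(
            butler, "/path/to/graph.qgraph"
        )
        report.write_summary_yaml(butler, "summary.yaml", do_store_logs=True)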
272 """
274 tasks: dict[str, TaskExecutionReport] = dataclasses.field(default_factory=dict)
275 """A dictionary of TaskExecutionReports by task label (`dict`)."""

    def to_summary_dict(self, butler: Butler, do_store_logs: bool = True) -> dict[str, Any]:
        """Summarize the results of the `QuantumGraphExecutionReport` in a
        dictionary.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            The Butler used for this report.
        do_store_logs : `bool`
            Store the logs in the summary dictionary.

        Returns
        -------
        summary_dict : `dict`
            A dictionary containing a summary of a `TaskExecutionReport` for
            each task in the quantum graph.
        """
        return {
            task: report.to_summary_dict(butler, do_store_logs=do_store_logs)
            for task, report in self.tasks.items()
        }

    def write_summary_yaml(self, butler: Butler, filename: str, do_store_logs: bool = True) -> None:
        """Take the dictionary from
        `QuantumGraphExecutionReport.to_summary_dict` and store its contents
        in a YAML file.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            The Butler used for this report.
        filename : `str`
            The name to be used for the summary YAML file.
        do_store_logs : `bool`
            Store the logs in the summary dictionary.
        """
        with open(filename, "w") as stream:
            yaml.safe_dump(self.to_summary_dict(butler, do_store_logs=do_store_logs), stream)

    @classmethod
    def make_reports(
        cls,
        butler: Butler,
        graph: QuantumGraph | ResourcePathExpression,
    ) -> QuantumGraphExecutionReport:
        """Make a `QuantumGraphExecutionReport`.

        Step through the quantum graph associated with a run, creating a
        `networkx.DiGraph` called ``status_graph`` to annotate the status of
        each quantum node. For each task in the quantum graph, use
        `TaskExecutionReport.inspect_quantum` to make a `TaskExecutionReport`
        based on the status of each node. Return a `TaskExecutionReport` for
        each task in the quantum graph.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            The Butler used for this report. This should match the Butler
            used for the run associated with the executed quantum graph.
        graph : `QuantumGraph` | `ResourcePathExpression`
            Either the associated quantum graph object or the URI of the
            location of said quantum graph.

        Returns
        -------
        report : `QuantumGraphExecutionReport`
            The `TaskExecutionReport` for each task in the quantum graph.
        """
        refs: dict[str, Any] = {}
        status_graph = networkx.DiGraph()
        if not isinstance(graph, QuantumGraph):
            qg = QuantumGraph.loadUri(graph)
        else:
            qg = graph
        assert qg.metadata is not None, "Saved QGs always have metadata."
        collection = qg.metadata["output_run"]
        report = cls()
        task_defs = list(qg.iterTaskGraph())
        pipeline_dataset_types = PipelineDatasetTypes.fromPipeline(task_defs, registry=butler.registry)
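        # Collect every dataset of these types that actually exists in the
        # output run, keyed by dataset type name and then by dataset ID.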
        for dataset_type in itertools.chain(
            pipeline_dataset_types.initIntermediates,
            pipeline_dataset_types.initOutputs,
            pipeline_dataset_types.intermediates,
            pipeline_dataset_types.outputs,
        ):
            refs[dataset_type.name] = {
                ref.id: ref
                for ref in butler.registry.queryDatasets(
                    dataset_type.name, collections=collection, findFirst=False
                )
            }
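        # Build a bipartite status graph: each quantum node is linked to the
        # dataset nodes it produces, and each dataset node to the quanta that
        # consume it, so upstream quanta are two predecessor hops away.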
        for task_def in qg.iterTaskGraph():
            for node in qg.getNodesForTask(task_def):
                status_graph.add_node(node.nodeId)
                for ref in itertools.chain.from_iterable(node.quantum.outputs.values()):
                    status_graph.add_edge(node.nodeId, ref.id)
                for ref in itertools.chain.from_iterable(node.quantum.inputs.values()):
                    status_graph.add_edge(ref.id, node.nodeId)

        for task_def in qg.iterTaskGraph():
            task_report = TaskExecutionReport()
            if task_def.logOutputDatasetName is None:
                raise RuntimeError("QG must have log outputs to use execution reports.")
            for node in qg.getNodesForTask(task_def):
                task_report.inspect_quantum(
                    node,
                    status_graph,
                    refs,
                    metadata_name=task_def.metadataDatasetName,
                    log_name=task_def.logOutputDatasetName,
                )
            report.tasks[task_def.label] = task_report
        return report

    def __str__(self) -> str:
        return "\n".join(f"{tasklabel}:{report}" for tasklabel, report in self.tasks.items())


def lookup_quantum_data_id(
    graph_uri: ResourcePathExpression, nodes: Iterable[uuid.UUID]
) -> list[DataCoordinate | None]:
    """Look up a data ID from a quantum graph and a list of quantum graph
    node IDs.

    Parameters
    ----------
    graph_uri : `ResourcePathExpression`
        URI of the quantum graph of the run.
    nodes : `~collections.abc.Iterable` [ `uuid.UUID` ]
        Quantum graph node IDs.

    Returns
    -------
    data_ids : `list` [ `lsst.daf.butler.DataCoordinate` or `None` ]
        A list of human-readable data IDs which map to the node IDs in the
        quantum graph at ``graph_uri``.
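
    Examples
    --------
    A hypothetical call; the path and UUID below are placeholders::

        data_ids = lookup_quantum_data_id(
            "/path/to/graph.qgraph",
            [uuid.UUID("00000000-0000-0000-0000-000000000000")],
        )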
413 """
414 qg = QuantumGraph.loadUri(graph_uri, nodes=nodes)
415 return [qg.getQuantumNodeByNodeId(node).quantum.dataId for node in nodes]