# Coverage report header (coverage.py v7.3.2, created 2023-10-11 09:32 +0000):
# python/lsst/pipe/base/execution_reports.py — 30% of 122 statements covered.
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

# Public API of this module, in dependency order (graph-level report first).
__all__ = (
    "QuantumGraphExecutionReport",
    "TaskExecutionReport",
    "DatasetTypeExecutionReport",
    "lookup_quantum_data_id",
)
import dataclasses
import itertools
import logging
import uuid
from collections.abc import Iterable, Mapping
from typing import Any

import networkx
import yaml
from lsst.daf.butler import Butler, DataCoordinate, DatasetRef
from lsst.resources import ResourcePathExpression

from .graph import QuantumGraph, QuantumNode
from .pipeline import PipelineDatasetTypes
46@dataclasses.dataclass
47class DatasetTypeExecutionReport:
48 """A report on the number of produced datasets as well as the status of
49 missing datasets based on metadata.
51 A `DatasetTypeExecutionReport` is created for each `DatasetType` in a
52 `TaskExecutionReport`.
53 """
55 missing_failed: set[DatasetRef] = dataclasses.field(default_factory=set)
56 """Datasets not produced because their quanta failed directly in this
57 run (`set`).
58 """
60 missing_not_produced: dict[DatasetRef, bool] = dataclasses.field(default_factory=dict)
61 """Missing datasets which were not produced due either missing inputs or a
62 failure in finding inputs (`dict`).
63 bool: were predicted inputs produced?
64 """
66 missing_upstream_failed: set[DatasetRef] = dataclasses.field(default_factory=set)
67 """Datasets not produced due to an upstream failure (`set`).
68 """
70 n_produced: int = 0
71 """Count of datasets produced (`int`).
72 """
74 def to_summary_dict(self) -> dict[str, Any]:
75 """Summarize the DatasetTypeExecutionReport in a dictionary.
77 Returns
78 -------
79 summary_dict : `dict`
80 A count of the datasets with each outcome; the number of
81 produced, `missing_failed`, `missing_not_produced`, and
82 `missing_upstream_failed` `DatasetTypes`. See above for attribute
83 descriptions.
84 """
85 return {
86 "produced": self.n_produced,
87 "missing_failed": len(self.missing_failed),
88 "missing_not_produced": len(self.missing_not_produced),
89 "missing_upstream_failed": len(self.missing_upstream_failed),
90 }
92 def handle_missing_dataset(
93 self, output_ref: DatasetRef, failed: bool, status_graph: networkx.DiGraph
94 ) -> None:
95 """Sort missing datasets into outcomes.
97 Parameters
98 ----------
99 output_ref : `~lsst.daf.butler.DatasetRef`
100 Dataset reference of the missing dataset.
101 failed : `bool`
102 Whether the task associated with the missing dataset failed.
103 status_graph : `networkx.DiGraph`
104 The quantum graph produced by `TaskExecutionReport.inspect_quantum`
105 which steps through the run quantum graph and logs the status of
106 each quanta.
107 """
108 if failed:
109 for upstream_quantum_id in status_graph.predecessors(output_ref.id):
110 if status_graph.nodes[upstream_quantum_id]["failed"]:
111 self.missing_upstream_failed.add(output_ref)
112 break
113 else:
114 self.missing_failed.add(output_ref)
115 else:
116 status_graph.nodes[output_ref.id]["not_produced"] = True
117 self.missing_not_produced[output_ref] = any(
118 status_graph.nodes[upstream_dataset_id].get("not_produced", False)
119 for upstream_quantum_id in status_graph.predecessors(output_ref.id)
120 for upstream_dataset_id in status_graph.predecessors(upstream_quantum_id)
121 )
123 def handle_produced_dataset(self, output_ref: DatasetRef, status_graph: networkx.DiGraph) -> None:
124 """Account for produced datasets.
126 Parameters
127 ----------
128 output_ref : `~lsst.daf.butler.DatasetRef`
129 Dataset reference of the dataset.
130 status_graph : `networkx.DiGraph`
131 The quantum graph produced by
132 `QuantumGraphExecutionReport.make_reports` which steps through the
133 quantum graph of a run and logs the status of each quantum.
135 See Also
136 --------
137 TaskExecutionReport.inspect_quantum
138 """
139 status_graph.nodes[output_ref.id]["not_produced"] = False
140 self.n_produced += 1
@dataclasses.dataclass
class TaskExecutionReport:
    """A report on the status and content of a task in an executed quantum
    graph.

    Use task metadata to identify and inspect failures and report on output
    datasets.

    See Also
    --------
    QuantumGraphExecutionReport
    DatasetTypeExecutionReport
    """

    failed: dict[uuid.UUID, DatasetRef] = dataclasses.field(default_factory=dict)
    """A mapping from quantum data ID to log dataset reference for quanta that
    failed directly in this run (`dict`).
    """

    failed_upstream: dict[uuid.UUID, DataCoordinate] = dataclasses.field(default_factory=dict)
    """A mapping of data IDs of quanta that were not attempted due to an
    upstream failure (`dict`).
    """

    output_datasets: dict[str, DatasetTypeExecutionReport] = dataclasses.field(default_factory=dict)
    """Missing and produced outputs of each `DatasetType` (`dict`).
    """

    def inspect_quantum(
        self,
        quantum_node: QuantumNode,
        status_graph: networkx.DiGraph,
        refs: Mapping[str, Mapping[uuid.UUID, DatasetRef]],
        metadata_name: str,
        log_name: str,
    ) -> None:
        """Inspect a quantum of a quantum graph and ascertain the status of
        each associated data product.

        Parameters
        ----------
        quantum_node : `QuantumNode`
            The specific node of the quantum graph to be inspected.
        status_graph : `networkx.DiGraph`
            The quantum graph produced by
            `QuantumGraphExecutionReport.make_reports` which steps through the
            quantum graph of a run and logs the status of each quantum.
        refs : `~collections.abc.Mapping` [ `str`,\
                `~collections.abc.Mapping` [ `uuid.UUID`,\
                `~lsst.daf.butler.DatasetRef` ] ]
            The DatasetRefs of each of the DatasetTypes produced by the task.
            Includes initialization, intermediate and output data products.
        metadata_name : `str`
            The metadata dataset name for the node.
        log_name : `str`
            The name of the log files for the node.

        See Also
        --------
        DatasetTypeExecutionReport.handle_missing_dataset
        DatasetTypeExecutionReport.handle_produced_dataset
        QuantumGraphExecutionReport.make_reports
        """
        quantum = quantum_node.quantum
        node_id = quantum_node.nodeId
        (metadata_ref,) = quantum.outputs[metadata_name]
        (log_ref,) = quantum.outputs[log_name]
        # A quantum that ran to completion always writes its metadata dataset,
        # so a missing metadata ref means this quantum did not succeed.
        failed = metadata_ref.id not in refs[metadata_name]
        if failed:
            # Walk input datasets, then the quanta that touched them, looking
            # for a quantum already marked as failed.
            blocked = any(
                status_graph.nodes[upstream_quantum_id]["failed"]
                for upstream_dataset_id in status_graph.predecessors(node_id)
                for upstream_quantum_id in status_graph.predecessors(upstream_dataset_id)
            )
            if blocked:
                assert quantum.dataId is not None
                self.failed_upstream[node_id] = quantum.dataId
            else:
                # note: log_ref may or may not actually exist
                self.failed[node_id] = log_ref
        status_graph.nodes[node_id]["failed"] = failed
        for output_ref in itertools.chain.from_iterable(quantum.outputs.values()):
            dataset_type_name = output_ref.datasetType.name
            dataset_type_report = self.output_datasets.get(dataset_type_name)
            if dataset_type_report is None:
                dataset_type_report = DatasetTypeExecutionReport()
                self.output_datasets[dataset_type_name] = dataset_type_report
            if output_ref.id in refs[dataset_type_name]:
                dataset_type_report.handle_produced_dataset(output_ref, status_graph)
            else:
                dataset_type_report.handle_missing_dataset(output_ref, failed, status_graph)

    def to_summary_dict(self, butler: Butler, logs: bool = True) -> dict[str, Any]:
        """Summarize the results of the TaskExecutionReport in a dictionary.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            The Butler used for this report.
        logs : `bool`
            Store the logs in the summary dictionary.

        Returns
        -------
        summary_dict : `dict`
            A dictionary containing:

            - outputs: A dictionary summarizing the
              DatasetTypeExecutionReport for each DatasetType associated with
              the task
            - failed_quanta: A dictionary of quanta which failed and their
              dataIDs by quantum graph node id
            - n_quanta_blocked: The number of quanta which failed due to
              upstream failures.
        """
        failed_quanta = {}
        for node_id, log_ref in self.failed.items():
            entry: dict[str, Any] = {"data_id": log_ref.dataId.byName()}
            if logs:
                try:
                    log_records = butler.get(log_ref)
                except LookupError:
                    # The log dataset was predicted but never written.
                    entry["error"] = []
                else:
                    entry["error"] = [
                        record.message for record in log_records if record.levelno >= logging.ERROR
                    ]
            failed_quanta[str(node_id)] = entry
        outputs = {name: report.to_summary_dict() for name, report in self.output_datasets.items()}
        return {
            "outputs": outputs,
            "failed_quanta": failed_quanta,
            "n_quanta_blocked": len(self.failed_upstream),
        }

    def __str__(self) -> str:
        """Return a count of the failed and failed_upstream tasks in the
        TaskExecutionReport.
        """
        return f"failed: {len(self.failed)}\nfailed upstream: {len(self.failed_upstream)}\n"
@dataclasses.dataclass
class QuantumGraphExecutionReport:
    """A report on the execution of a quantum graph.

    Report the detailed status of each failure; whether tasks were not run,
    data is missing from upstream failures, or specific errors occurred during
    task execution (and report the errors). Contains a count of expected,
    produced DatasetTypes for each task. This report can be output as a
    dictionary or a yaml file.

    Parameters
    ----------
    tasks : `dict`
        A dictionary of TaskExecutionReports by task label.

    See Also
    --------
    TaskExecutionReport
    DatasetTypeExecutionReport
    """

    tasks: dict[str, TaskExecutionReport] = dataclasses.field(default_factory=dict)
    """A dictionary of TaskExecutionReports by task label (`dict`).
    """

    def to_summary_dict(self, butler: Butler, logs: bool = True) -> dict[str, Any]:
        """Summarize the results of the `QuantumGraphExecutionReport` in a
        dictionary.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            The Butler used for this report.
        logs : `bool`
            Store the logs in the summary dictionary.

        Returns
        -------
        summary_dict : `dict`
            A dictionary containing a summary of a `TaskExecutionReport` for
            each task in the quantum graph.
        """
        return {task: report.to_summary_dict(butler, logs=logs) for task, report in self.tasks.items()}

    def write_summary_yaml(self, butler: Butler, filename: str, logs: bool = True) -> None:
        """Take the dictionary from
        `QuantumGraphExecutionReport.to_summary_dict` and store its contents in
        a yaml file.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            The Butler used for this report.
        filename : `str`
            The name to be used for the summary yaml file.
        logs : `bool`
            Store the logs in the summary dictionary.
        """
        with open(filename, "w") as stream:
            yaml.safe_dump(self.to_summary_dict(butler, logs=logs), stream)

    @classmethod
    def make_reports(
        cls,
        butler: Butler,
        graph: QuantumGraph | ResourcePathExpression,
    ) -> QuantumGraphExecutionReport:
        """Make a `QuantumGraphExecutionReport`.

        Step through the quantum graph associated with a run, creating a
        `networkx.DiGraph` called status_graph to annotate the status of each
        quantum node. For each task in the quantum graph, use
        `TaskExecutionReport.inspect_quantum` to make a `TaskExecutionReport`
        based on the status of each node. Return a `TaskExecutionReport` for
        each task in the quantum graph.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            The Butler used for this report. This should match the Butler used
            for the run associated with the executed quantum graph.
        graph : `QuantumGraph` | `ResourcePathExpression`
            Either the associated quantum graph object or the uri of the
            location of said quantum graph.

        Returns
        -------
        report : `QuantumGraphExecutionReport`
            The `TaskExecutionReport` for each task in the quantum graph.
        """
        # Inline annotation instead of the legacy `# type:` comment form.
        refs: dict[str, Any] = {}
        status_graph = networkx.DiGraph()
        if not isinstance(graph, QuantumGraph):
            qg = QuantumGraph.loadUri(graph)
        else:
            qg = graph
        assert qg.metadata is not None, "Saved QGs always have metadata."
        collection = qg.metadata["output_run"]
        report = cls()
        # Capture the task list once and reuse it below rather than walking
        # the task graph three times.
        task_defs = list(qg.iterTaskGraph())
        pipeline_dataset_types = PipelineDatasetTypes.fromPipeline(task_defs, registry=butler.registry)
        for dataset_type in itertools.chain(
            pipeline_dataset_types.initIntermediates,
            pipeline_dataset_types.initOutputs,
            pipeline_dataset_types.intermediates,
            pipeline_dataset_types.outputs,
        ):
            # Map dataset ID -> ref for everything this run actually wrote.
            refs[dataset_type.name] = {
                ref.id: ref
                for ref in butler.registry.queryDatasets(
                    dataset_type.name, collections=collection, findFirst=False
                )
            }
        # First pass: build a bipartite quantum/dataset graph so inspection
        # can look upstream when classifying failures.
        for task_def in task_defs:
            for node in qg.getNodesForTask(task_def):
                status_graph.add_node(node.nodeId)
                for ref in itertools.chain.from_iterable(node.quantum.outputs.values()):
                    status_graph.add_edge(node.nodeId, ref.id)
                for ref in itertools.chain.from_iterable(node.quantum.inputs.values()):
                    status_graph.add_edge(ref.id, node.nodeId)

        # Second pass: inspect each quantum, task by task, annotating
        # status_graph as we go so downstream tasks see upstream outcomes.
        for task_def in task_defs:
            task_report = TaskExecutionReport()
            if task_def.logOutputDatasetName is None:
                raise RuntimeError("QG must have log outputs to use execution reports.")
            for node in qg.getNodesForTask(task_def):
                task_report.inspect_quantum(
                    node,
                    status_graph,
                    refs,
                    metadata_name=task_def.metadataDatasetName,
                    log_name=task_def.logOutputDatasetName,
                )
            report.tasks[task_def.label] = task_report
        return report

    def __str__(self) -> str:
        """Return one line per task label with its failure counts."""
        return "\n".join(f"{tasklabel}:{report}" for tasklabel, report in self.tasks.items())
def lookup_quantum_data_id(
    graph_uri: ResourcePathExpression, nodes: Iterable[uuid.UUID]
) -> list[DataCoordinate | None]:
    """Look up a dataId from a quantum graph and a list of quantum graph
    nodeIDs.

    Parameters
    ----------
    graph_uri : `ResourcePathExpression`
        URI of the quantum graph of the run.
    nodes : `~collections.abc.Iterable` [ `uuid.UUID` ]
        Quantum graph nodeID.

    Returns
    -------
    data_ids : `list` [ `lsst.daf.butler.DataCoordinate` ]
        A list of human-readable dataIDs which map to the nodeIDs on the
        quantum graph at graph_uri.
    """
    # Load only the requested nodes rather than the full graph.
    quantum_graph = QuantumGraph.loadUri(graph_uri, nodes=nodes)
    data_ids = []
    for node_id in nodes:
        data_ids.append(quantum_graph.getQuantumNodeByNodeId(node_id).quantum.dataId)
    return data_ids