Coverage for python/lsst/pipe/base/execution_reports.py: 28%
125 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-11 03:31 -0700
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
# Public API of this module.
__all__ = (
    "QuantumGraphExecutionReport",
    "TaskExecutionReport",
    "DatasetTypeExecutionReport",
    "lookup_quantum_data_id",
)
30import dataclasses
31import itertools
32import logging
33import uuid
34from collections.abc import Iterable, Mapping
35from typing import Any
37import networkx
38import yaml
39from lsst.daf.butler import Butler, DataCoordinate, DatasetRef, Quantum
40from lsst.resources import ResourcePathExpression
42from .graph import QuantumGraph
@dataclasses.dataclass
class DatasetTypeExecutionReport:
    """A per-`~lsst.daf.butler.DatasetType` accounting of produced datasets
    and of why missing datasets are missing.

    A `TaskExecutionReport` holds one `DatasetTypeExecutionReport` for each
    `~lsst.daf.butler.DatasetType` produced by its task.
    """

    failed: set[DatasetRef] = dataclasses.field(default_factory=set)
    """Datasets not produced because their quanta failed directly in this
    run (`set`).
    """

    not_produced: set[DatasetRef] = dataclasses.field(default_factory=set)
    """Missing datasets which were not produced by successful quanta.
    """

    blocked: set[DatasetRef] = dataclasses.field(default_factory=set)
    """Datasets not produced due to an upstream failure (`set`).
    """

    n_produced: int = 0
    """Count of datasets produced (`int`).
    """

    def to_summary_dict(self) -> dict[str, Any]:
        r"""Summarize this report as a dictionary of counts.

        Returns
        -------
        summary_dict : `dict`
            A count of the datasets with each outcome: the number of
            ``produced``, ``failed``, ``not_produced``, and ``blocked``
            `~lsst.daf.butler.DatasetType`\ s.  See the attribute
            documentation for what each outcome means.
        """
        counts: dict[str, Any] = {"produced": self.n_produced}
        # The remaining outcomes are all sets of DatasetRefs; report sizes.
        for outcome in ("failed", "not_produced", "blocked"):
            counts[outcome] = len(getattr(self, outcome))
        return counts
@dataclasses.dataclass
class TaskExecutionReport:
    """A report on the status and content of a task in an executed quantum
    graph.

    Use task metadata to identify and inspect failures and report on output
    datasets.

    See Also
    --------
    QuantumGraphExecutionReport : Quantum graph report.
    DatasetTypeExecutionReport : DatasetType report.
    """

    failed: dict[uuid.UUID, DatasetRef] = dataclasses.field(default_factory=dict)
    """A mapping from quantum data ID to log dataset reference for quanta that
    failed directly in this run (`dict`).
    """

    n_succeeded: int = 0
    """A count of successful quanta.

    This may include quanta that did not produce any datasets; ie, raised
    `NoWorkFound`.
    """

    blocked: dict[uuid.UUID, DataCoordinate] = dataclasses.field(default_factory=dict)
    """A mapping of data IDs of quanta that were not attempted due to an
    upstream failure (`dict`).
    """

    output_datasets: dict[str, DatasetTypeExecutionReport] = dataclasses.field(default_factory=dict)
    """Missing and produced outputs of each `~lsst.daf.butler.DatasetType`
    (`dict`).
    """

    def inspect_quantum(
        self,
        quantum_id: uuid.UUID,
        quantum: Quantum,
        status_graph: networkx.DiGraph,
        refs: Mapping[str, Mapping[uuid.UUID, DatasetRef]],
        metadata_name: str,
        log_name: str,
    ) -> None:
        """Inspect a quantum of a quantum graph and ascertain the status of
        each associated data product.

        Parameters
        ----------
        quantum_id : `uuid.UUID`
            Unique identifier for the quantum to inspect.
        quantum : `Quantum`
            The specific node of the quantum graph to be inspected.
        status_graph : `networkx.DiGraph`
            The quantum graph produced by
            `QuantumGraphExecutionReport.make_reports` which steps through the
            quantum graph of a run and logs the status of each quantum.
        refs : `~collections.abc.Mapping` [ `str`,\
                `~collections.abc.Mapping` [ `uuid.UUID`,\
                `~lsst.daf.butler.DatasetRef` ] ]
            The DatasetRefs of each of the DatasetTypes produced by the task.
            Includes initialization, intermediate and output data products.
        metadata_name : `str`
            The metadata dataset name for the node.
        log_name : `str`
            The name of the log files for the node.

        See Also
        --------
        QuantumGraphExecutionReport.make_reports : Make reports.
        """
        (metadata_ref,) = quantum.outputs[metadata_name]
        (log_ref,) = quantum.outputs[log_name]
        blocked = False
        if metadata_ref.id not in refs[metadata_name]:
            # The metadata dataset was never written, so this quantum did not
            # succeed.  Distinguish "blocked" (some upstream quantum failed)
            # from a direct failure of this quantum.
            if any(
                status_graph.nodes[upstream_quantum_id]["failed"]
                for upstream_dataset_id in status_graph.predecessors(quantum_id)
                for upstream_quantum_id in status_graph.predecessors(upstream_dataset_id)
            ):
                assert quantum.dataId is not None
                self.blocked[quantum_id] = quantum.dataId
                blocked = True
            else:
                self.failed[quantum_id] = log_ref
                # note: log_ref may or may not actually exist
            # Both directly-failed and blocked quanta are marked "failed" in
            # the status graph so that blockage propagates to downstream
            # quanta.  (Setting this only on the direct-failure branch would
            # leave ``failed`` unbound for blocked quanta.)
            failed = True
        else:
            failed = False
            self.n_succeeded += 1
        status_graph.nodes[quantum_id]["failed"] = failed

        # Now, loop over the datasets to make a DatasetTypeExecutionReport.
        for output_ref in itertools.chain.from_iterable(quantum.outputs.values()):
            if output_ref == metadata_ref or output_ref == log_ref:
                continue
            if (dataset_type_report := self.output_datasets.get(output_ref.datasetType.name)) is None:
                dataset_type_report = DatasetTypeExecutionReport()
                self.output_datasets[output_ref.datasetType.name] = dataset_type_report
            if output_ref.id not in refs[output_ref.datasetType.name]:
                if failed:
                    if blocked:
                        dataset_type_report.blocked.add(output_ref)
                    else:
                        dataset_type_report.failed.add(output_ref)
                else:
                    # Quantum succeeded but did not write this output
                    # (e.g. NoWorkFound).
                    dataset_type_report.not_produced.add(output_ref)
            else:
                dataset_type_report.n_produced += 1

    def to_summary_dict(
        self, butler: Butler, do_store_logs: bool = True, human_readable: bool = False
    ) -> dict[str, Any]:
        """Summarize the results of the TaskExecutionReport in a dictionary.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            The Butler used for this report.  Only accessed when
            ``do_store_logs`` is `True`.
        do_store_logs : `bool`
            Store the error messages from the logs in the summary dictionary.
        human_readable : `bool`
            Store more human-readable information to be printed out to the
            command-line.

        Returns
        -------
        summary_dict : `dict`
            A dictionary containing:

            - outputs: A dictionary summarizing the
              DatasetTypeExecutionReport for each DatasetType associated with
              the task
            - failed_quanta: A dictionary of quanta which failed and their
              dataIDs by quantum graph node id
            - n_quanta_blocked: The number of quanta which failed due to
              upstream failures.
            - n_succeeded: The number of quanta which succeeded.

            And additionally, if ``human_readable`` is `True`:

            - errors: A dictionary of data IDs and error messages for each
              failed quantum, keyed by quantum graph node id.
        """
        failed_quanta = {}
        errors = {}
        for node_id, log_ref in self.failed.items():
            data_ids = dict(log_ref.dataId.required)
            quantum_info: dict[str, Any] = {"data_id": data_ids}
            if do_store_logs:
                try:
                    log = butler.get(log_ref)
                except LookupError:
                    # No log dataset is registered for this quantum.
                    quantum_info["error"] = []
                except FileNotFoundError:
                    # The log dataset is registered but its file is missing.
                    quantum_info["error"] = None
                else:
                    quantum_info["error"] = [
                        record.message for record in log if record.levelno >= logging.ERROR
                    ]
            if human_readable:
                # Keep the compact data ID in failed_quanta; the full error
                # information goes under "errors" below.  (Previously this
                # branch returned inside the loop, so only the first failed
                # quantum was ever reported.)
                failed_quanta[str(node_id)] = {"data_id": data_ids}
                errors[str(node_id)] = quantum_info
            else:
                failed_quanta[str(node_id)] = quantum_info
        result: dict[str, Any] = {
            "outputs": {name: r.to_summary_dict() for name, r in self.output_datasets.items()},
            "failed_quanta": failed_quanta,
            "n_quanta_blocked": len(self.blocked),
            "n_succeeded": self.n_succeeded,
        }
        if human_readable:
            result["errors"] = errors
        return result

    def __str__(self) -> str:
        """Return a count of the failed and blocked tasks in the
        TaskExecutionReport.
        """
        return f"failed: {len(self.failed)}\nblocked: {len(self.blocked)}\n"
@dataclasses.dataclass
class QuantumGraphExecutionReport:
    """A report on the execution of a quantum graph.

    Report the detailed status of each failure; whether tasks were not run,
    data is missing from upstream failures, or specific errors occurred during
    task execution (and report the errors). Contains a count of expected,
    produced DatasetTypes for each task. This report can be output as a
    dictionary or a yaml file.

    Attributes
    ----------
    tasks : `dict`
        A dictionary of TaskExecutionReports by task label.

    See Also
    --------
    TaskExecutionReport : A task report.
    DatasetTypeExecutionReport : A dataset type report.
    """

    tasks: dict[str, TaskExecutionReport] = dataclasses.field(default_factory=dict)
    """A dictionary of TaskExecutionReports by task label (`dict`)."""

    def to_summary_dict(
        self, butler: Butler, do_store_logs: bool = True, human_readable: bool = False
    ) -> dict[str, Any]:
        """Summarize the execution report as a dictionary keyed by task
        label.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            The Butler used for this report.
        do_store_logs : `bool`
            Store the logs in the summary dictionary.
        human_readable : `bool`
            Store more human-readable information to be printed out to the
            command-line.

        Returns
        -------
        summary_dict : `dict`
            A summary of the `TaskExecutionReport` for each task in the
            quantum graph.
        """
        summary: dict[str, Any] = {}
        for label, task_report in self.tasks.items():
            summary[label] = task_report.to_summary_dict(
                butler, do_store_logs=do_store_logs, human_readable=human_readable
            )
        return summary

    def write_summary_yaml(self, butler: Butler, filename: str, do_store_logs: bool = True) -> None:
        """Store the summary dictionary produced by
        `QuantumGraphExecutionReport.to_summary_dict` in a yaml file.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            The Butler used for this report.
        filename : `str`
            The name to be used for the summary yaml file.
        do_store_logs : `bool`
            Store the logs in the summary dictionary.
        """
        with open(filename, "w") as stream:
            summary = self.to_summary_dict(butler, do_store_logs=do_store_logs)
            yaml.safe_dump(summary, stream)

    @classmethod
    def make_reports(
        cls,
        butler: Butler,
        graph: QuantumGraph | ResourcePathExpression,
    ) -> QuantumGraphExecutionReport:
        """Make a `QuantumGraphExecutionReport`.

        Step through the quantum graph associated with a run, creating a
        `networkx.DiGraph` called status_graph to annotate the status of each
        quantum node. For each task in the quantum graph, use
        `TaskExecutionReport.inspect_quantum` to make a `TaskExecutionReport`
        based on the status of each node. Return a `TaskExecutionReport` for
        each task in the quantum graph.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            The Butler used for this report. This should match the Butler used
            for the run associated with the executed quantum graph.
        graph : `QuantumGraph` | `ResourcePathExpression`
            Either the associated quantum graph object or the uri of the
            location of said quantum graph.

        Returns
        -------
        report: `QuantumGraphExecutionReport`
            The `TaskExecutionReport` for each task in the quantum graph.
        """
        if isinstance(graph, QuantumGraph):
            qg = graph
        else:
            qg = QuantumGraph.loadUri(graph)
        assert qg.metadata is not None, "Saved QGs always have metadata."
        collection = qg.metadata["output_run"]
        report = cls()
        # Gather the registry's view of every dataset actually produced in
        # the output run, keyed by dataset type name and then dataset ID.
        refs: dict[str, Any] = {}
        for dataset_type_node in qg.pipeline_graph.dataset_types.values():
            if qg.pipeline_graph.producer_of(dataset_type_node.name) is None:
                # Overall-input dataset types were not produced in this run.
                continue
            found = butler.registry.queryDatasets(
                dataset_type_node.name, collections=collection, findFirst=False
            )
            refs[dataset_type_node.name] = {ref.id: ref for ref in found}
        # Build a bipartite quantum/dataset graph whose nodes will be
        # annotated with per-quantum status.
        status_graph = networkx.DiGraph()
        for task_node in qg.pipeline_graph.tasks.values():
            for quantum_id, quantum in qg.get_task_quanta(task_node.label).items():
                status_graph.add_node(quantum_id)
                for output_ref in itertools.chain.from_iterable(quantum.outputs.values()):
                    status_graph.add_edge(quantum_id, output_ref.id)
                for input_ref in itertools.chain.from_iterable(quantum.inputs.values()):
                    status_graph.add_edge(input_ref.id, quantum_id)

        for task_node in qg.pipeline_graph.tasks.values():
            task_report = TaskExecutionReport()
            if task_node.log_output is None:
                raise RuntimeError("QG must have log outputs to use execution reports.")
            for quantum_id, quantum in qg.get_task_quanta(task_node.label).items():
                task_report.inspect_quantum(
                    quantum_id,
                    quantum,
                    status_graph,
                    refs,
                    metadata_name=task_node.metadata_output.dataset_type_name,
                    log_name=task_node.log_output.dataset_type_name,
                )
            report.tasks[task_node.label] = task_report
        return report

    def __str__(self) -> str:
        """Return per-task failure/blocked counts, one task per line."""
        lines = [f"{tasklabel}:{report}" for tasklabel, report in self.tasks.items()]
        return "\n".join(lines)
def lookup_quantum_data_id(
    graph_uri: ResourcePathExpression, nodes: Iterable[uuid.UUID]
) -> list[DataCoordinate | None]:
    """Map quantum graph node IDs to their human-readable data IDs.

    Parameters
    ----------
    graph_uri : `ResourcePathExpression`
        URI of the quantum graph of the run.
    nodes : `~collections.abc.Iterable` [ `uuid.UUID` ]
        Quantum graph nodeID.

    Returns
    -------
    data_ids : `list` [ `lsst.daf.butler.DataCoordinate` ]
        A list of human-readable dataIDs which map to the nodeIDs on the
        quantum graph at graph_uri.
    """
    # Only the requested nodes are loaded from the graph.
    quantum_graph = QuantumGraph.loadUri(graph_uri, nodes=nodes)
    data_ids: list[DataCoordinate | None] = []
    for node in nodes:
        quantum_node = quantum_graph.getQuantumNodeByNodeId(node)
        data_ids.append(quantum_node.quantum.dataId)
    return data_ids