Coverage for python/lsst/ctrl/mpexec/showInfo.py: 11%
199 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-30 02:55 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-30 02:55 -0700
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ["ShowInfo"]
32import fnmatch
33import re
34import sys
35from collections import defaultdict
36from collections.abc import Mapping
37from types import SimpleNamespace
38from typing import Any
40import lsst.pex.config as pexConfig
41import lsst.pex.config.history as pexConfigHistory
42from lsst.daf.butler import Butler, DatasetRef, DatasetType, NamedKeyMapping
43from lsst.daf.butler.datastore.record_data import DatastoreRecordData
44from lsst.pipe.base import Pipeline, QuantumGraph
45from lsst.pipe.base.pipeline_graph import visualization
47from . import util
48from .cmdLineFwk import _ButlerFactory
class _FilteredStream:
    """A file-like object that only passes through config fields whose
    name matches a glob pattern.

    Note
    ----
    This class depends on implementation details of ``Config.saveToStream``
    methods, in particular that that method uses a single call to ``write()``
    to save the information about a single config field, and that this call
    combines the comment string(s) for a field with the field path and value.
    This class will not work reliably on the "import" strings, so imports
    should be disabled by passing ``skipImports=True`` to ``saveToStream()``.
    """

    def __init__(self, pattern: str, stream: Any = None) -> None:
        self.stream = sys.stdout if stream is None else stream
        # Obey case only when the pattern is not all-lowercase or it
        # explicitly requests :NOIGNORECASE.
        caseSensitive = re.search(r"(.*):NOIGNORECASE$", pattern)
        if caseSensitive:
            self._pattern = re.compile(fnmatch.translate(caseSensitive.group(1)))
            return
        if pattern != pattern.lower():
            print(
                f'Matching "{pattern}" without regard to case (append :NOIGNORECASE to prevent this)',
                file=self.stream,
            )
        self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr: str) -> None:
        # Drop any doc-comment lines and the "=value" part so that only the
        # field path itself is matched against the pattern.
        lastLine = showStr.rstrip().split("\n")[-1]
        fieldPath = lastLine.split("=")[0]
        if self._pattern.search(fieldPath):
            self.stream.write(showStr)
class ShowInfo:
    """Show information about a pipeline or quantum graph.

    Parameters
    ----------
    show : `list` [`str`]
        A list of show commands, some of which may have additional parameters
        specified using an ``=``.
    stream : I/O stream or None
        The output stream to use. `None` will be treated as `sys.stdout`.

    Raises
    ------
    ValueError
        Raised if some show commands are not recognized.
    """

    # Commands that only need a Pipeline to be processed.
    pipeline_commands = {
        "pipeline",
        "config",
        "history",
        "tasks",
        "dump-config",
        "pipeline-graph",
        "task-graph",
    }
    # Commands that need a QuantumGraph to be processed.
    graph_commands = {"graph", "workflow", "uri"}

    def __init__(self, show: list[str], stream: Any = None) -> None:
        if stream is None:
            # Defer assigning sys.stdout to allow click to redefine it if
            # it wants. Assigning the default at class definition leads
            # to confusion on reassignment.
            stream = sys.stdout
        # A command may appear multiple times with different "=" arguments,
        # e.g. ["config=task1::x", "config=task2::y"].
        commands: dict[str, list[str]] = defaultdict(list)
        for value in show:
            command, _, args = value.partition("=")
            commands[command].append(args)
        self.commands = commands
        self.stream = stream
        self.handled: set[str] = set()

        known = self.pipeline_commands | self.graph_commands
        unknown = set(commands) - known
        if unknown:
            # Sort both collections so the error message is deterministic
            # (set iteration order is not) and easy to scan.
            raise ValueError(
                f"Unknown value(s) for show: {sorted(unknown)} (choose from '{', '.join(sorted(known))}')"
            )
136 @property
137 def unhandled(self) -> frozenset[str]:
138 """Return the commands that have not yet been processed."""
139 return frozenset(set(self.commands) - self.handled)
141 def show_pipeline_info(self, pipeline: Pipeline, butler: Butler | None) -> None:
142 """Display useful information about the pipeline.
144 Parameters
145 ----------
146 pipeline : `lsst.pipe.base.Pipeline`
147 The pipeline to use when reporting information.
148 butler : `~lsst.daf.butler.Butler`
149 Butler to use for querying.
150 """
151 if butler is not None:
152 registry = butler.registry
153 else:
154 registry = None
155 for command in self.pipeline_commands:
156 if command not in self.commands:
157 continue
158 args = self.commands[command]
160 match command:
161 case "pipeline":
162 print(pipeline, file=self.stream)
163 case "config":
164 for arg in args:
165 self._showConfig(pipeline, arg, False)
166 case "dump-config":
167 for arg in args:
168 self._showConfig(pipeline, arg, True)
169 case "history":
170 for arg in args:
171 self._showConfigHistory(pipeline, arg)
172 case "tasks":
173 self._showTaskHierarchy(pipeline)
174 case "pipeline-graph":
175 visualization.show(pipeline.to_graph(registry), self.stream, dataset_types=True)
176 case "task-graph":
177 visualization.show(pipeline.to_graph(registry), self.stream, dataset_types=False)
178 case _:
179 raise RuntimeError(f"Unexpectedly tried to process command {command!r}.")
180 self.handled.add(command)
182 def show_graph_info(self, graph: QuantumGraph, args: SimpleNamespace | None = None) -> None:
183 """Show information associated with this graph.
185 Parameters
186 ----------
187 graph : `lsst.pipe.base.QuantumGraph`
188 Graph to use when reporting information.
189 args : `types.SimpleNamespace`, optional
190 Parsed command-line parameters. Used to obtain additional external
191 information such as the location of a usable Butler.
192 """
193 for command in self.graph_commands:
194 if command not in self.commands:
195 continue
196 match command:
197 case "graph":
198 self._showGraph(graph)
199 case "uri":
200 if args is None:
201 raise ValueError("The uri option requires additional command line arguments.")
202 self._showUri(graph, args)
203 case "workflow":
204 self._showWorkflow(graph)
205 case _:
206 raise RuntimeError(f"Unexpectedly tried to process command {command!r}.")
207 self.handled.add(command)
209 def _showConfig(self, pipeline: Pipeline, showArgs: str, dumpFullConfig: bool) -> None:
210 """Show task configuration
212 Parameters
213 ----------
214 pipeline : `lsst.pipe.base.Pipeline`
215 Pipeline definition
216 showArgs : `str`
217 Defines what to show
218 dumpFullConfig : `bool`
219 If true then dump complete task configuration with all imports.
220 """
221 stream: Any = self.stream
222 if dumpFullConfig:
223 # Task label can be given with this option
224 taskName = showArgs
225 else:
226 # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
227 matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
228 assert matConfig is not None, "regex always matches"
229 taskName = matConfig.group(1)
230 pattern = matConfig.group(2)
231 if pattern:
232 stream = _FilteredStream(pattern, stream=stream)
234 tasks = util.filterTasks(pipeline, taskName)
235 if not tasks:
236 raise ValueError(f"Pipeline has no tasks named {taskName}")
238 for taskDef in tasks:
239 print(f"### Configuration for task `{taskDef.label}'", file=self.stream)
240 taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)
242 def _showConfigHistory(self, pipeline: Pipeline, showArgs: str) -> None:
243 """Show history for task configuration.
245 Parameters
246 ----------
247 pipeline : `lsst.pipe.base.Pipeline`
248 Pipeline definition
249 showArgs : `str`
250 Defines what to show
251 """
252 taskName = None
253 pattern = None
254 matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
255 if matHistory:
256 taskName = matHistory.group(1)
257 pattern = matHistory.group(2)
258 if not pattern:
259 raise ValueError("Please provide a value with --show history (e.g. history=Task::param)")
261 tasks = util.filterTasks(pipeline, taskName)
262 if not tasks:
263 raise ValueError(f"Pipeline has no tasks named {taskName}")
265 found = False
266 for taskDef in tasks:
267 config = taskDef.config
269 # Look for any matches in the config hierarchy for this name
270 for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
271 if nmatch > 0:
272 print("", file=self.stream)
274 cpath, _, cname = thisName.rpartition(".")
275 try:
276 if not cpath:
277 # looking for top-level field
278 hconfig = taskDef.config
279 else:
280 hconfig = eval("config." + cpath, {}, {"config": config})
281 except AttributeError:
282 print(
283 f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
284 file=sys.stderr,
285 )
286 hconfig = None
288 # Sometimes we end up with a non-Config so skip those
289 if isinstance(hconfig, pexConfig.Config | pexConfig.ConfigurableInstance) and hasattr(
290 hconfig, cname
291 ):
292 print(f"### Configuration field for task `{taskDef.label}'", file=self.stream)
293 print(pexConfigHistory.format(hconfig, cname), file=self.stream)
294 found = True
296 if not found:
297 raise ValueError(f"None of the tasks has field matching {pattern}")
299 def _showTaskHierarchy(self, pipeline: Pipeline) -> None:
300 """Print task hierarchy to stdout
302 Parameters
303 ----------
304 pipeline : `lsst.pipe.base.Pipeline`
305 Pipeline definition.
306 """
307 for taskDef in pipeline.toExpandedPipeline():
308 print(f"### Subtasks for task `{taskDef.taskName}'", file=self.stream)
310 for configName, taskName in util.subTaskIter(taskDef.config):
311 print(f"{configName}: {taskName}", file=self.stream)
313 def _showGraph(self, graph: QuantumGraph) -> None:
314 """Print quanta information to stdout
316 Parameters
317 ----------
318 graph : `lsst.pipe.base.QuantumGraph`
319 Execution graph.
320 """
322 def _print_refs(
323 mapping: NamedKeyMapping[DatasetType, tuple[DatasetRef, ...]],
324 datastore_records: Mapping[str, DatastoreRecordData],
325 ) -> None:
326 """Print complete information on quantum input or output refs."""
327 for key, refs in mapping.items():
328 if refs:
329 print(f" {key}:", file=self.stream)
330 for ref in refs:
331 print(f" - {ref}", file=self.stream)
332 for datastore_name, record_data in datastore_records.items():
333 if record_map := record_data.records.get(ref.id):
334 print(f" records for {datastore_name}:", file=self.stream)
335 for table_name, records in record_map.items():
336 print(f" - {table_name}:", file=self.stream)
337 for record in records:
338 print(f" - {record}:", file=self.stream)
339 else:
340 print(f" {key}: []", file=self.stream)
342 for taskNode in graph.iterTaskGraph():
343 print(taskNode, file=self.stream)
345 for iq, quantum_node in enumerate(graph.getNodesForTask(taskNode)):
346 quantum = quantum_node.quantum
347 print(
348 f" Quantum {iq} dataId={quantum.dataId} nodeId={quantum_node.nodeId}:", file=self.stream
349 )
350 print(" inputs:", file=self.stream)
351 _print_refs(quantum.inputs, quantum.datastore_records)
352 print(" outputs:", file=self.stream)
353 _print_refs(quantum.outputs, quantum.datastore_records)
355 def _showWorkflow(self, graph: QuantumGraph) -> None:
356 """Print quanta information and dependency to stdout
358 Parameters
359 ----------
360 graph : `lsst.pipe.base.QuantumGraph`
361 Execution graph.
362 """
363 for node in graph:
364 print(f"Quantum {node.nodeId}: {node.taskDef.taskName}", file=self.stream)
365 for parent in graph.determineInputsToQuantumNode(node):
366 print(f"Parent Quantum {parent.nodeId} - Child Quantum {node.nodeId}", file=self.stream)
368 def _showUri(self, graph: QuantumGraph, args: SimpleNamespace) -> None:
369 """Print input and predicted output URIs to stdout
371 Parameters
372 ----------
373 graph : `lsst.pipe.base.QuantumGraph`
374 Execution graph
375 args : `types.SimpleNamespace`
376 Parsed command line
377 """
379 def dumpURIs(thisRef: DatasetRef) -> None:
380 primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
381 if primary:
382 print(f" {primary}", file=self.stream)
383 else:
384 print(" (disassembled artifact)", file=self.stream)
385 for compName, compUri in components.items():
386 print(f" {compName}: {compUri}", file=self.stream)
388 butler = _ButlerFactory.makeReadButler(args)
389 for node in graph:
390 print(f"Quantum {node.nodeId}: {node.taskDef.taskName}", file=self.stream)
391 print(" inputs:", file=self.stream)
392 for refs in node.quantum.inputs.values():
393 for ref in refs:
394 dumpURIs(ref)
395 print(" outputs:", file=self.stream)
396 for refs in node.quantum.outputs.values():
397 for ref in refs:
398 dumpURIs(ref)