Coverage for python/lsst/ctrl/mpexec/showInfo.py: 11%
198 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-20 11:05 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ["ShowInfo"]
32import fnmatch
33import re
34import sys
35from collections import defaultdict
36from collections.abc import Mapping
37from types import SimpleNamespace
38from typing import Any
40import lsst.pex.config as pexConfig
41import lsst.pex.config.history as pexConfigHistory
42from lsst.daf.butler import Butler, DatasetRef, DatasetType, DatastoreRecordData, NamedKeyMapping
43from lsst.pipe.base import Pipeline, QuantumGraph
44from lsst.pipe.base.pipeline_graph import visualization
46from . import util
47from .cmdLineFwk import _ButlerFactory
50class _FilteredStream:
51 """A file-like object that filters some config fields.
53 Note
54 ----
55 This class depends on implementation details of ``Config.saveToStream``
56 methods, in particular that that method uses single call to write()
57 method to save information about single config field, and that call
58 combines comments string(s) for a field and field path and value.
59 This class will not work reliably on the "import" strings, so imports
60 should be disabled by passing ``skipImports=True`` to ``saveToStream()``.
61 """
63 def __init__(self, pattern: str, stream: Any = None) -> None:
64 if stream is None:
65 stream = sys.stdout
66 self.stream = stream
67 # obey case if pattern isn't lowercase or requests NOIGNORECASE
68 mat = re.search(r"(.*):NOIGNORECASE$", pattern)
70 if mat:
71 pattern = mat.group(1)
72 self._pattern = re.compile(fnmatch.translate(pattern))
73 else:
74 if pattern != pattern.lower():
75 print(
76 f'Matching "{pattern}" without regard to case (append :NOIGNORECASE to prevent this)',
77 file=self.stream,
78 )
79 self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)
81 def write(self, showStr: str) -> None:
82 # Strip off doc string line(s) and cut off at "=" for string matching
83 matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
84 if self._pattern.search(matchStr):
85 self.stream.write(showStr)
class ShowInfo:
    """Show information about a pipeline or quantum graph.

    Parameters
    ----------
    show : `list` [`str`]
        A list of show commands, some of which may have additional parameters
        specified using an ``=``.
    stream : I/O stream or None.
        The output stream to use. `None` will be treated as `sys.stdout`.

    Raises
    ------
    ValueError
        Raised if some show commands are not recognized.
    """

    # Commands that only need a Pipeline to be processed.
    pipeline_commands = {
        "pipeline",
        "config",
        "history",
        "tasks",
        "dump-config",
        "pipeline-graph",
        "task-graph",
    }
    # Commands that need a QuantumGraph to be processed.
    graph_commands = {"graph", "workflow", "uri"}

    def __init__(self, show: list[str], stream: Any = None) -> None:
        if stream is None:
            # Defer assigning sys.stdout to allow click to redefine it if
            # it wants. Assigning the default at class definition leads
            # to confusion on reassignment.
            stream = sys.stdout
        # Group the arguments of repeated commands, e.g.
        # ["config=a", "config=b"] -> {"config": ["a", "b"]}.  A command
        # given without "=" stores an empty-string argument.
        commands: dict[str, list[str]] = defaultdict(list)
        for value in show:
            command, _, args = value.partition("=")
            commands[command].append(args)
        self.commands = commands
        self.stream = stream
        self.handled: set[str] = set()

        known = self.pipeline_commands | self.graph_commands
        unknown = set(commands) - known
        if unknown:
            # Sort both collections so the message is deterministic; raw
            # set interpolation made the wording vary from run to run.
            raise ValueError(
                f"Unknown value(s) for show: {', '.join(sorted(unknown))} "
                f"(choose from '{', '.join(sorted(known))}')"
            )
135 @property
136 def unhandled(self) -> frozenset[str]:
137 """Return the commands that have not yet been processed."""
138 return frozenset(set(self.commands) - self.handled)
140 def show_pipeline_info(self, pipeline: Pipeline, butler: Butler | None) -> None:
141 """Display useful information about the pipeline.
143 Parameters
144 ----------
145 pipeline : `lsst.pipe.base.Pipeline`
146 The pipeline to use when reporting information.
147 """
148 if butler is not None:
149 registry = butler.registry
150 else:
151 registry = None
152 for command in self.pipeline_commands:
153 if command not in self.commands:
154 continue
155 args = self.commands[command]
157 match command:
158 case "pipeline":
159 print(pipeline, file=self.stream)
160 case "config":
161 for arg in args:
162 self._showConfig(pipeline, arg, False)
163 case "dump-config":
164 for arg in args:
165 self._showConfig(pipeline, arg, True)
166 case "history":
167 for arg in args:
168 self._showConfigHistory(pipeline, arg)
169 case "tasks":
170 self._showTaskHierarchy(pipeline)
171 case "pipeline-graph":
172 visualization.show(pipeline.to_graph(registry), self.stream, dataset_types=True)
173 case "task-graph":
174 visualization.show(pipeline.to_graph(registry), self.stream, dataset_types=False)
175 case _:
176 raise RuntimeError(f"Unexpectedly tried to process command {command!r}.")
177 self.handled.add(command)
179 def show_graph_info(self, graph: QuantumGraph, args: SimpleNamespace | None = None) -> None:
180 """Show information associated with this graph.
182 Parameters
183 ----------
184 graph : `lsst.pipe.base.QuantumGraph`
185 Graph to use when reporting information.
186 args : `types.SimpleNamespace`, optional
187 Parsed command-line parameters. Used to obtain additional external
188 information such as the location of a usable Butler.
189 """
190 for command in self.graph_commands:
191 if command not in self.commands:
192 continue
193 match command:
194 case "graph":
195 self._showGraph(graph)
196 case "uri":
197 if args is None:
198 raise ValueError("The uri option requires additional command line arguments.")
199 self._showUri(graph, args)
200 case "workflow":
201 self._showWorkflow(graph)
202 case _:
203 raise RuntimeError(f"Unexpectedly tried to process command {command!r}.")
204 self.handled.add(command)
206 def _showConfig(self, pipeline: Pipeline, showArgs: str, dumpFullConfig: bool) -> None:
207 """Show task configuration
209 Parameters
210 ----------
211 pipeline : `lsst.pipe.base.Pipeline`
212 Pipeline definition
213 showArgs : `str`
214 Defines what to show
215 dumpFullConfig : `bool`
216 If true then dump complete task configuration with all imports.
217 """
218 stream: Any = self.stream
219 if dumpFullConfig:
220 # Task label can be given with this option
221 taskName = showArgs
222 else:
223 # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
224 matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
225 assert matConfig is not None, "regex always matches"
226 taskName = matConfig.group(1)
227 pattern = matConfig.group(2)
228 if pattern:
229 stream = _FilteredStream(pattern, stream=stream)
231 tasks = util.filterTasks(pipeline, taskName)
232 if not tasks:
233 raise ValueError(f"Pipeline has no tasks named {taskName}")
235 for taskDef in tasks:
236 print(f"### Configuration for task `{taskDef.label}'", file=self.stream)
237 taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)
239 def _showConfigHistory(self, pipeline: Pipeline, showArgs: str) -> None:
240 """Show history for task configuration.
242 Parameters
243 ----------
244 pipeline : `lsst.pipe.base.Pipeline`
245 Pipeline definition
246 showArgs : `str`
247 Defines what to show
248 """
249 taskName = None
250 pattern = None
251 matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
252 if matHistory:
253 taskName = matHistory.group(1)
254 pattern = matHistory.group(2)
255 if not pattern:
256 raise ValueError("Please provide a value with --show history (e.g. history=Task::param)")
258 tasks = util.filterTasks(pipeline, taskName)
259 if not tasks:
260 raise ValueError(f"Pipeline has no tasks named {taskName}")
262 found = False
263 for taskDef in tasks:
264 config = taskDef.config
266 # Look for any matches in the config hierarchy for this name
267 for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
268 if nmatch > 0:
269 print("", file=self.stream)
271 cpath, _, cname = thisName.rpartition(".")
272 try:
273 if not cpath:
274 # looking for top-level field
275 hconfig = taskDef.config
276 else:
277 hconfig = eval("config." + cpath, {}, {"config": config})
278 except AttributeError:
279 print(
280 f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
281 file=sys.stderr,
282 )
283 hconfig = None
285 # Sometimes we end up with a non-Config so skip those
286 if isinstance(hconfig, pexConfig.Config | pexConfig.ConfigurableInstance) and hasattr(
287 hconfig, cname
288 ):
289 print(f"### Configuration field for task `{taskDef.label}'", file=self.stream)
290 print(pexConfigHistory.format(hconfig, cname), file=self.stream)
291 found = True
293 if not found:
294 raise ValueError(f"None of the tasks has field matching {pattern}")
296 def _showTaskHierarchy(self, pipeline: Pipeline) -> None:
297 """Print task hierarchy to stdout
299 Parameters
300 ----------
301 pipeline: `lsst.pipe.base.Pipeline`
302 Pipeline definition.
303 """
304 for taskDef in pipeline.toExpandedPipeline():
305 print(f"### Subtasks for task `{taskDef.taskName}'", file=self.stream)
307 for configName, taskName in util.subTaskIter(taskDef.config):
308 print(f"{configName}: {taskName}", file=self.stream)
310 def _showGraph(self, graph: QuantumGraph) -> None:
311 """Print quanta information to stdout
313 Parameters
314 ----------
315 graph : `lsst.pipe.base.QuantumGraph`
316 Execution graph.
317 """
319 def _print_refs(
320 mapping: NamedKeyMapping[DatasetType, tuple[DatasetRef, ...]],
321 datastore_records: Mapping[str, DatastoreRecordData],
322 ) -> None:
323 """Print complete information on quantum input or output refs."""
324 for key, refs in mapping.items():
325 if refs:
326 print(f" {key}:", file=self.stream)
327 for ref in refs:
328 print(f" - {ref}", file=self.stream)
329 for datastore_name, record_data in datastore_records.items():
330 if record_map := record_data.records.get(ref.id):
331 print(f" records for {datastore_name}:", file=self.stream)
332 for table_name, records in record_map.items():
333 print(f" - {table_name}:", file=self.stream)
334 for record in records:
335 print(f" - {record}:", file=self.stream)
336 else:
337 print(f" {key}: []", file=self.stream)
339 for taskNode in graph.iterTaskGraph():
340 print(taskNode, file=self.stream)
342 for iq, quantum_node in enumerate(graph.getNodesForTask(taskNode)):
343 quantum = quantum_node.quantum
344 print(
345 f" Quantum {iq} dataId={quantum.dataId} nodeId={quantum_node.nodeId}:", file=self.stream
346 )
347 print(" inputs:", file=self.stream)
348 _print_refs(quantum.inputs, quantum.datastore_records)
349 print(" outputs:", file=self.stream)
350 _print_refs(quantum.outputs, quantum.datastore_records)
352 def _showWorkflow(self, graph: QuantumGraph) -> None:
353 """Print quanta information and dependency to stdout
355 Parameters
356 ----------
357 graph : `lsst.pipe.base.QuantumGraph`
358 Execution graph.
359 """
360 for node in graph:
361 print(f"Quantum {node.nodeId}: {node.taskDef.taskName}", file=self.stream)
362 for parent in graph.determineInputsToQuantumNode(node):
363 print(f"Parent Quantum {parent.nodeId} - Child Quantum {node.nodeId}", file=self.stream)
365 def _showUri(self, graph: QuantumGraph, args: SimpleNamespace) -> None:
366 """Print input and predicted output URIs to stdout
368 Parameters
369 ----------
370 graph : `lsst.pipe.base.QuantumGraph`
371 Execution graph
372 args : `types.SimpleNamespace`
373 Parsed command line
374 """
376 def dumpURIs(thisRef: DatasetRef) -> None:
377 primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
378 if primary:
379 print(f" {primary}", file=self.stream)
380 else:
381 print(" (disassembled artifact)", file=self.stream)
382 for compName, compUri in components.items():
383 print(f" {compName}: {compUri}", file=self.stream)
385 butler = _ButlerFactory.makeReadButler(args)
386 for node in graph:
387 print(f"Quantum {node.nodeId}: {node.taskDef.taskName}", file=self.stream)
388 print(" inputs:", file=self.stream)
389 for refs in node.quantum.inputs.values():
390 for ref in refs:
391 dumpURIs(ref)
392 print(" outputs:", file=self.stream)
393 for refs in node.quantum.outputs.values():
394 for ref in refs:
395 dumpURIs(ref)