Coverage for python/lsst/ctrl/mpexec/showInfo.py: 11%
199 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-18 09:41 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ["ShowInfo"]
32import fnmatch
33import re
34import sys
35from collections import defaultdict
36from collections.abc import Mapping
37from types import SimpleNamespace
38from typing import Any
40import lsst.pex.config as pexConfig
41import lsst.pex.config.history as pexConfigHistory
42from lsst.daf.butler import Butler, DatasetRef, DatasetType, NamedKeyMapping
43from lsst.daf.butler.datastore.record_data import DatastoreRecordData
44from lsst.pipe.base import Pipeline, QuantumGraph
45from lsst.pipe.base.pipeline_graph import visualization
47from . import util
48from .cmdLineFwk import _ButlerFactory
51class _FilteredStream:
52 """A file-like object that filters some config fields.
54 Note
55 ----
56 This class depends on implementation details of ``Config.saveToStream``
57 methods, in particular that that method uses single call to write()
58 method to save information about single config field, and that call
59 combines comments string(s) for a field and field path and value.
60 This class will not work reliably on the "import" strings, so imports
61 should be disabled by passing ``skipImports=True`` to ``saveToStream()``.
62 """
64 def __init__(self, pattern: str, stream: Any = None) -> None:
65 if stream is None:
66 stream = sys.stdout
67 self.stream = stream
68 # obey case if pattern isn't lowercase or requests NOIGNORECASE
69 mat = re.search(r"(.*):NOIGNORECASE$", pattern)
71 if mat:
72 pattern = mat.group(1)
73 self._pattern = re.compile(fnmatch.translate(pattern))
74 else:
75 if pattern != pattern.lower():
76 print(
77 f'Matching "{pattern}" without regard to case (append :NOIGNORECASE to prevent this)',
78 file=self.stream,
79 )
80 self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)
82 def write(self, showStr: str) -> None:
83 # Strip off doc string line(s) and cut off at "=" for string matching
84 matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
85 if self._pattern.search(matchStr):
86 self.stream.write(showStr)
89class ShowInfo:
90 """Show information about a pipeline or quantum graph.
92 Parameters
93 ----------
94 show : `list` [`str`]
95 A list of show commands, some of which may have additional parameters
96 specified using an ``=``.
97 stream : I/O stream or None.
98 The output stream to use. `None` will be treated as `sys.stdout`.
100 Raises
101 ------
102 ValueError
103 Raised if some show commands are not recognized.
104 """
106 pipeline_commands = {
107 "pipeline",
108 "config",
109 "history",
110 "tasks",
111 "dump-config",
112 "pipeline-graph",
113 "task-graph",
114 }
115 graph_commands = {"graph", "workflow", "uri"}
117 def __init__(self, show: list[str], stream: Any = None) -> None:
118 if stream is None:
119 # Defer assigning sys.stdout to allow click to redefine it if
120 # it wants. Assigning the default at class definition leads
121 # to confusion on reassignment.
122 stream = sys.stdout
123 commands: dict[str, list[str]] = defaultdict(list)
124 for value in show:
125 command, _, args = value.partition("=")
126 commands[command].append(args)
127 self.commands = commands
128 self.stream = stream
129 self.handled: set[str] = set()
131 known = self.pipeline_commands | self.graph_commands
132 unknown = set(commands) - known
133 if unknown:
134 raise ValueError(f"Unknown value(s) for show: {unknown} (choose from '{', '.join(known)}')")
136 @property
137 def unhandled(self) -> frozenset[str]:
138 """Return the commands that have not yet been processed."""
139 return frozenset(set(self.commands) - self.handled)
141 def show_pipeline_info(self, pipeline: Pipeline, butler: Butler | None) -> None:
142 """Display useful information about the pipeline.
144 Parameters
145 ----------
146 pipeline : `lsst.pipe.base.Pipeline`
147 The pipeline to use when reporting information.
148 """
149 if butler is not None:
150 registry = butler.registry
151 else:
152 registry = None
153 for command in self.pipeline_commands:
154 if command not in self.commands:
155 continue
156 args = self.commands[command]
158 match command:
159 case "pipeline":
160 print(pipeline, file=self.stream)
161 case "config":
162 for arg in args:
163 self._showConfig(pipeline, arg, False)
164 case "dump-config":
165 for arg in args:
166 self._showConfig(pipeline, arg, True)
167 case "history":
168 for arg in args:
169 self._showConfigHistory(pipeline, arg)
170 case "tasks":
171 self._showTaskHierarchy(pipeline)
172 case "pipeline-graph":
173 visualization.show(pipeline.to_graph(registry), self.stream, dataset_types=True)
174 case "task-graph":
175 visualization.show(pipeline.to_graph(registry), self.stream, dataset_types=False)
176 case _:
177 raise RuntimeError(f"Unexpectedly tried to process command {command!r}.")
178 self.handled.add(command)
180 def show_graph_info(self, graph: QuantumGraph, args: SimpleNamespace | None = None) -> None:
181 """Show information associated with this graph.
183 Parameters
184 ----------
185 graph : `lsst.pipe.base.QuantumGraph`
186 Graph to use when reporting information.
187 args : `types.SimpleNamespace`, optional
188 Parsed command-line parameters. Used to obtain additional external
189 information such as the location of a usable Butler.
190 """
191 for command in self.graph_commands:
192 if command not in self.commands:
193 continue
194 match command:
195 case "graph":
196 self._showGraph(graph)
197 case "uri":
198 if args is None:
199 raise ValueError("The uri option requires additional command line arguments.")
200 self._showUri(graph, args)
201 case "workflow":
202 self._showWorkflow(graph)
203 case _:
204 raise RuntimeError(f"Unexpectedly tried to process command {command!r}.")
205 self.handled.add(command)
207 def _showConfig(self, pipeline: Pipeline, showArgs: str, dumpFullConfig: bool) -> None:
208 """Show task configuration
210 Parameters
211 ----------
212 pipeline : `lsst.pipe.base.Pipeline`
213 Pipeline definition
214 showArgs : `str`
215 Defines what to show
216 dumpFullConfig : `bool`
217 If true then dump complete task configuration with all imports.
218 """
219 stream: Any = self.stream
220 if dumpFullConfig:
221 # Task label can be given with this option
222 taskName = showArgs
223 else:
224 # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
225 matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
226 assert matConfig is not None, "regex always matches"
227 taskName = matConfig.group(1)
228 pattern = matConfig.group(2)
229 if pattern:
230 stream = _FilteredStream(pattern, stream=stream)
232 tasks = util.filterTasks(pipeline, taskName)
233 if not tasks:
234 raise ValueError(f"Pipeline has no tasks named {taskName}")
236 for taskDef in tasks:
237 print(f"### Configuration for task `{taskDef.label}'", file=self.stream)
238 taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)
240 def _showConfigHistory(self, pipeline: Pipeline, showArgs: str) -> None:
241 """Show history for task configuration.
243 Parameters
244 ----------
245 pipeline : `lsst.pipe.base.Pipeline`
246 Pipeline definition
247 showArgs : `str`
248 Defines what to show
249 """
250 taskName = None
251 pattern = None
252 matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
253 if matHistory:
254 taskName = matHistory.group(1)
255 pattern = matHistory.group(2)
256 if not pattern:
257 raise ValueError("Please provide a value with --show history (e.g. history=Task::param)")
259 tasks = util.filterTasks(pipeline, taskName)
260 if not tasks:
261 raise ValueError(f"Pipeline has no tasks named {taskName}")
263 found = False
264 for taskDef in tasks:
265 config = taskDef.config
267 # Look for any matches in the config hierarchy for this name
268 for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
269 if nmatch > 0:
270 print("", file=self.stream)
272 cpath, _, cname = thisName.rpartition(".")
273 try:
274 if not cpath:
275 # looking for top-level field
276 hconfig = taskDef.config
277 else:
278 hconfig = eval("config." + cpath, {}, {"config": config})
279 except AttributeError:
280 print(
281 f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
282 file=sys.stderr,
283 )
284 hconfig = None
286 # Sometimes we end up with a non-Config so skip those
287 if isinstance(hconfig, pexConfig.Config | pexConfig.ConfigurableInstance) and hasattr(
288 hconfig, cname
289 ):
290 print(f"### Configuration field for task `{taskDef.label}'", file=self.stream)
291 print(pexConfigHistory.format(hconfig, cname), file=self.stream)
292 found = True
294 if not found:
295 raise ValueError(f"None of the tasks has field matching {pattern}")
297 def _showTaskHierarchy(self, pipeline: Pipeline) -> None:
298 """Print task hierarchy to stdout
300 Parameters
301 ----------
302 pipeline: `lsst.pipe.base.Pipeline`
303 Pipeline definition.
304 """
305 for taskDef in pipeline.toExpandedPipeline():
306 print(f"### Subtasks for task `{taskDef.taskName}'", file=self.stream)
308 for configName, taskName in util.subTaskIter(taskDef.config):
309 print(f"{configName}: {taskName}", file=self.stream)
311 def _showGraph(self, graph: QuantumGraph) -> None:
312 """Print quanta information to stdout
314 Parameters
315 ----------
316 graph : `lsst.pipe.base.QuantumGraph`
317 Execution graph.
318 """
320 def _print_refs(
321 mapping: NamedKeyMapping[DatasetType, tuple[DatasetRef, ...]],
322 datastore_records: Mapping[str, DatastoreRecordData],
323 ) -> None:
324 """Print complete information on quantum input or output refs."""
325 for key, refs in mapping.items():
326 if refs:
327 print(f" {key}:", file=self.stream)
328 for ref in refs:
329 print(f" - {ref}", file=self.stream)
330 for datastore_name, record_data in datastore_records.items():
331 if record_map := record_data.records.get(ref.id):
332 print(f" records for {datastore_name}:", file=self.stream)
333 for table_name, records in record_map.items():
334 print(f" - {table_name}:", file=self.stream)
335 for record in records:
336 print(f" - {record}:", file=self.stream)
337 else:
338 print(f" {key}: []", file=self.stream)
340 for taskNode in graph.iterTaskGraph():
341 print(taskNode, file=self.stream)
343 for iq, quantum_node in enumerate(graph.getNodesForTask(taskNode)):
344 quantum = quantum_node.quantum
345 print(
346 f" Quantum {iq} dataId={quantum.dataId} nodeId={quantum_node.nodeId}:", file=self.stream
347 )
348 print(" inputs:", file=self.stream)
349 _print_refs(quantum.inputs, quantum.datastore_records)
350 print(" outputs:", file=self.stream)
351 _print_refs(quantum.outputs, quantum.datastore_records)
353 def _showWorkflow(self, graph: QuantumGraph) -> None:
354 """Print quanta information and dependency to stdout
356 Parameters
357 ----------
358 graph : `lsst.pipe.base.QuantumGraph`
359 Execution graph.
360 """
361 for node in graph:
362 print(f"Quantum {node.nodeId}: {node.taskDef.taskName}", file=self.stream)
363 for parent in graph.determineInputsToQuantumNode(node):
364 print(f"Parent Quantum {parent.nodeId} - Child Quantum {node.nodeId}", file=self.stream)
366 def _showUri(self, graph: QuantumGraph, args: SimpleNamespace) -> None:
367 """Print input and predicted output URIs to stdout
369 Parameters
370 ----------
371 graph : `lsst.pipe.base.QuantumGraph`
372 Execution graph
373 args : `types.SimpleNamespace`
374 Parsed command line
375 """
377 def dumpURIs(thisRef: DatasetRef) -> None:
378 primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
379 if primary:
380 print(f" {primary}", file=self.stream)
381 else:
382 print(" (disassembled artifact)", file=self.stream)
383 for compName, compUri in components.items():
384 print(f" {compName}: {compUri}", file=self.stream)
386 butler = _ButlerFactory.makeReadButler(args)
387 for node in graph:
388 print(f"Quantum {node.nodeId}: {node.taskDef.taskName}", file=self.stream)
389 print(" inputs:", file=self.stream)
390 for refs in node.quantum.inputs.values():
391 for ref in refs:
392 dumpURIs(ref)
393 print(" outputs:", file=self.stream)
394 for refs in node.quantum.outputs.values():
395 for ref in refs:
396 dumpURIs(ref)