Coverage for python/lsst/ctrl/mpexec/showInfo.py: 11%

186 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-08-06 02:30 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ShowInfo"] 

25 

26import fnmatch 

27import re 

28import sys 

29from collections import defaultdict 

30from collections.abc import Mapping 

31from types import SimpleNamespace 

32from typing import Any 

33 

34import lsst.pex.config as pexConfig 

35import lsst.pex.config.history as pexConfigHistory 

36from lsst.daf.butler import DatasetRef, DatasetType, DatastoreRecordData, NamedKeyMapping 

37from lsst.pipe.base import Pipeline, QuantumGraph 

38 

39from . import util 

40from .cmdLineFwk import _ButlerFactory 

41 

42 

class _FilteredStream:
    """A write-only, file-like object that forwards only those config
    fields whose dotted path matches a glob pattern.

    Matching ignores case by default; appending ``:NOIGNORECASE`` to the
    pattern (or using any non-lowercase pattern with that suffix) makes the
    match case-sensitive.

    Note
    ----
    This class depends on implementation details of ``Config.saveToStream``
    methods, in particular that that method uses single call to write()
    method to save information about single config field, and that call
    combines comments string(s) for a field and field path and value.
    This class will not work reliably on the "import" strings, so imports
    should be disabled by passing ``skipImports=True`` to ``saveToStream()``.
    """

    def __init__(self, pattern: str, stream: Any = None) -> None:
        self.stream = sys.stdout if stream is None else stream

        # obey case if pattern isn't lowercase or requests NOIGNORECASE
        case_sensitive = re.search(r"(.*):NOIGNORECASE$", pattern)
        if case_sensitive:
            self._pattern = re.compile(fnmatch.translate(case_sensitive.group(1)))
            return

        if pattern != pattern.lower():
            # Warn the user that a mixed-case pattern is still matched
            # case-insensitively unless they opt out.
            print(
                f'Matching "{pattern}" without regard to case (append :NOIGNORECASE to prevent this)',
                file=self.stream,
            )
        self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr: str) -> None:
        # The last line of each chunk holds "path=value"; everything before
        # it is doc-comment text. Compare the pattern against the path part
        # (text before the first "=") only.
        final_line = showStr.rstrip().split("\n")[-1]
        field_path = final_line.split("=")[0]
        if self._pattern.search(field_path):
            self.stream.write(showStr)

79 

80 

class ShowInfo:
    """Show information about a pipeline or quantum graph.

    Commands are split into two groups: those that only need a `Pipeline`
    (`pipeline_commands`) and those that need an expanded `QuantumGraph`
    (`graph_commands`). Callers invoke `show_pipeline_info` and/or
    `show_graph_info` as the relevant objects become available; commands
    processed so far are tracked in `handled` and the remainder is exposed
    through `unhandled`.

    Parameters
    ----------
    show : `list` [`str`]
        A list of show commands, some of which may have additional parameters
        specified using an ``=``.
    stream : I/O stream or None.
        The output stream to use. `None` will be treated as `sys.stdout`.

    Raises
    ------
    ValueError
        Raised if some show commands are not recognized.
    """

    # Commands that can be answered from the pipeline definition alone.
    pipeline_commands = {"pipeline", "config", "history", "tasks", "dump-config"}
    # Commands that require a built quantum graph (and possibly a butler).
    graph_commands = {"graph", "workflow", "uri"}

    def __init__(self, show: list[str], stream: Any = None) -> None:
        if stream is None:
            # Defer assigning sys.stdout to allow click to redefine it if
            # it wants. Assigning the default at class definition leads
            # to confusion on reassignment.
            stream = sys.stdout
        # Group repeated commands, e.g. two "config=..." options become
        # commands["config"] == [args1, args2]. A command given without
        # "=" maps to [""].
        commands: dict[str, list[str]] = defaultdict(list)
        for value in show:
            command, _, args = value.partition("=")
            commands[command].append(args)
        self.commands = commands
        self.stream = stream
        self.handled: set[str] = set()

        # Reject unrecognized commands up front so the user gets immediate
        # feedback rather than a silently ignored option.
        known = self.pipeline_commands | self.graph_commands
        unknown = set(commands) - known
        if unknown:
            raise ValueError(f"Unknown value(s) for show: {unknown} (choose from '{', '.join(known)}')")

    @property
    def unhandled(self) -> frozenset[str]:
        """Return the commands that have not yet been processed.

        Returns
        -------
        commands : `frozenset` [`str`]
            Requested commands not yet consumed by `show_pipeline_info`
            or `show_graph_info`.
        """
        return frozenset(set(self.commands) - self.handled)

    def show_pipeline_info(self, pipeline: Pipeline) -> None:
        """Display useful information about the pipeline.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            The pipeline to use when reporting information.
        """
        # Iterate over the known command set (not self.commands) so only
        # pipeline-scoped commands are consumed here; graph commands remain
        # for show_graph_info.
        for command in self.pipeline_commands:
            if command not in self.commands:
                continue
            args = self.commands[command]

            if command == "pipeline":
                print(pipeline, file=self.stream)
            elif command == "config":
                for arg in args:
                    self._showConfig(pipeline, arg, False)
            elif command == "dump-config":
                for arg in args:
                    self._showConfig(pipeline, arg, True)
            elif command == "history":
                for arg in args:
                    self._showConfigHistory(pipeline, arg)
            elif command == "tasks":
                self._showTaskHierarchy(pipeline)
            else:
                # Defensive: pipeline_commands and this dispatch must stay
                # in sync; reaching here means a command was added to the
                # set without a handler.
                raise RuntimeError(f"Unexpectedly tried to process command {command!r}.")
            self.handled.add(command)

    def show_graph_info(self, graph: QuantumGraph, args: SimpleNamespace | None = None) -> None:
        """Show information associated with this graph.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Graph to use when reporting information.
        args : `types.SimpleNamespace`, optional
            Parsed command-line parameters. Used to obtain additional external
            information such as the location of a usable Butler.

        Raises
        ------
        ValueError
            Raised if the ``uri`` command was requested but ``args`` was
            not provided (a butler cannot be constructed without it).
        """
        for command in self.graph_commands:
            if command not in self.commands:
                continue
            if command == "graph":
                self._showGraph(graph)
            elif command == "uri":
                if args is None:
                    raise ValueError("The uri option requires additional command line arguments.")
                self._showUri(graph, args)
            elif command == "workflow":
                self._showWorkflow(graph)
            else:
                # Defensive: keep graph_commands and this dispatch in sync.
                raise RuntimeError(f"Unexpectedly tried to process command {command!r}.")
            self.handled.add(command)

    def _showConfig(self, pipeline: Pipeline, showArgs: str, dumpFullConfig: bool) -> None:
        """Show task configuration

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show
        dumpFullConfig : `bool`
            If true then dump complete task configuration with all imports.

        Raises
        ------
        ValueError
            Raised if no task in the pipeline matches the requested label.
        """
        stream: Any = self.stream
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            # Group 1 is the optional task label, group 2 the field pattern;
            # a leading "config." on the pattern is accepted and dropped.
            matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
            assert matConfig is not None, "regex always matches"
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                # Route saveToStream output through a filter that prints
                # only fields matching the pattern.
                stream = _FilteredStream(pattern, stream=stream)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            raise ValueError(f"Pipeline has no tasks named {taskName}")

        for taskDef in tasks:
            # The header always goes to the unfiltered stream so it is
            # printed even when the filter suppresses every field.
            print(f"### Configuration for task `{taskDef.label}'", file=self.stream)
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)

    def _showConfigHistory(self, pipeline: Pipeline, showArgs: str) -> None:
        """Show history for task configuration.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show

        Raises
        ------
        ValueError
            Raised if no field pattern was supplied, if no task matches the
            requested label, or if no task has a field matching the pattern.
        """
        taskName = None
        pattern = None
        # Same [TaskLabel::][config.]pattern syntax as _showConfig.
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            raise ValueError("Please provide a value with --show history (e.g. history=Task::param)")

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            raise ValueError(f"Pipeline has no tasks named {taskName}")

        found = False
        for taskDef in tasks:
            config = taskDef.config

            # Look for any matches in the config hierarchy for this name
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    # Blank line between successive matches for readability.
                    print("", file=self.stream)

                # Split "a.b.c" into the owning config path ("a.b") and the
                # field name ("c").
                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # looking for top-level field
                        hconfig = taskDef.config
                    else:
                        # NOTE(review): eval is used to walk the dotted
                        # config path; the input comes from the operator's
                        # own --show option, not from untrusted data.
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(
                        f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                        file=sys.stderr,
                    )
                    hconfig = None

                # Sometimes we end up with a non-Config so skip those
                if isinstance(hconfig, pexConfig.Config | pexConfig.ConfigurableInstance) and hasattr(
                    hconfig, cname
                ):
                    print(f"### Configuration field for task `{taskDef.label}'", file=self.stream)
                    print(pexConfigHistory.format(hconfig, cname), file=self.stream)
                    found = True

        if not found:
            raise ValueError(f"None of the tasks has field matching {pattern}")

    def _showTaskHierarchy(self, pipeline: Pipeline) -> None:
        """Print task hierarchy to stdout

        Parameters
        ----------
        pipeline: `lsst.pipe.base.Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print(f"### Subtasks for task `{taskDef.taskName}'", file=self.stream)

            for configName, taskName in util.subTaskIter(taskDef.config):
                print(f"{configName}: {taskName}", file=self.stream)

    def _showGraph(self, graph: QuantumGraph) -> None:
        """Print quanta information to stdout

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Execution graph.
        """

        def _print_refs(
            mapping: NamedKeyMapping[DatasetType, tuple[DatasetRef, ...]],
            datastore_records: Mapping[str, DatastoreRecordData],
        ) -> None:
            """Print complete information on quantum input or output refs."""
            for key, refs in mapping.items():
                if refs:
                    print(f"      {key}:", file=self.stream)
                    for ref in refs:
                        print(f"        - {ref}", file=self.stream)
                        # Also dump any datastore records attached to this
                        # ref, grouped by datastore and table name.
                        for datastore_name, record_data in datastore_records.items():
                            if record_map := record_data.records.get(ref.id):
                                print(f"          records for {datastore_name}:", file=self.stream)
                                for table_name, records in record_map.items():
                                    print(f"          - {table_name}:", file=self.stream)
                                    for record in records:
                                        print(f"            - {record}:", file=self.stream)
                else:
                    print(f"      {key}: []", file=self.stream)

        for taskNode in graph.iterTaskGraph():
            print(taskNode, file=self.stream)

            for iq, quantum_node in enumerate(graph.getNodesForTask(taskNode)):
                quantum = quantum_node.quantum
                print(
                    f"  Quantum {iq} dataId={quantum.dataId} nodeId={quantum_node.nodeId}:", file=self.stream
                )
                print("    inputs:", file=self.stream)
                _print_refs(quantum.inputs, quantum.datastore_records)
                print("    outputs:", file=self.stream)
                _print_refs(quantum.outputs, quantum.datastore_records)

    def _showWorkflow(self, graph: QuantumGraph) -> None:
        """Print quanta information and dependency to stdout

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Execution graph.
        """
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}", file=self.stream)
            # One line per edge: each direct upstream quantum of this node.
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId} - Child Quantum {node.nodeId}", file=self.stream)

    def _showUri(self, graph: QuantumGraph, args: SimpleNamespace) -> None:
        """Print input and predicted output URIs to stdout

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Execution graph
        args : `types.SimpleNamespace`
            Parsed command line
        """

        def dumpURIs(thisRef: DatasetRef) -> None:
            # predict=True yields URIs for datasets that do not exist yet;
            # the run name is a placeholder since outputs are not written.
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}", file=self.stream)
            else:
                # Disassembled composites have no single primary URI; list
                # the per-component URIs instead.
                print("    (disassembled artifact)", file=self.stream)
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}", file=self.stream)

        butler = _ButlerFactory.makeReadButler(args)
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}", file=self.stream)
            print("  inputs:", file=self.stream)
            for refs in node.quantum.inputs.values():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:", file=self.stream)
            for refs in node.quantum.outputs.values():
                for ref in refs:
                    dumpURIs(ref)