Coverage for python/lsst/ctrl/mpexec/showInfo.py: 11%

198 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-20 11:05 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["ShowInfo"] 

31 

32import fnmatch 

33import re 

34import sys 

35from collections import defaultdict 

36from collections.abc import Mapping 

37from types import SimpleNamespace 

38from typing import Any 

39 

40import lsst.pex.config as pexConfig 

41import lsst.pex.config.history as pexConfigHistory 

42from lsst.daf.butler import Butler, DatasetRef, DatasetType, DatastoreRecordData, NamedKeyMapping 

43from lsst.pipe.base import Pipeline, QuantumGraph 

44from lsst.pipe.base.pipeline_graph import visualization 

45 

46from . import util 

47from .cmdLineFwk import _ButlerFactory 

48 

49 

class _FilteredStream:
    """A file-like object that passes through only matching config fields.

    Note
    ----
    This class depends on implementation details of ``Config.saveToStream``
    methods, in particular that that method uses single call to write()
    method to save information about single config field, and that call
    combines comments string(s) for a field and field path and value.
    This class will not work reliably on the "import" strings, so imports
    should be disabled by passing ``skipImports=True`` to ``saveToStream()``.
    """

    def __init__(self, pattern: str, stream: Any = None) -> None:
        # Default the output stream lazily so callers can substitute one.
        self.stream = sys.stdout if stream is None else stream

        # A trailing ":NOIGNORECASE" suffix forces a case-sensitive match;
        # strip it off and compile the remaining glob as-is.
        caseSensitive = re.search(r"(.*):NOIGNORECASE$", pattern)
        if caseSensitive:
            self._pattern = re.compile(fnmatch.translate(caseSensitive.group(1)))
            return

        # Otherwise matching ignores case; warn when the pattern itself is
        # not all-lowercase, since the user may have expected exact case.
        if pattern.lower() != pattern:
            print(
                f'Matching "{pattern}" without regard to case (append :NOIGNORECASE to prevent this)',
                file=self.stream,
            )
        self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr: str) -> None:
        """Forward ``showStr`` to the stream only when its field matches."""
        # The last line of the chunk is "path=value"; match against the
        # path portion only, ignoring any preceding doc-comment lines.
        fieldName = showStr.rstrip().rsplit("\n", 1)[-1].split("=", 1)[0]
        if self._pattern.search(fieldName):
            self.stream.write(showStr)

86 

87 

class ShowInfo:
    """Show information about a pipeline or quantum graph.

    Parameters
    ----------
    show : `list` [`str`]
        A list of show commands, some of which may have additional parameters
        specified using an ``=``.
    stream : I/O stream or None.
        The output stream to use. `None` will be treated as `sys.stdout`.

    Raises
    ------
    ValueError
        Raised if some show commands are not recognized.

    Notes
    -----
    Commands come in two groups: ``pipeline_commands`` are processed by
    `show_pipeline_info` and ``graph_commands`` by `show_graph_info`.
    The `unhandled` property reports commands not yet processed by either.
    """

    # Commands that only need the pipeline definition (handled by
    # show_pipeline_info).
    pipeline_commands = {
        "pipeline",
        "config",
        "history",
        "tasks",
        "dump-config",
        "pipeline-graph",
        "task-graph",
    }
    # Commands that need a built quantum graph (handled by show_graph_info).
    graph_commands = {"graph", "workflow", "uri"}

    def __init__(self, show: list[str], stream: Any = None) -> None:
        if stream is None:
            # Defer assigning sys.stdout to allow click to redefine it if
            # it wants. Assigning the default at class definition leads
            # to confusion on reassignment.
            stream = sys.stdout
        # Group the optional "=arg" values by command name; a command given
        # multiple times accumulates all of its argument strings.
        commands: dict[str, list[str]] = defaultdict(list)
        for value in show:
            command, _, args = value.partition("=")
            commands[command].append(args)
        self.commands = commands
        self.stream = stream
        # Commands already processed; see the ``unhandled`` property.
        self.handled: set[str] = set()

        known = self.pipeline_commands | self.graph_commands
        unknown = set(commands) - known
        if unknown:
            raise ValueError(f"Unknown value(s) for show: {unknown} (choose from '{', '.join(known)}')")

    @property
    def unhandled(self) -> frozenset[str]:
        """Return the commands that have not yet been processed."""
        return frozenset(set(self.commands) - self.handled)

    def show_pipeline_info(self, pipeline: Pipeline, butler: Butler | None) -> None:
        """Display useful information about the pipeline.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            The pipeline to use when reporting information.
        butler : `lsst.daf.butler.Butler` or `None`
            If provided, its registry is used when expanding the pipeline
            into a graph for the ``pipeline-graph`` and ``task-graph``
            commands.

        Raises
        ------
        ValueError
            Raised (from helpers) if a ``config`` or ``history`` argument
            does not match any task or field.
        RuntimeError
            Raised if a command in ``pipeline_commands`` has no handler;
            indicates an internal inconsistency.
        """
        if butler is not None:
            registry = butler.registry
        else:
            registry = None
        for command in self.pipeline_commands:
            if command not in self.commands:
                continue
            args = self.commands[command]

            match command:
                case "pipeline":
                    print(pipeline, file=self.stream)
                case "config":
                    for arg in args:
                        self._showConfig(pipeline, arg, False)
                case "dump-config":
                    for arg in args:
                        self._showConfig(pipeline, arg, True)
                case "history":
                    for arg in args:
                        self._showConfigHistory(pipeline, arg)
                case "tasks":
                    self._showTaskHierarchy(pipeline)
                case "pipeline-graph":
                    visualization.show(pipeline.to_graph(registry), self.stream, dataset_types=True)
                case "task-graph":
                    visualization.show(pipeline.to_graph(registry), self.stream, dataset_types=False)
                case _:
                    raise RuntimeError(f"Unexpectedly tried to process command {command!r}.")
            self.handled.add(command)

    def show_graph_info(self, graph: QuantumGraph, args: SimpleNamespace | None = None) -> None:
        """Show information associated with this graph.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Graph to use when reporting information.
        args : `types.SimpleNamespace`, optional
            Parsed command-line parameters. Used to obtain additional external
            information such as the location of a usable Butler.

        Raises
        ------
        ValueError
            Raised if the ``uri`` command was requested but ``args`` was
            not provided.
        RuntimeError
            Raised if a command in ``graph_commands`` has no handler;
            indicates an internal inconsistency.
        """
        for command in self.graph_commands:
            if command not in self.commands:
                continue
            match command:
                case "graph":
                    self._showGraph(graph)
                case "uri":
                    # A butler is needed to resolve URIs, so command-line
                    # arguments describing it are required here.
                    if args is None:
                        raise ValueError("The uri option requires additional command line arguments.")
                    self._showUri(graph, args)
                case "workflow":
                    self._showWorkflow(graph)
                case _:
                    raise RuntimeError(f"Unexpectedly tried to process command {command!r}.")
            self.handled.add(command)

    def _showConfig(self, pipeline: Pipeline, showArgs: str, dumpFullConfig: bool) -> None:
        """Show task configuration

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show
        dumpFullConfig : `bool`
            If true then dump complete task configuration with all imports.

        Raises
        ------
        ValueError
            Raised if no task in the pipeline matches the requested label.
        """
        stream: Any = self.stream
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
            assert matConfig is not None, "regex always matches"
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                # Only emit config fields whose names match the glob.
                stream = _FilteredStream(pattern, stream=stream)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            raise ValueError(f"Pipeline has no tasks named {taskName}")

        for taskDef in tasks:
            print(f"### Configuration for task `{taskDef.label}'", file=self.stream)
            # skipImports must be True for the filtered stream to work; see
            # the note on _FilteredStream.
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)

    def _showConfigHistory(self, pipeline: Pipeline, showArgs: str) -> None:
        """Show history for task configuration.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show

        Raises
        ------
        ValueError
            Raised if no pattern is given, if no task matches the given
            label, or if no field of any matching task matches the pattern.
        """
        taskName = None
        pattern = None
        # The argument has the form [TaskLabel::][config.]pattern
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            raise ValueError("Please provide a value with --show history (e.g. history=Task::param)")

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            raise ValueError(f"Pipeline has no tasks named {taskName}")

        found = False
        for taskDef in tasks:
            config = taskDef.config

            # Look for any matches in the config hierarchy for this name
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    # Blank line between successive matches.
                    print("", file=self.stream)

                # Split "a.b.c" into the containing path and the field name.
                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # looking for top-level field
                        hconfig = taskDef.config
                    else:
                        # Resolve the dotted path to the containing config
                        # object; cpath originates from config.names(), not
                        # raw user input.
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(
                        f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                        file=sys.stderr,
                    )
                    hconfig = None

                # Sometimes we end up with a non-Config so skip those
                if isinstance(hconfig, pexConfig.Config | pexConfig.ConfigurableInstance) and hasattr(
                    hconfig, cname
                ):
                    print(f"### Configuration field for task `{taskDef.label}'", file=self.stream)
                    print(pexConfigHistory.format(hconfig, cname), file=self.stream)
                    found = True

        if not found:
            raise ValueError(f"None of the tasks has field matching {pattern}")

    def _showTaskHierarchy(self, pipeline: Pipeline) -> None:
        """Print task hierarchy to stdout

        Parameters
        ----------
        pipeline: `lsst.pipe.base.Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print(f"### Subtasks for task `{taskDef.taskName}'", file=self.stream)

            for configName, taskName in util.subTaskIter(taskDef.config):
                print(f"{configName}: {taskName}", file=self.stream)

    def _showGraph(self, graph: QuantumGraph) -> None:
        """Print quanta information to stdout

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Execution graph.
        """

        def _print_refs(
            mapping: NamedKeyMapping[DatasetType, tuple[DatasetRef, ...]],
            datastore_records: Mapping[str, DatastoreRecordData],
        ) -> None:
            """Print complete information on quantum input or output refs."""
            for key, refs in mapping.items():
                if refs:
                    print(f"  {key}:", file=self.stream)
                    for ref in refs:
                        print(f"  - {ref}", file=self.stream)
                        # Also show any datastore records attached to this
                        # ref, grouped by datastore and table.
                        for datastore_name, record_data in datastore_records.items():
                            if record_map := record_data.records.get(ref.id):
                                print(f"  records for {datastore_name}:", file=self.stream)
                                for table_name, records in record_map.items():
                                    print(f"  - {table_name}:", file=self.stream)
                                    for record in records:
                                        print(f"  - {record}:", file=self.stream)
                else:
                    # No refs for this dataset type.
                    print(f"  {key}: []", file=self.stream)

        for taskNode in graph.iterTaskGraph():
            print(taskNode, file=self.stream)

            for iq, quantum_node in enumerate(graph.getNodesForTask(taskNode)):
                quantum = quantum_node.quantum
                print(
                    f"  Quantum {iq} dataId={quantum.dataId} nodeId={quantum_node.nodeId}:", file=self.stream
                )
                print(" inputs:", file=self.stream)
                _print_refs(quantum.inputs, quantum.datastore_records)
                print(" outputs:", file=self.stream)
                _print_refs(quantum.outputs, quantum.datastore_records)

    def _showWorkflow(self, graph: QuantumGraph) -> None:
        """Print quanta information and dependency to stdout

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Execution graph.
        """
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}", file=self.stream)
            # One line per direct dependency edge into this quantum.
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId} - Child Quantum {node.nodeId}", file=self.stream)

    def _showUri(self, graph: QuantumGraph, args: SimpleNamespace) -> None:
        """Print input and predicted output URIs to stdout

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Execution graph
        args : `types.SimpleNamespace`
            Parsed command line
        """

        def dumpURIs(thisRef: DatasetRef) -> None:
            # predict=True so URIs of outputs that do not exist yet are
            # still reported; "TBD" presumably stands in for the run name
            # used in prediction — confirm against Butler.getURIs docs.
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"  {primary}", file=self.stream)
            else:
                # Composite stored disassembled: list each component URI.
                print(" (disassembled artifact)", file=self.stream)
                for compName, compUri in components.items():
                    print(f"  {compName}: {compUri}", file=self.stream)

        butler = _ButlerFactory.makeReadButler(args)
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}", file=self.stream)
            print(" inputs:", file=self.stream)
            for refs in node.quantum.inputs.values():
                for ref in refs:
                    dumpURIs(ref)
            print(" outputs:", file=self.stream)
            for refs in node.quantum.outputs.values():
                for ref in refs:
                    dumpURIs(ref)