Coverage for python/lsst/ctrl/mpexec/showInfo.py: 11%

199 statements  


# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["ShowInfo"]

import fnmatch
import re
import sys
from collections import defaultdict
from collections.abc import Mapping
from types import SimpleNamespace
from typing import Any

import lsst.pex.config as pexConfig
import lsst.pex.config.history as pexConfigHistory
from lsst.daf.butler import Butler, DatasetRef, DatasetType, NamedKeyMapping
from lsst.daf.butler.datastore.record_data import DatastoreRecordData
from lsst.pipe.base import Pipeline, QuantumGraph
from lsst.pipe.base.pipeline_graph import visualization

from . import util
from .cmdLineFwk import _ButlerFactory



class _FilteredStream:
    """A file-like object that filters some config fields.

    Notes
    -----
    This class depends on implementation details of the
    ``Config.saveToStream`` method, in particular that it uses a single
    call to ``write()`` to save the information about a single config
    field, and that this call combines the comment string(s) for a field
    with the field path and value. This class will not work reliably on
    the "import" strings, so imports should be disabled by passing
    ``skipImports=True`` to ``saveToStream()``.
    """


    def __init__(self, pattern: str, stream: Any = None) -> None:
        if stream is None:
            stream = sys.stdout
        self.stream = stream
        # Obey case if pattern isn't lowercase or requests NOIGNORECASE.
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(
                    f'Matching "{pattern}" without regard to case (append :NOIGNORECASE to prevent this)',
                    file=self.stream,
                )
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr: str) -> None:
        # Strip off doc string line(s) and cut off at "=" for string matching.
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            self.stream.write(showStr)
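
# Usage sketch (illustrative only; the config object is hypothetical):
# only config fields whose dotted path matches the glob pattern are
# written through to the underlying stream.
#
#     out = _FilteredStream("connections.*")
#     task_config.saveToStream(out, root="config", skipImports=True)
#
# Appending ":NOIGNORECASE" (e.g. "doWrite:NOIGNORECASE") makes the match
# case sensitive; otherwise matching ignores case.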



class ShowInfo:
    """Show information about a pipeline or quantum graph.

    Parameters
    ----------
    show : `list` [`str`]
        A list of show commands, some of which may have additional
        parameters specified using an ``=``.
    stream : I/O stream or `None`
        The output stream to use. `None` will be treated as `sys.stdout`.

    Raises
    ------
    ValueError
        Raised if some show commands are not recognized.
    """


    pipeline_commands = {
        "pipeline",
        "config",
        "history",
        "tasks",
        "dump-config",
        "pipeline-graph",
        "task-graph",
    }
    graph_commands = {"graph", "workflow", "uri"}

    def __init__(self, show: list[str], stream: Any = None) -> None:
        if stream is None:
            # Defer assigning sys.stdout to allow click to redefine it if
            # it wants. Assigning the default at class definition leads
            # to confusion on reassignment.
            stream = sys.stdout
        commands: dict[str, list[str]] = defaultdict(list)
        for value in show:
            command, _, args = value.partition("=")
            commands[command].append(args)
        self.commands = commands
        self.stream = stream
        self.handled: set[str] = set()

        known = self.pipeline_commands | self.graph_commands
        unknown = set(commands) - known
        if unknown:
            raise ValueError(f"Unknown value(s) for show: {unknown} (choose from '{', '.join(known)}')")
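
    # Parsing sketch (hypothetical values): ShowInfo(["pipeline", "config=*.doWrite"])
    # stores self.commands == {"pipeline": [""], "config": ["*.doWrite"]},
    # i.e. repeated and parameterized commands accumulate per command name.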


    @property
    def unhandled(self) -> frozenset[str]:
        """Return the commands that have not yet been processed."""
        return frozenset(set(self.commands) - self.handled)
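
    # For example (hypothetical): an instance built with
    # ShowInfo(["pipeline", "graph"]) reports unhandled == frozenset({"graph"})
    # after show_pipeline_info() has run but show_graph_info() has not.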


    def show_pipeline_info(self, pipeline: Pipeline, butler: Butler | None) -> None:
        """Display useful information about the pipeline.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            The pipeline to use when reporting information.
        butler : `~lsst.daf.butler.Butler` or `None`
            Butler to use for querying, or `None` if no butler is available.
        """
        if butler is not None:
            registry = butler.registry
        else:
            registry = None
        for command in self.pipeline_commands:
            if command not in self.commands:
                continue
            args = self.commands[command]

            match command:
                case "pipeline":
                    print(pipeline, file=self.stream)
                case "config":
                    for arg in args:
                        self._showConfig(pipeline, arg, False)
                case "dump-config":
                    for arg in args:
                        self._showConfig(pipeline, arg, True)
                case "history":
                    for arg in args:
                        self._showConfigHistory(pipeline, arg)
                case "tasks":
                    self._showTaskHierarchy(pipeline)
                case "pipeline-graph":
                    visualization.show(pipeline.to_graph(registry), self.stream, dataset_types=True)
                case "task-graph":
                    visualization.show(pipeline.to_graph(registry), self.stream, dataset_types=False)
                case _:
                    raise RuntimeError(f"Unexpectedly tried to process command {command!r}.")
            self.handled.add(command)
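
    # A minimal usage sketch (the pipeline object is hypothetical): print
    # the pipeline definition and a filtered config dump; no butler is
    # needed for these commands.
    #
    #     info = ShowInfo(["pipeline", "config=*.doWrite"])
    #     info.show_pipeline_info(pipeline, butler=None)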


    def show_graph_info(self, graph: QuantumGraph, args: SimpleNamespace | None = None) -> None:
        """Show information associated with this graph.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Graph to use when reporting information.
        args : `types.SimpleNamespace`, optional
            Parsed command-line parameters. Used to obtain additional
            external information such as the location of a usable Butler.
        """
        for command in self.graph_commands:
            if command not in self.commands:
                continue
            match command:
                case "graph":
                    self._showGraph(graph)
                case "uri":
                    if args is None:
                        raise ValueError("The uri option requires additional command line arguments.")
                    self._showUri(graph, args)
                case "workflow":
                    self._showWorkflow(graph)
                case _:
                    raise RuntimeError(f"Unexpectedly tried to process command {command!r}.")
            self.handled.add(command)
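
    # Similarly for graph-stage commands (the quantum graph object is
    # hypothetical); only "uri" needs the parsed command-line args.
    #
    #     info = ShowInfo(["workflow"])
    #     info.show_graph_info(qgraph)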


    def _showConfig(self, pipeline: Pipeline, showArgs: str, dumpFullConfig: bool) -> None:
        """Show task configuration.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True`, dump the complete task configuration with all imports.
        """
        stream: Any = self.stream
        if dumpFullConfig:
            # Task label can be given with this option.
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE].
            matConfig = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)?", showArgs)
            assert matConfig is not None, "regex always matches"
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern, stream=stream)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            raise ValueError(f"Pipeline has no tasks named {taskName}")

        for taskDef in tasks:
            print(f"### Configuration for task `{taskDef.label}'", file=self.stream)
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)
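
    # Accepted showArgs forms (task label hypothetical): "isr::" dumps all
    # config fields for one task, "isr::doWrite*" filters fields within one
    # task by glob, and "doWrite*:NOIGNORECASE" matches case-sensitively
    # across all tasks. With dumpFullConfig=True the argument is a bare
    # task label instead.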


    def _showConfigHistory(self, pipeline: Pipeline, showArgs: str) -> None:
        """Show history for task configuration.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        """
        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            raise ValueError("Please provide a value with --show history (e.g. history=Task::param)")

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            raise ValueError(f"Pipeline has no tasks named {taskName}")

        found = False
        for taskDef in tasks:
            config = taskDef.config

            # Look for any matches in the config hierarchy for this name.
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    print("", file=self.stream)

                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # Looking for a top-level field.
                        hconfig = taskDef.config
                    else:
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(
                        f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                        file=sys.stderr,
                    )
                    hconfig = None

                # Sometimes we end up with a non-Config so skip those.
                if isinstance(hconfig, pexConfig.Config | pexConfig.ConfigurableInstance) and hasattr(
                    hconfig, cname
                ):
                    print(f"### Configuration field for task `{taskDef.label}'", file=self.stream)
                    print(pexConfigHistory.format(hconfig, cname), file=self.stream)
                    found = True

        if not found:
            raise ValueError(f"None of the tasks has a field matching {pattern}")
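
    # Example invocation (task label and field hypothetical): a showArgs of
    # "isr::doWrite" prints where config.doWrite was set for the task
    # labeled "isr", via lsst.pex.config history tracking.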


    def _showTaskHierarchy(self, pipeline: Pipeline) -> None:
        """Print the task hierarchy to the output stream.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print(f"### Subtasks for task `{taskDef.taskName}'", file=self.stream)

            for configName, taskName in util.subTaskIter(taskDef.config):
                print(f"{configName}: {taskName}", file=self.stream)


    def _showGraph(self, graph: QuantumGraph) -> None:
        """Print quanta information to the output stream.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Execution graph.
        """

        def _print_refs(
            mapping: NamedKeyMapping[DatasetType, tuple[DatasetRef, ...]],
            datastore_records: Mapping[str, DatastoreRecordData],
        ) -> None:
            """Print complete information on quantum input or output refs."""
            for key, refs in mapping.items():
                if refs:
                    print(f"    {key}:", file=self.stream)
                    for ref in refs:
                        print(f"      - {ref}", file=self.stream)
                        for datastore_name, record_data in datastore_records.items():
                            if record_map := record_data.records.get(ref.id):
                                print(f"        records for {datastore_name}:", file=self.stream)
                                for table_name, records in record_map.items():
                                    print(f"          - {table_name}:", file=self.stream)
                                    for record in records:
                                        print(f"            - {record}", file=self.stream)
                else:
                    print(f"    {key}: []", file=self.stream)

        for taskNode in graph.iterTaskGraph():
            print(taskNode, file=self.stream)

            for iq, quantum_node in enumerate(graph.getNodesForTask(taskNode)):
                quantum = quantum_node.quantum
                print(
                    f"  Quantum {iq} dataId={quantum.dataId} nodeId={quantum_node.nodeId}:", file=self.stream
                )
                print("    inputs:", file=self.stream)
                _print_refs(quantum.inputs, quantum.datastore_records)
                print("    outputs:", file=self.stream)
                _print_refs(quantum.outputs, quantum.datastore_records)
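
    # Schematic of the emitted layout (not verbatim output): one line per
    # task node, then for each quantum a "  Quantum N dataId=... nodeId=...:"
    # header followed by indented inputs/outputs blocks that list every
    # dataset ref and any datastore records attached to it.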


    def _showWorkflow(self, graph: QuantumGraph) -> None:
        """Print quanta information and dependencies to the output stream.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Execution graph.
        """
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}", file=self.stream)
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId} - Child Quantum {node.nodeId}", file=self.stream)


    def _showUri(self, graph: QuantumGraph, args: SimpleNamespace) -> None:
        """Print input and predicted output URIs to the output stream.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line.
        """

        def dumpURIs(thisRef: DatasetRef) -> None:
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}", file=self.stream)
            else:
                print("    (disassembled artifact)", file=self.stream)
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}", file=self.stream)

        butler = _ButlerFactory.makeReadButler(args)
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}", file=self.stream)
            print("  inputs:", file=self.stream)
            for refs in node.quantum.inputs.values():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:", file=self.stream)
            for refs in node.quantum.outputs.values():
                for ref in refs:
                    dumpURIs(ref)
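

# End-to-end sketch (the pipeline and quantum graph objects are
# hypothetical): build the helper from the --show values, run both
# stages, and check that every requested command was handled.
#
#     show = ShowInfo(["pipeline", "workflow"])
#     show.show_pipeline_info(pipeline, butler=None)
#     show.show_graph_info(qgraph)
#     assert not show.unhandled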