Coverage for python/lsst/ctrl/mpexec/showInfo.py: 11%

199 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-03 10:43 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["ShowInfo"] 

31 

32import fnmatch 

33import re 

34import sys 

35from collections import defaultdict 

36from collections.abc import Mapping 

37from types import SimpleNamespace 

38from typing import Any 

39 

40import lsst.pex.config as pexConfig 

41import lsst.pex.config.history as pexConfigHistory 

42from lsst.daf.butler import Butler, DatasetRef, DatasetType, NamedKeyMapping 

43from lsst.daf.butler.datastore.record_data import DatastoreRecordData 

44from lsst.pipe.base import Pipeline, QuantumGraph 

45from lsst.pipe.base.pipeline_graph import visualization 

46 

47from . import util 

48from .cmdLineFwk import _ButlerFactory 

49 

50 

51class _FilteredStream: 

52 """A file-like object that filters some config fields. 

53 

54 Note 

55 ---- 

56 This class depends on implementation details of ``Config.saveToStream`` 

57 methods, in particular that that method uses single call to write() 

58 method to save information about single config field, and that call 

59 combines comments string(s) for a field and field path and value. 

60 This class will not work reliably on the "import" strings, so imports 

61 should be disabled by passing ``skipImports=True`` to ``saveToStream()``. 

62 """ 

63 

64 def __init__(self, pattern: str, stream: Any = None) -> None: 

65 if stream is None: 

66 stream = sys.stdout 

67 self.stream = stream 

68 # obey case if pattern isn't lowercase or requests NOIGNORECASE 

69 mat = re.search(r"(.*):NOIGNORECASE$", pattern) 

70 

71 if mat: 

72 pattern = mat.group(1) 

73 self._pattern = re.compile(fnmatch.translate(pattern)) 

74 else: 

75 if pattern != pattern.lower(): 

76 print( 

77 f'Matching "{pattern}" without regard to case (append :NOIGNORECASE to prevent this)', 

78 file=self.stream, 

79 ) 

80 self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE) 

81 

82 def write(self, showStr: str) -> None: 

83 # Strip off doc string line(s) and cut off at "=" for string matching 

84 matchStr = showStr.rstrip().split("\n")[-1].split("=")[0] 

85 if self._pattern.search(matchStr): 

86 self.stream.write(showStr) 

87 

88 

class ShowInfo:
    """Show information about a pipeline or quantum graph.

    Parameters
    ----------
    show : `list` [`str`]
        A list of show commands, some of which may have additional parameters
        specified using an ``=``.
    stream : I/O stream or None.
        The output stream to use. `None` will be treated as `sys.stdout`.

    Raises
    ------
    ValueError
        Raised if some show commands are not recognized.
    """

    # Commands that can be satisfied from a pipeline alone.
    pipeline_commands = {
        "pipeline",
        "config",
        "history",
        "tasks",
        "dump-config",
        "pipeline-graph",
        "task-graph",
    }
    # Commands that require a quantum graph.
    graph_commands = {"graph", "workflow", "uri"}

    def __init__(self, show: list[str], stream: Any = None) -> None:
        if stream is None:
            # Defer assigning sys.stdout to allow click to redefine it if
            # it wants. Assigning the default at class definition leads
            # to confusion on reassignment.
            stream = sys.stdout
        # Split each "command=args" entry on the first "=".  A command may be
        # repeated, so the argument strings are accumulated per command.
        commands: dict[str, list[str]] = defaultdict(list)
        for value in show:
            command, _, args = value.partition("=")
            commands[command].append(args)
        self.commands = commands
        self.stream = stream
        # Commands already processed; see the `unhandled` property.
        self.handled: set[str] = set()

        known = self.pipeline_commands | self.graph_commands
        unknown = set(commands) - known
        if unknown:
            raise ValueError(f"Unknown value(s) for show: {unknown} (choose from '{', '.join(known)}')")

    @property
    def unhandled(self) -> frozenset[str]:
        """Return the commands that have not yet been processed."""
        return frozenset(set(self.commands) - self.handled)

    def show_pipeline_info(self, pipeline: Pipeline, butler: Butler | None) -> None:
        """Display useful information about the pipeline.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            The pipeline to use when reporting information.
        butler : `lsst.daf.butler.Butler` or `None`
            Butler whose registry is passed to ``pipeline.to_graph`` for the
            ``pipeline-graph`` and ``task-graph`` commands; may be `None`.

        Raises
        ------
        RuntimeError
            Raised if an unexpected pipeline command is encountered.
        """
        if butler is not None:
            registry = butler.registry
        else:
            registry = None
        # Only process pipeline-related commands here; graph commands are
        # handled by show_graph_info.
        for command in self.pipeline_commands:
            if command not in self.commands:
                continue
            args = self.commands[command]

            match command:
                case "pipeline":
                    print(pipeline, file=self.stream)
                case "config":
                    for arg in args:
                        self._showConfig(pipeline, arg, False)
                case "dump-config":
                    for arg in args:
                        self._showConfig(pipeline, arg, True)
                case "history":
                    for arg in args:
                        self._showConfigHistory(pipeline, arg)
                case "tasks":
                    self._showTaskHierarchy(pipeline)
                case "pipeline-graph":
                    visualization.show(pipeline.to_graph(registry), self.stream, dataset_types=True)
                case "task-graph":
                    visualization.show(pipeline.to_graph(registry), self.stream, dataset_types=False)
                case _:
                    # Defensive: pipeline_commands and this match statement
                    # must be kept in sync.
                    raise RuntimeError(f"Unexpectedly tried to process command {command!r}.")
            self.handled.add(command)

    def show_graph_info(self, graph: QuantumGraph, args: SimpleNamespace | None = None) -> None:
        """Show information associated with this graph.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Graph to use when reporting information.
        args : `types.SimpleNamespace`, optional
            Parsed command-line parameters. Used to obtain additional external
            information such as the location of a usable Butler.

        Raises
        ------
        ValueError
            Raised if the ``uri`` command was requested but ``args`` is
            `None`.
        RuntimeError
            Raised if an unexpected graph command is encountered.
        """
        for command in self.graph_commands:
            if command not in self.commands:
                continue
            match command:
                case "graph":
                    self._showGraph(graph)
                case "uri":
                    if args is None:
                        raise ValueError("The uri option requires additional command line arguments.")
                    self._showUri(graph, args)
                case "workflow":
                    self._showWorkflow(graph)
                case _:
                    # Defensive: graph_commands and this match statement must
                    # be kept in sync.
                    raise RuntimeError(f"Unexpectedly tried to process command {command!r}.")
            self.handled.add(command)

    def _showConfig(self, pipeline: Pipeline, showArgs: str, dumpFullConfig: bool) -> None:
        """Show task configuration.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.  With ``dumpFullConfig`` this is a task
            label; otherwise it may have the form
            ``[TaskLabel::][pattern[:NOIGNORECASE]]``.
        dumpFullConfig : `bool`
            If true then dump complete task configuration with all imports.

        Raises
        ------
        ValueError
            Raised if the pipeline has no task matching the given name.
        """
        stream: Any = self.stream
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
            assert matConfig is not None, "regex always matches"
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                # Restrict output to config fields whose names match.
                stream = _FilteredStream(pattern, stream=stream)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            raise ValueError(f"Pipeline has no tasks named {taskName}")

        for taskDef in tasks:
            print(f"### Configuration for task `{taskDef.label}'", file=self.stream)
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)

    def _showConfigHistory(self, pipeline: Pipeline, showArgs: str) -> None:
        """Show history for task configuration.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show, in the form ``[TaskLabel::][config.]pattern``.

        Raises
        ------
        ValueError
            Raised if no field pattern was supplied, if the pipeline has no
            task matching the given name, or if no task has a field matching
            the pattern.
        """
        taskName = None
        pattern = None
        # The argument can have form [TaskLabel::][config.]pattern
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            raise ValueError("Please provide a value with --show history (e.g. history=Task::param)")

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            raise ValueError(f"Pipeline has no tasks named {taskName}")

        found = False
        for taskDef in tasks:
            config = taskDef.config

            # Look for any matches in the config hierarchy for this name
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    # Blank line between successive matches.
                    print("", file=self.stream)

                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # looking for top-level field
                        hconfig = taskDef.config
                    else:
                        # eval walks the nested config hierarchy; note that
                        # cpath comes from config.names() (filtered by the
                        # user's pattern), not directly from user input.
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(
                        f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                        file=sys.stderr,
                    )
                    hconfig = None

                # Sometimes we end up with a non-Config so skip those
                if isinstance(hconfig, pexConfig.Config | pexConfig.ConfigurableInstance) and hasattr(
                    hconfig, cname
                ):
                    print(f"### Configuration field for task `{taskDef.label}'", file=self.stream)
                    print(pexConfigHistory.format(hconfig, cname), file=self.stream)
                    found = True

        if not found:
            raise ValueError(f"None of the tasks has field matching {pattern}")

    def _showTaskHierarchy(self, pipeline: Pipeline) -> None:
        """Print task hierarchy to the output stream.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print(f"### Subtasks for task `{taskDef.taskName}'", file=self.stream)

            for configName, taskName in util.subTaskIter(taskDef.config):
                print(f"{configName}: {taskName}", file=self.stream)

    def _showGraph(self, graph: QuantumGraph) -> None:
        """Print quanta information to the output stream.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Execution graph.
        """

        def _print_refs(
            mapping: NamedKeyMapping[DatasetType, tuple[DatasetRef, ...]],
            datastore_records: Mapping[str, DatastoreRecordData],
        ) -> None:
            """Print complete information on quantum input or output refs."""
            for key, refs in mapping.items():
                if refs:
                    print(f"      {key}:", file=self.stream)
                    for ref in refs:
                        print(f"      - {ref}", file=self.stream)
                        # Also show any datastore records attached to this ref.
                        for datastore_name, record_data in datastore_records.items():
                            if record_map := record_data.records.get(ref.id):
                                print(f"        records for {datastore_name}:", file=self.stream)
                                for table_name, records in record_map.items():
                                    print(f"        - {table_name}:", file=self.stream)
                                    for record in records:
                                        print(f"          - {record}:", file=self.stream)
                else:
                    print(f"      {key}: []", file=self.stream)

        for taskNode in graph.iterTaskGraph():
            print(taskNode, file=self.stream)

            for iq, quantum_node in enumerate(graph.getNodesForTask(taskNode)):
                quantum = quantum_node.quantum
                print(
                    f"  Quantum {iq} dataId={quantum.dataId} nodeId={quantum_node.nodeId}:", file=self.stream
                )
                print("    inputs:", file=self.stream)
                _print_refs(quantum.inputs, quantum.datastore_records)
                print("    outputs:", file=self.stream)
                _print_refs(quantum.outputs, quantum.datastore_records)

    def _showWorkflow(self, graph: QuantumGraph) -> None:
        """Print quanta information and dependencies to the output stream.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Execution graph.
        """
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}", file=self.stream)
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId} - Child Quantum {node.nodeId}", file=self.stream)

    def _showUri(self, graph: QuantumGraph, args: SimpleNamespace) -> None:
        """Print input and predicted output URIs to the output stream.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line; used to construct a read-only butler.
        """

        def dumpURIs(thisRef: DatasetRef) -> None:
            # NOTE(review): run="TBD" is presumably a placeholder required by
            # getURIs when predict=True; confirm its value is irrelevant here.
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}", file=self.stream)
            else:
                # Composite dataset stored as separate component artifacts.
                print("    (disassembled artifact)", file=self.stream)
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}", file=self.stream)

        butler = _ButlerFactory.makeReadButler(args)
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}", file=self.stream)
            print("  inputs:", file=self.stream)
            for refs in node.quantum.inputs.values():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:", file=self.stream)
            for refs in node.quantum.outputs.values():
                for ref in refs:
                    dumpURIs(ref)
396 dumpURIs(ref)