Coverage for python/lsst/ctrl/mpexec/showInfo.py: 11%

186 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-13 09:53 +0000

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

27 

from __future__ import annotations

__all__ = ["ShowInfo"]

import fnmatch
import re
import sys
from collections import defaultdict
from collections.abc import Mapping
from types import SimpleNamespace
from typing import Any

import lsst.pex.config as pexConfig
import lsst.pex.config.history as pexConfigHistory
from lsst.daf.butler import DatasetRef, DatasetType, DatastoreRecordData, NamedKeyMapping
from lsst.pipe.base import Pipeline, QuantumGraph

from . import util
from .cmdLineFwk import _ButlerFactory

47 

48 

49class _FilteredStream: 

50 """A file-like object that filters some config fields. 

51 

52 Note 

53 ---- 

54 This class depends on implementation details of ``Config.saveToStream`` 

55 methods, in particular that that method uses single call to write() 

56 method to save information about single config field, and that call 

57 combines comments string(s) for a field and field path and value. 

58 This class will not work reliably on the "import" strings, so imports 

59 should be disabled by passing ``skipImports=True`` to ``saveToStream()``. 

60 """ 

61 

62 def __init__(self, pattern: str, stream: Any = None) -> None: 

63 if stream is None: 

64 stream = sys.stdout 

65 self.stream = stream 

66 # obey case if pattern isn't lowercase or requests NOIGNORECASE 

67 mat = re.search(r"(.*):NOIGNORECASE$", pattern) 

68 

69 if mat: 

70 pattern = mat.group(1) 

71 self._pattern = re.compile(fnmatch.translate(pattern)) 

72 else: 

73 if pattern != pattern.lower(): 

74 print( 

75 f'Matching "{pattern}" without regard to case (append :NOIGNORECASE to prevent this)', 

76 file=self.stream, 

77 ) 

78 self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE) 

79 

80 def write(self, showStr: str) -> None: 

81 # Strip off doc string line(s) and cut off at "=" for string matching 

82 matchStr = showStr.rstrip().split("\n")[-1].split("=")[0] 

83 if self._pattern.search(matchStr): 

84 self.stream.write(showStr) 

85 

86 

class ShowInfo:
    """Show information about a pipeline or quantum graph.

    Parameters
    ----------
    show : `list` [`str`]
        A list of show commands, some of which may have additional parameters
        specified using an ``=``.
    stream : I/O stream or None.
        The output stream to use. `None` will be treated as `sys.stdout`.

    Raises
    ------
    ValueError
        Raised if some show commands are not recognized.
    """

    # Commands that only need a Pipeline to be processed.
    pipeline_commands = {"pipeline", "config", "history", "tasks", "dump-config"}
    # Commands that need a QuantumGraph (and possibly command-line args).
    graph_commands = {"graph", "workflow", "uri"}

    def __init__(self, show: list[str], stream: Any = None) -> None:
        if stream is None:
            # Defer assigning sys.stdout to allow click to redefine it if
            # it wants. Assigning the default at class definition leads
            # to confusion on reassignment.
            stream = sys.stdout
        # Each command may appear multiple times with different arguments;
        # collect them per command name.
        commands: dict[str, list[str]] = defaultdict(list)
        for value in show:
            command, _, args = value.partition("=")
            commands[command].append(args)
        self.commands = commands
        self.stream = stream
        self.handled: set[str] = set()

        known = self.pipeline_commands | self.graph_commands
        unknown = set(commands) - known
        if unknown:
            raise ValueError(f"Unknown value(s) for show: {unknown} (choose from '{', '.join(known)}')")

    @property
    def unhandled(self) -> frozenset[str]:
        """Return the commands that have not yet been processed."""
        return frozenset(set(self.commands) - self.handled)

    def show_pipeline_info(self, pipeline: Pipeline) -> None:
        """Display useful information about the pipeline.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            The pipeline to use when reporting information.
        """
        for command in self.pipeline_commands:
            if command not in self.commands:
                continue
            args = self.commands[command]

            if command == "pipeline":
                print(pipeline, file=self.stream)
            elif command == "config":
                for arg in args:
                    self._showConfig(pipeline, arg, False)
            elif command == "dump-config":
                for arg in args:
                    self._showConfig(pipeline, arg, True)
            elif command == "history":
                for arg in args:
                    self._showConfigHistory(pipeline, arg)
            elif command == "tasks":
                self._showTaskHierarchy(pipeline)
            else:
                # Defensive: pipeline_commands and this dispatch must agree.
                raise RuntimeError(f"Unexpectedly tried to process command {command!r}.")
            self.handled.add(command)

    def show_graph_info(self, graph: QuantumGraph, args: SimpleNamespace | None = None) -> None:
        """Show information associated with this graph.

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Graph to use when reporting information.
        args : `types.SimpleNamespace`, optional
            Parsed command-line parameters. Used to obtain additional external
            information such as the location of a usable Butler.
        """
        for command in self.graph_commands:
            if command not in self.commands:
                continue
            if command == "graph":
                self._showGraph(graph)
            elif command == "uri":
                if args is None:
                    raise ValueError("The uri option requires additional command line arguments.")
                self._showUri(graph, args)
            elif command == "workflow":
                self._showWorkflow(graph)
            else:
                # Defensive: graph_commands and this dispatch must agree.
                raise RuntimeError(f"Unexpectedly tried to process command {command!r}.")
            self.handled.add(command)

    def _showConfig(self, pipeline: Pipeline, showArgs: str, dumpFullConfig: bool) -> None:
        """Show task configuration

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show
        dumpFullConfig : `bool`
            If true then dump complete task configuration with all imports.
        """
        stream: Any = self.stream
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            # The optional "config." prefix is stripped; the dot is escaped
            # ([.]) so that e.g. "configure" is not mangled, matching the
            # regex used in _showConfigHistory.
            matConfig = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)?", showArgs)
            assert matConfig is not None, "regex always matches"
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                # Only emit config fields whose path matches the pattern.
                stream = _FilteredStream(pattern, stream=stream)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            raise ValueError(f"Pipeline has no tasks named {taskName}")

        for taskDef in tasks:
            print(f"### Configuration for task `{taskDef.label}'", file=self.stream)
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)

    def _showConfigHistory(self, pipeline: Pipeline, showArgs: str) -> None:
        """Show history for task configuration.

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show

        Raises
        ------
        ValueError
            Raised if no field pattern is given, no task matches, or no
            config field matches the pattern.
        """
        taskName = None
        pattern = None
        # The argument can have form [TaskLabel::][config.]pattern
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            raise ValueError("Please provide a value with --show history (e.g. history=Task::param)")

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            raise ValueError(f"Pipeline has no tasks named {taskName}")

        found = False
        for taskDef in tasks:
            config = taskDef.config

            # Look for any matches in the config hierarchy for this name
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    # Blank line between successive matches.
                    print("", file=self.stream)

                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # looking for top-level field
                        hconfig = taskDef.config
                    else:
                        # NOTE: eval of an internally-constructed attribute
                        # path; cpath comes from config.names(), not from
                        # arbitrary untrusted input.
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(
                        f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                        file=sys.stderr,
                    )
                    hconfig = None

                # Sometimes we end up with a non-Config so skip those
                if isinstance(hconfig, pexConfig.Config | pexConfig.ConfigurableInstance) and hasattr(
                    hconfig, cname
                ):
                    print(f"### Configuration field for task `{taskDef.label}'", file=self.stream)
                    print(pexConfigHistory.format(hconfig, cname), file=self.stream)
                    found = True

        if not found:
            raise ValueError(f"None of the tasks has field matching {pattern}")

    def _showTaskHierarchy(self, pipeline: Pipeline) -> None:
        """Print task hierarchy to stdout

        Parameters
        ----------
        pipeline : `lsst.pipe.base.Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print(f"### Subtasks for task `{taskDef.taskName}'", file=self.stream)

            for configName, taskName in util.subTaskIter(taskDef.config):
                print(f"{configName}: {taskName}", file=self.stream)

    def _showGraph(self, graph: QuantumGraph) -> None:
        """Print quanta information to stdout

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Execution graph.
        """

        def _print_refs(
            mapping: NamedKeyMapping[DatasetType, tuple[DatasetRef, ...]],
            datastore_records: Mapping[str, DatastoreRecordData],
        ) -> None:
            """Print complete information on quantum input or output refs."""
            for key, refs in mapping.items():
                if refs:
                    print(f"    {key}:", file=self.stream)
                    for ref in refs:
                        print(f"      - {ref}", file=self.stream)
                        # Show any datastore records attached to this ref.
                        for datastore_name, record_data in datastore_records.items():
                            if record_map := record_data.records.get(ref.id):
                                print(f"        records for {datastore_name}:", file=self.stream)
                                for table_name, records in record_map.items():
                                    print(f"          - {table_name}:", file=self.stream)
                                    for record in records:
                                        print(f"            - {record}:", file=self.stream)
                else:
                    print(f"    {key}: []", file=self.stream)

        for taskNode in graph.iterTaskGraph():
            print(taskNode, file=self.stream)

            for iq, quantum_node in enumerate(graph.getNodesForTask(taskNode)):
                quantum = quantum_node.quantum
                print(
                    f"  Quantum {iq} dataId={quantum.dataId} nodeId={quantum_node.nodeId}:", file=self.stream
                )
                print("  inputs:", file=self.stream)
                _print_refs(quantum.inputs, quantum.datastore_records)
                print("  outputs:", file=self.stream)
                _print_refs(quantum.outputs, quantum.datastore_records)

    def _showWorkflow(self, graph: QuantumGraph) -> None:
        """Print quanta information and dependency to stdout

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Execution graph.
        """
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}", file=self.stream)
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId} - Child Quantum {node.nodeId}", file=self.stream)

    def _showUri(self, graph: QuantumGraph, args: SimpleNamespace) -> None:
        """Print input and predicted output URIs to stdout

        Parameters
        ----------
        graph : `lsst.pipe.base.QuantumGraph`
            Execution graph
        args : `types.SimpleNamespace`
            Parsed command line
        """

        def dumpURIs(thisRef: DatasetRef) -> None:
            # Outputs may not exist yet, so ask for predicted URIs; the run
            # name is a placeholder since only the URI layout matters here.
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}", file=self.stream)
            else:
                # Disassembled composites have per-component URIs only.
                print("    (disassembled artifact)", file=self.stream)
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}", file=self.stream)

        butler = _ButlerFactory.makeReadButler(args)
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}", file=self.stream)
            print("  inputs:", file=self.stream)
            for refs in node.quantum.inputs.values():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:", file=self.stream)
            for refs in node.quantum.outputs.values():
                for ref in refs:
                    dumpURIs(ref)
376 dumpURIs(ref)