Coverage for python / lsst / pipe / base / pipeline_graph / __main__.py: 0%

127 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-26 08:59 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = () 

30 

31import argparse 

32import dataclasses 

33import sys 

34import textwrap 

35from collections.abc import Sequence 

36from contextlib import ExitStack 

37 

38from lsst.daf.butler import Butler 

39from lsst.resources import ResourcePath 

40 

41from ..pipeline import Pipeline 

42from ._pipeline_graph import PipelineGraph, TaskImportMode 

43from .visualization._options import NodeAttributeOptions 

44from .visualization._show import show 

45 

46 

def main(argv: Sequence[str]) -> int:
    """Run the development command-line interface.

    Parameters
    ----------
    argv : `~collections.abc.Sequence` [ `str` ]
        Command-line arguments, not including the program (typically
        ``sys.argv[1:]``).

    Returns
    -------
    status : `int`
        Process exit status code (zero on success).

    Notes
    -----
    This CLI is much more capable than pipetask's for very specific things, but
    not as polished or user friendly. It is intended primarily for use during
    development of pipeline graph (especially visualization, where tests can't
    really be used to judge the quality of the outputs, and a fast turnaround
    is desirable).
    """
    parser = argparse.ArgumentParser(
        description="Expand, resolve, and display pipelines as graphs.",
        epilog=textwrap.dedent(
            """
            WARNING: This is an experimental/development command-line interface
            that is subject to change or removal without warning. The `--show`
            option to 'pipetask build' (with 'pipeline-graph', 'task-graph', or
            'dataset-type-graph' as the argument) is the preferred way to
            display text-based pipeline graphs.
            """
        ),
    )
    Arguments.add_args_to_parser(parser)
    args = Arguments.from_parsed_args(parser.parse_args(argv))
    pipeline_graph = read_input_pipeline(args.input_pipeline)
    if args.resolve:
        # Fully resolve dataset types and dimensions against a real butler
        # data repository.
        butler = Butler.from_config(args.resolve, writeable=False)
        pipeline_graph.resolve(butler.registry)
    else:
        # Without a repository we can only do the partial resolution needed
        # for display purposes.
        pipeline_graph.resolve(visualization_only=True)
    if args.save:
        pipeline_graph._write_uri(ResourcePath(args.save))
    if args.show:
        with ExitStack() as stack:
            if args.show == "-":
                stream = sys.stdout
            else:
                path = ResourcePath(args.show)
                # Open the destination for *writing*; the visualization is
                # written to this stream below.  (Mode "r" here was a bug.)
                stream = stack.enter_context(path.open("w"))
            show(
                pipeline_graph,
                stream,
                dataset_types=args.display.dataset_types,
                init=args.display.init,
                color=args.display.color,
                dimensions=args.display.node_attributes.dimensions,
                task_classes=args.display.node_attributes.task_classes,
                storage_classes=args.display.node_attributes.storage_classes,
                merge_input_trees=args.display.merge_input_trees,
                merge_output_trees=args.display.merge_output_trees,
                merge_intermediates=args.display.merge_intermediates,
                include_automatic_connections=args.display.include_automatic_connections,
                width=args.display.width,
                column_crossing_penalty=args.display.column_crossing_penalty,
                column_insertion_penalty=args.display.column_insertion_penalty,
                column_interior_penalty=args.display.column_interior_penalty,
            )
    return 0

112 

113 

def read_input_pipeline(uri: str) -> PipelineGraph:
    """Read an input pipeline or pipeline graph from a URI.

    Parameters
    ----------
    uri : `str`
        URI to read. Extension is used to determine whether this is a pipeline
        (.yaml) or pipeline graph (.json.gz).

    Returns
    -------
    graph : `.PipelineGraph`
        Pipeline graph.
    """
    path = ResourcePath(uri)
    extension = path.getExtension()
    if extension == ".yaml":
        # A pipeline specification: load it and convert to a graph.
        return Pipeline.from_uri(path).to_graph()
    if extension == ".json.gz":
        # An already-serialized pipeline graph; no task imports needed.
        return PipelineGraph._read_uri(path, import_mode=TaskImportMode.DO_NOT_IMPORT)
    raise ValueError(f"Unexpected extension for pipeline file: {extension!r}.")

137 

138 

@dataclasses.dataclass
class Arguments:
    """Struct that manages the CLI arguments and options."""

    input_pipeline: str
    """URI to the input pipeline."""

    save: str | None
    """URI for the saved pipeline, or `None` if it will not be saved."""

    show: str | None
    """File to write the graph visualization to; ``-`` for STDOUT, or `None`
    for no visualization.
    """

    resolve: str | None
    """Butler repository URI to use to resolve the graph."""

    display: DisplayArguments
    """Additional options specific to visualization."""

    @classmethod
    def from_parsed_args(cls, args: argparse.Namespace) -> Arguments:
        """Interpret parsed arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed argument struct, as returned by
            `argparse.ArgumentParser.parse_args`.

        Returns
        -------
        arguments : `Arguments`
            Interpreted arguments struct.
        """
        return cls(
            input_pipeline=args.input_pipeline,
            save=args.save,
            show=args.show,
            resolve=args.resolve,
            display=DisplayArguments.from_parsed_args(args),
        )

    @classmethod
    def add_args_to_parser(cls, parser: argparse.ArgumentParser) -> None:
        """Add the options and arguments described by this class to a parser.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser to modify in-place.
        """
        parser.add_argument(
            "input_pipeline",
            type=str,
            metavar="URI",
            help="""
                Filename or URI for the input pipeline specification (.yaml) or
                graph (.json.gz) file to read.
            """,
        )
        parser.add_argument(
            "--save",
            type=str,
            metavar="FILE",
            help="""
                Save the pipeline graph content to this file. Should have a
                .json.gz extension or no extension (in which case .json.gz will
                be added).
            """,
            default=None,
        )
        parser.add_argument(
            "--show",
            type=str,
            nargs="?",
            metavar="FILE",
            help="""
                Print the pipeline graph in human-readable form using unicode
                lines and symbols. May be '-' or have no value for STDOUT.
            """,
            default=None,
            const="-",
        )
        parser.add_argument(
            "--resolve",
            type=str,
            # No ``nargs`` here: with ``nargs=1`` argparse stores a
            # one-element list, and ``Butler.from_config`` would then be
            # handed a list instead of a repository URI string.
            metavar="REPO",
            help="""
                A butler data repository to use to resolve the graph's dataset
                types and dimensions.
            """,
            default=None,
        )
        DisplayArguments.add_args_to_parser(parser)

236 

237 

@dataclasses.dataclass
class DisplayArguments:
    """Struct that manages the CLI arguments and options specific to
    visualization.
    """

    dataset_types: bool
    """Whether the visualization should show dataset type nodes."""

    init: bool | None
    """Whether to include task initialization, its inputs, and its outputs.

    `False` shows only runtime nodes. `True` shows all init nodes only.
    `None` shows all nodes.
    """

    color: Sequence[str] | bool | None
    """Colors to use for node symbols and edge lines.

    See the `show` argument of the same name for details.
    """

    node_attributes: NodeAttributeOptions
    """Options for which attributes of nodes to display and simplify."""

    merge_input_trees: int
    """Whether/how to merge input trees with the same structure.

    See the `show` argument of the same name for details.
    """

    merge_output_trees: int
    """Whether/how to merge output trees with the same structure.

    See the `show` argument of the same name for details.
    """

    merge_intermediates: bool
    """Whether to merge internal parallel subgraphs.

    See the `show` argument of the same name for details.
    """

    include_automatic_connections: bool
    """Whether to include automatic connections like config, metadata, and
    logs.
    """

    width: int
    """Width of the graph in columns.

    See the `show` argument of the same name for details.
    """

    column_crossing_penalty: int
    """Graph layout tuning parameter; see the `show` argument of the same name
    for details.
    """

    column_insertion_penalty: int
    """Graph layout tuning parameter; see the `show` argument of the same name
    for details.
    """

    column_interior_penalty: int
    """Graph layout tuning parameter; see the `show` argument of the same name
    for details.
    """

    def __post_init__(self) -> None:
        # Storage classes are only displayed on dataset type nodes, so asking
        # for them in a task-only graph can never do anything; fail early.
        if self.node_attributes.storage_classes and not self.dataset_types:
            raise argparse.ArgumentError(
                None,
                "--storage-classes does nothing unless --dataset-types or --dataset-types-only is passed.",
            )

    @classmethod
    def from_parsed_args(cls, args: argparse.Namespace) -> DisplayArguments:
        """Interpret parsed arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed argument struct, as returned by
            `argparse.ArgumentParser.parse_args`.

        Returns
        -------
        arguments : `DisplayArguments`
            Interpreted arguments struct.
        """
        return cls(
            dataset_types=args.dataset_types or args.dataset_types_only,
            init=args.init,
            color=args.color,
            node_attributes=NodeAttributeOptions(
                dimensions=args.dimensions,
                task_classes=args.task_classes,
                storage_classes=args.storage_classes,
                status=None,
            ),
            merge_input_trees=args.merge_input_trees,
            merge_output_trees=args.merge_output_trees,
            merge_intermediates=args.merge_intermediates,
            include_automatic_connections=args.include_automatic_connections,
            width=args.width,
            column_crossing_penalty=args.column_crossing_penalty,
            column_insertion_penalty=args.column_insertion_penalty,
            column_interior_penalty=args.column_interior_penalty,
        )

    @classmethod
    def add_args_to_parser(cls, parser: argparse.ArgumentParser) -> None:
        """Add the options and arguments described by this class to a parser.

        Parameters
        ----------
        parser : `argparse.ArgumentParser`
            Argument parser to modify in-place.
        """
        group = parser.add_argument_group("additional options for --show")
        dataset_type_inclusion = group.add_mutually_exclusive_group()
        dataset_type_inclusion.add_argument(
            "--dataset-types",
            action="store_true",
            help="Show a graph containing both dataset types and tasks. Default is a task-only graph.",
        )
        # This option was previously missing even though `from_parsed_args`
        # reads ``args.dataset_types_only``, which made every parse fail with
        # an AttributeError.
        dataset_type_inclusion.add_argument(
            "--dataset-types-only",
            action="store_true",
            dest="dataset_types_only",
            help="Show a graph containing only dataset types, with task nodes hidden.",
        )
        group.add_argument(
            "--init-only",
            action="store_true",
            dest="init",
            help="""
                Show a graph of init-input and init-output dataset types and/or
                task initializations instead of the usual runtime graph.
            """,
            default=False,
        )
        group.add_argument(
            "--init",
            action="store_const",
            const=None,
            help="""
                Show a graph of init-input and init-output dataset types and
                task initializations in addition to the usual runtime graph.
                Requires --dataset-types.
            """,
            default=False,
        )
        color_group = group.add_mutually_exclusive_group()
        color_group.add_argument(
            "--color",
            action="store_true",
            help="""
                Always use terminal escape codes to add color to the graph.
                Default is to use color only if an interactive terminal is
                detected.
            """,
            default=None,
        )
        color_group.add_argument(
            "--no-color",
            action="store_false",
            # Without an explicit dest this option would have populated
            # ``args.no_color`` and had no effect on ``args.color`` at all.
            dest="color",
            help="""
                Never use terminal escape codes to add color to the graph.
                Default is to use color only if an interactive terminal is
                detected.
            """,
            default=None,
        )
        color_group.add_argument(
            "--palette",
            type=str,
            nargs="+",
            metavar="COLORS",
            help="""
                A list of colors to use for nodes. Options include 'red',
                'green', 'blue', 'cyan', 'yellow', 'magenta', and any of these
                preceded by 'light' (case insensitive). Implies --color.
            """,
            dest="color",
        )
        dimensions_group = group.add_mutually_exclusive_group()
        dimensions_group.add_argument(
            "--no-dimensions",
            action="store_false",
            help="""
                Do not include dimensions in node descriptions or merge
                comparisons at all. This is the default if the loaded graph
                was not resolved and --resolve was not passed.
            """,
            dest="dimensions",
            default=None,
        )
        dimensions_group.add_argument(
            "--full-dimensions",
            action="store_const",
            help="""
                Show full dimensions in node descriptions, including those that
                are implied or required by another dimension in the set.
            """,
            dest="dimensions",
            const="full",
        )
        dimensions_group.add_argument(
            "--concise-dimensions",
            action="store_const",
            help="""
                Show concise dimensions in node descriptions, removing those
                that are implied or required by another dimension in the set.
                This is the default if the loaded graph was already resolved
                or --resolve is passed.
            """,
            dest="dimensions",
            const="concise",
        )
        task_classes_group = group.add_mutually_exclusive_group()
        # Note: this must be added to task_classes_group (it was previously
        # added to dimensions_group, which broke both exclusion groups).
        task_classes_group.add_argument(
            "--no-task-classes",
            action="store_false",
            help="""
                Do not include task classes in node descriptions or merge
                comparisons at all.
            """,
            dest="task_classes",
            default=None,
        )
        task_classes_group.add_argument(
            "--full-task-classes",
            action="store_const",
            help="""
                Show fully-qualified task classes in task node descriptions,
                and use task classes in merge comparisons.
            """,
            dest="task_classes",
            const="full",
        )
        task_classes_group.add_argument(
            "--concise-task-classes",
            action="store_const",
            help="""
                Show unqualified task classes in task node descriptions, and
                use task classes in merge comparisons. This is the default.
            """,
            const="concise",
            dest="task_classes",
        )
        storage_classes_group = group.add_mutually_exclusive_group()
        storage_classes_group.add_argument(
            "--no-storage-classes",
            action="store_false",
            help="""
                Do not show storage classes in dataset type node descriptions
                or use storage classes in merge comparisons. This is the
                default if the loaded graph was not resolved and --resolve was
                not passed.
            """,
            dest="storage_classes",
            default=None,
        )
        storage_classes_group.add_argument(
            "--storage-classes",
            action="store_true",
            help="""
                Show storage classes in dataset type node descriptions, and use
                storage classes in merge comparisons. This is the default if
                the loaded graph was already resolved or --resolve is passed.
            """,
            dest="storage_classes",
            default=None,
        )
        group.add_argument(
            "--merge-input-trees",
            type=int,
            default=4,
            help="""
                Depth at which to merge input trees with the same outputs,
                dimensions, task classes, and storage classes. Zero disables
                merging.
            """,
        )
        group.add_argument(
            "--merge-output-trees",
            type=int,
            default=4,
            help="""
                Depth at which to merge output trees with the same inputs,
                dimensions, task classes, and storage classes. Zero disables
                merging.
            """,
        )
        group.add_argument(
            "--no-merge-intermediates",
            action="store_false",
            dest="merge_intermediates",
            help="""
                Disable merging of intermediate nodes that share the same
                inputs, outputs, dimensions, task classes, and storage classes.
            """,
        )
        group.add_argument(
            "--include-automatic-connections",
            action="store_true",
            help="""
                Include output datasets added by the execution system, such
                as configs, metadata, and logs.
            """,
        )
        group.add_argument(
            "--width",
            type=int,
            default=-1,
            help="""
                Width in characters for the graph and node descriptions.
                Default (-1) is to use the terminal width. May be 0 to put no
                limit on the width. This only sets whether node descriptions
                are truncated and moved below the graph, so it may be exceeded
                by the graph itself.
            """,
        )
        group.add_argument(
            "--column-crossing-penalty",
            type=int,
            default=1,
            help="""
                When selecting the column for a new node, penalize a
                candidate column by multiplying the number of ongoing vertical
                edges this node's horizontal incoming edges would have to 'hop'
                by this value.
            """,
        )
        group.add_argument(
            "--column-insertion-penalty",
            type=int,
            default=2,
            help="""
                When selecting the column for a new node, penalize adding new
                columns by this amount.
            """,
        )
        group.add_argument(
            "--column-interior-penalty",
            type=int,
            default=1,
            help="""
                When selecting the column for a new node, penalize adding new
                columns between two existing columns by this amount (in
                addition to the --column-insertion-penalty applied to all new
                columns).
            """,
        )

587 

588 

if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status; previously
    # the status code was computed and then silently discarded.
    sys.exit(main(sys.argv[1:]))