# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

__all__ = ['CmdLineFwk']

# -------------------------------
# Imports of standard modules --
# -------------------------------
import argparse
import datetime
import fnmatch
import logging
import os
import re
import sys
from typing import List, Optional, Tuple
import warnings

# -----------------------------
# Imports for other modules --
# -----------------------------
from lsst.daf.butler import (
    Butler,
    CollectionSearch,
    CollectionType,
    DatasetTypeRestriction,
    Registry,
)
from lsst.daf.butler.registry import MissingCollectionError
import lsst.log
import lsst.pex.config as pexConfig
from lsst.pipe.base import GraphBuilder, Pipeline, QuantumGraph
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor
from . import util
from lsst.utils import doImport

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

# logging properties
_LOG_PROP = """\
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern={}
"""

_LOG = logging.getLogger(__name__.partition(".")[2])


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = list(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = []
            self.exists = False

    def __str__(self):
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: List[Tuple[str, DatasetTypeRestriction]]
    """The definition of the collection, if it already exists (`list`).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """
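
# Illustrative sketch (not part of the original module): how these helpers
# report on existing collections. The repository path and collection names
# below are hypothetical.
#
#     butler = Butler("/path/to/repo", writeable=False)
#     out = _OutputChainedCollectionInfo(butler.registry, "u/someone/analysis")
#     run = _OutputRunCollectionInfo(butler.registry, "u/someone/analysis/run1")
#     if out.exists:
#         print(out.chain)   # existing chain definition, searched front to back
#     print(run.exists)      # True only if a RUN collection with that name exists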


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `argparse.Namespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should already
            exist but will be removed from the output chained collection and
            replaced with a new one.

        ``prune_replaced``
            How to prune the replaced run (requires ``replace_run``); one of
            `None`, ``"unstore"``, or ``"purge"``.

        ``input``
            Input collections of any type; may be any type handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """
    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                runName, _ = self.output.chain[0]
            else:
                runName = "{}/{:%Y%m%dT%Hh%Mm%Ss}".format(self.output, datetime.datetime.now())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, it is OK to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        self.inputs = list(CollectionSearch.fromExpression(args.input)) if args.input else []
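
    # Illustrative sketch (not in the original source): the minimal set of
    # attributes this class reads from ``args``. All values are hypothetical.
    #
    #     args = argparse.Namespace(
    #         butler_config="/path/to/repo",
    #         input="HSC/defaults",
    #         output="u/someone/analysis",
    #         output_run=None,
    #         extend_run=False,
    #         replace_run=False,
    #         prune_replaced=None,
    #     )
    #     factory = _ButlerFactory(registry, args, writeable=False)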

    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            raise ValueError("Cannot use --input when the --output collection already exists.")
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(f"Cannot --extend-run; output collection "
                             f"'{self.outputRun.name}' does not exist.")
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(f"Output run '{self.outputRun.name}' already exists, but "
                             f"--extend-run was not given.")
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug("Simulating collection search in '%s' after removing '%s'.",
                           self.output.name, replaced)
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self

    @classmethod
    def makeReadButler(cls, args: argparse.Namespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)
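
    # Illustrative sketch (not in the original source): obtaining a read-only
    # butler for inspecting a previous run, given the hypothetical ``args``
    # shown earlier in this file.
    #
    #     butler = _ButlerFactory.makeReadButler(args)
    #     # butler.collections now reflects the search path derived from args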

    @classmethod
    def makeRegistryAndCollections(cls, args: argparse.Namespace) -> \
            Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN` collection
            if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler.registry, inputs, run

    @classmethod
    def makeWriteButler(cls, args: argparse.Namespace) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from the datastore only.
                    with butler.transaction():
                        refs = butler.registry.queryDatasets(..., collections=replaced)
                        butler.pruneDatasets(refs, unstore=True, run=replaced, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all of its datasets; the
                    # collection must be removed from its parent chain first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(
                        f"Unsupported --prune-replaced option '{args.prune_replaced}'."
                    )
            chainDefinition.insert(0, self.outputRun.name)
            chainDefinition = CollectionSearch.fromExpression(chainDefinition)
            _LOG.debug("Preparing butler to write to '%s' and read from '%s'=%s",
                       self.outputRun.name, self.output.name, chainDefinition)
            return Butler(butler=butler, run=self.outputRun.name, collections=self.output.name,
                          chains={self.output.name: chainDefinition})
        else:
            inputs = CollectionSearch.fromExpression([self.outputRun.name] + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            return Butler(butler=butler, run=self.outputRun.name, collections=inputs)
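
    # Worked sketch (not in the original source) of the --replace-run chain
    # manipulation above, with hypothetical names. Suppose the existing output
    # chain is:
    #
    #     chainDefinition = ["u/someone/analysis/run1", "HSC/defaults"]
    #
    # ``pop(0)`` removes the old run ("u/someone/analysis/run1"); inserting the
    # new run name at position 0 then yields:
    #
    #     ["u/someone/analysis/run2", "HSC/defaults"]
    #
    # so searches in the chained collection see the new run first.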

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: List[Tuple[str, DatasetTypeRestriction]]
    """Input collections, including those also used for outputs and any
    restrictions on dataset types (`list`).
    """


class _FilteredStream:
    """A file-like object that filters some config fields.

    Notes
    -----
    This class depends on implementation details of the ``Config.saveToStream``
    method, in particular that it uses a single call to ``write()`` to save the
    information for each config field, and that this call combines the comment
    string(s) for a field with the field's path and value. This class will not
    work reliably on the "import" strings, so imports should be disabled by
    passing ``skipImports=True`` to ``saveToStream()``.
    """
    def __init__(self, pattern):
        # obey case if pattern isn't lowercase or requests NOIGNORECASE
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(f"Matching \"{pattern}\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)
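
# Illustrative sketch (not in the original source): how the filtering above
# behaves. Globs match case-insensitively unless :NOIGNORECASE is appended;
# the config field name here is hypothetical.
#
#     stream = _FilteredStream("dowrite*")
#     stream.write("# Whether to write sources\nconfig.doWriteSources=True\n")
#     # the chunk is echoed to stdout: "config.doWriteSources" matches "dowrite*"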

# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from the command line.

    In addition to executing tasks this activator provides additional methods
    for task management, such as dumping configuration and the execution chain.
    """

    MP_TIMEOUT = 9999  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass

    @staticmethod
    def configLog(longlog, logLevels):
        """Configure the logging system.

        Parameters
        ----------
        longlog : `bool`
            If `True`, make log messages appear in "long format".
        logLevels : `list` of `tuple`
            Per-component logging levels; each item in the list is a tuple
            ``(component, level)``, where ``component`` is a logger name or
            `None` for the root logger, and ``level`` is a logging level name
            ('DEBUG', 'INFO', etc.).
        """
        if longlog:
            message_fmt = "%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n"
        else:
            message_fmt = "%c %p: %m%n"

        # Initialize global logging config. Skip if the env var
        # LSST_LOG_CONFIG exists; the file it points to would already
        # configure lsst.log.
        if not os.path.isfile(os.environ.get("LSST_LOG_CONFIG", "")):
            lsst.log.configure_prop(_LOG_PROP.format(message_fmt))

        # Forward all Python logging to lsst.log
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        lgr.addHandler(lsst.log.LogHandler())

        # also capture warnings and send them to logging
        logging.captureWarnings(True)

        # configure individual loggers
        for component, level in logLevels:
            level = getattr(lsst.log.Log, level.upper(), None)
            if level is not None:
                # set logging level for lsst.log
                logger = lsst.log.Log.getLogger(component or "")
                logger.setLevel(level)
                # set logging level for Python logging
                pyLevel = lsst.log.LevelTranslator.lsstLog2logging(level)
                logging.getLogger(component).setLevel(pyLevel)
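
    # Illustrative sketch (not in the original source): enabling DEBUG output
    # for one component while keeping the root logger at INFO.
    #
    #     CmdLineFwk.configLog(longlog=False,
    #                          logLevels=[(None, "INFO"),
    #                                     ("lsst.daf.butler", "DEBUG")])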

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            The constructed pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.fromFile(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)
            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)
            elif action.action == "delete_task":
                pipeline.removeTask(action.label)
            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)
            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)
            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.toFile(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
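
    # Illustrative sketch (not in the original source): the shape of one
    # ``pipeline_actions`` entry consumed above. The attribute names follow
    # the branches in makePipeline; the task and label are hypothetical.
    #
    #     from types import SimpleNamespace
    #     action = SimpleNamespace(action="new_task",
    #                              label="isr",
    #                              value="lsst.ip.isr.IsrTask")
    #     args.pipeline_actions = [action]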

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline; can be empty or `None` if the graph is read from a file.
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            The constructed graph, or `None` if the resulting graph is empty.
        """

        registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)

        if args.qgraph:

            with open(args.qgraph, 'rb') as pickleFile:
                qgraph = QuantumGraph.load(pickleFile, registry.dimensions)

            # a pipeline cannot be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")

        else:

            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(registry,
                                        skipExisting=args.skip_existing)
            qgraph = graphBuilder.makeGraph(pipeline, collections, run, args.data_query)

        # count quanta in the graph; warn and return None if it is empty
        nQuanta = len(qgraph)
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info("QuantumGraph contains %d quanta for %d tasks",
                      nQuanta, len(qgraph.taskGraph))

        if args.save_qgraph:
            with open(args.save_qgraph, "wb") as pickleFile:
                qgraph.save(pickleFile)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                filename = args.save_single_quanta.format(quantumNode.nodeId.number)
                with open(filename, "wb") as pickleFile:
                    sqgraph.save(pickleFile)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        return qgraph
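
    # Illustrative sketch (not in the original source): saving a graph and
    # reading it back, mirroring the two branches above. The file name is
    # hypothetical.
    #
    #     with open("pipeline.qgraph", "wb") as f:
    #         qgraph.save(f)
    #     with open("pipeline.qgraph", "rb") as f:
    #         qgraph2 = QuantumGraph.load(f, registry.dimensions)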

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute a complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `argparse.Namespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not provided, a new instance is made
            from command-line options.
        """
        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        preExecInit = PreExecInit(butler, taskFactory, args.skip_existing)
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types,
                               saveVersions=not args.no_versions)

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(taskFactory,
                                                    skipExisting=args.skip_existing,
                                                    clobberPartialOutputs=args.clobber_partial_outputs,
                                                    enableLsstDebug=args.enableLsstDebug)
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(numProc=args.processes, timeout=timeout,
                                       quantumExecutor=quantumExecutor,
                                       failFast=args.fail_fast,
                                       executionGraphFixup=graphFixup)
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler)
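
    # Illustrative sketch (not in the original source): a typical driver
    # sequence tying the methods above together. The attribute names on
    # ``args`` and the task factory are hypothetical here.
    #
    #     fwk = CmdLineFwk()
    #     fwk.configLog(args.longlog, args.loglevel)
    #     pipeline = fwk.makePipeline(args)
    #     qgraph = fwk.makeGraph(pipeline, args)
    #     if qgraph is not None:
    #         fwk.runPipeline(qgraph, taskFactory, args)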

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "dump-config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow", "uri"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "uri":
                if graph:
                    self._showUri(graph, args)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what,
                       "', '".join("pipeline config[=XXX] dump-config[=Task] history=XXX"
                                   " tasks graph workflow uri".split())),
                      file=sys.stderr)
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True`, dump the complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)
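
    # Illustrative sketch (not in the original source): the ``--show config``
    # argument forms parsed above. The task label and field glob here are
    # hypothetical.
    #
    #     --show config                           all fields of all tasks
    #     --show config=isr::doWrite*             matching fields of task "isr"
    #     --show config=doWrite*:NOIGNORECASE     case-sensitive glob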

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        """

        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        found = False
        for taskDef in tasks:

            config = taskDef.config

            # Look for any matches in the config hierarchy for this name
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    print("")

                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # looking for a top-level field
                        hconfig = taskDef.config
                    else:
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                          file=sys.stderr)
                    hconfig = None

                # Sometimes we end up with a non-Config, so skip those
                if isinstance(hconfig, (pexConfig.Config, pexConfig.ConfigurableInstance)) and \
                        hasattr(hconfig, cname):
                    print(f"### Configuration field for task `{taskDef.label}'")
                    print(pexConfig.history.format(hconfig, cname))
                    found = True

        if not found:
            print(f"No task has a field matching {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNode in graph.taskGraph:
            print(taskNode)

            for iq, quantum in enumerate(graph.getQuantaForTask(taskNode)):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.inputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependencies to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `argparse.Namespace`
            Parsed command line.
        """
        for node in graph:
            print(f"Quantum {node.nodeId.number}: {node.taskDef.taskName}")
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId.number} - Child Quantum {node.nodeId.number}")

    def _showUri(self, graph, args):
        """Print input and predicted output URIs to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `argparse.Namespace`
            Parsed command line.
        """
        def dumpURIs(thisRef):
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}")
            else:
                print("    (disassembled artifact)")
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}")

        butler = _ButlerFactory.makeReadButler(args)
        for node in graph:
            print(f"Quantum {node.nodeId.number}: {node.taskDef.taskName}")
            print("  inputs:")
            for key, refs in node.quantum.inputs.items():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:")
            for key, refs in node.quantum.outputs.items():
                for ref in refs:
                    dumpURIs(ref)

    def _importGraphFixup(self, args):
        """Import and instantiate a graph fixup object.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            Fixup instance, or `None` if ``--graph-fixup`` was not given.

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImport(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
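
# Illustrative sketch (not in the original source): a value accepted by
# ``args.graph_fixup``. It names an importable factory; the module and class
# here are hypothetical.
#
#     args.graph_fixup = "mypackage.fixups.MyFixup"
#
# where MyFixup subclasses ExecutionGraphFixup and takes no constructor
# arguments.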