Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Module defining CmdLineFwk class and related methods. 

23""" 

24 

25__all__ = ['CmdLineFwk'] 

26 

27# ------------------------------- 

28# Imports of standard modules -- 

29# ------------------------------- 

30import argparse 

31import copy 

32import datetime 

33import fnmatch 

34import getpass 

35import logging 

36import re 

37import sys 

38from typing import Optional, Tuple 

39import warnings 

40 

41# ----------------------------- 

42# Imports for other modules -- 

43# ----------------------------- 

44from lsst.daf.butler import ( 

45 Butler, 

46 CollectionSearch, 

47 CollectionType, 

48 Registry, 

49) 

50from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults 

51import lsst.pex.config as pexConfig 

52from lsst.pipe.base import GraphBuilder, Pipeline, QuantumGraph, buildExecutionButler 

53from lsst.obs.base import Instrument 

54from .dotTools import graph2dot, pipeline2dot 

55from .executionGraphFixup import ExecutionGraphFixup 

56from .mpGraphExecutor import MPGraphExecutor 

57from .preExecInit import PreExecInit 

58from .singleQuantumExecutor import SingleQuantumExecutor 

59from . import util 

60from lsst.utils import doImport 

61 

62# ---------------------------------- 

63# Local non-exported definitions -- 

64# ---------------------------------- 

65 

# Module logger.  __name__.partition(".") splits at the FIRST dot, so [2]
# (the tail) drops only the top-level package component from the logger name
# (e.g. "lsst.ctrl.mpexec.cmdLineFwk" -> "ctrl.mpexec.cmdLineFwk").
_LOG = logging.getLogger(__name__.partition(".")[2])

67 

68 

class _OutputChainedCollectionInfo:
    """Helper that captures the current registry state of an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection named on the
    command line.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            existingChain = registry.getCollectionChain(name)
        except MissingCollectionError:
            # Collection is not defined yet; record an empty definition.
            self.exists = False
            self.chain = ()
        else:
            self.exists = True
            self.chain = tuple(existingChain)

    def __str__(self):
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: Tuple[str, ...]
    """The definition of the collection, if it already exists (`tuple`[`str`]).

    Empty if the collection does not already exist.
    """

105 

106 

class _OutputRunCollectionInfo:
    """Helper that captures the current registry state of an output
    `~lsst.daf.butler.CollectionType.RUN` collection named on the command
    line.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
        except MissingCollectionError:
            self.exists = False
        else:
            # The name is taken; it is only usable if it is already a RUN.
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

136 

class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `argparse.Namespace`
        Parsed command-line arguments.  The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should already
            exist but will be removed from the output chained collection and
            replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; may be any type handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """
    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                # New runs are pushed to the front of the chain (see check()
                # and makeWriteButler()), so chain[0] is the most recent run.
                runName = self.output.chain[0]
            else:
                # Derive a fresh timestamped run name under the output chain.
                runName = "{}/{}".format(self.output, Instrument.makeCollectionTimestamp())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections.  We do this up
        # front so we can tell if the user passes the same inputs on subsequent
        # calls, even though we also flatten when we define the output CHAINED
        # collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Raises
        ------
        ValueError
            Raised if any combination of options is inconsistent with each
            other or with the state of the repository.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            # Compare both sequences back-to-front for that reason.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1]):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(f"Cannot --extend-run; output collection "
                             f"'{self.outputRun.name}' does not exist.")
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(f"Output run '{self.outputRun.name}' already exists, but "
                             f"--extend-run was not given.")
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                # Pretend the most recent run (front of chain) has already
                # been removed, without actually modifying the repository.
                replaced = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug("Simulating collection search in '%s' after removing '%s'.",
                           self.output.name, replaced)
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            # The run being extended must be searched first.
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self

    @classmethod
    def makeReadButler(cls, args: argparse.Namespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeRegistryAndCollections(cls, args: argparse.Namespace) -> \
            Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN` collection
            if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        # Only report a run when it already exists (i.e. --extend-run);
        # otherwise future writes will create a new one.
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler.registry, inputs, run

    @classmethod
    def makeWriteButler(cls, args: argparse.Namespace) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                # Drop the most recent run from the front of the chain; it
                # will be replaced by the new output run below.
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore
                    with butler.transaction():
                        refs = butler.registry.queryDatasets(..., collections=replaced)
                        butler.pruneDatasets(refs, unstore=True, run=replaced, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase entire collection and all datasets, need to remove
                    # collection from its chain collection first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(
                        f"Unsupported --prune-replaced option '{args.prune_replaced}'."
                    )
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                # Create the new run and push it to the front of the chain.
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug("Preparing butler to write to '%s' and read from '%s'=%s",
                       self.outputRun.name, self.output.name, chainDefinition)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            # No chained output collection; read from the run plus the raw
            # inputs directly.
            inputs = CollectionSearch.fromExpression((self.outputRun.name,) + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: Tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """

409 

410class _FilteredStream: 

411 """A file-like object that filters some config fields. 

412 

413 Note 

414 ---- 

415 This class depends on implementation details of ``Config.saveToStream`` 

416 methods, in particular that that method uses single call to write() 

417 method to save information about single config field, and that call 

418 combines comments string(s) for a field and field path and value. 

419 This class will not work reliably on the "import" strings, so imports 

420 should be disabled by passing ``skipImports=True`` to ``saveToStream()``. 

421 """ 

422 def __init__(self, pattern): 

423 # obey case if pattern isn't lowercase or requests NOIGNORECASE 

424 mat = re.search(r"(.*):NOIGNORECASE$", pattern) 

425 

426 if mat: 

427 pattern = mat.group(1) 

428 self._pattern = re.compile(fnmatch.translate(pattern)) 

429 else: 

430 if pattern != pattern.lower(): 

431 print(f"Matching \"{pattern}\" without regard to case " 

432 "(append :NOIGNORECASE to prevent this)", file=sys.stdout) 

433 self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE) 

434 

435 def write(self, showStr): 

436 # Strip off doc string line(s) and cut off at "=" for string matching 

437 matchStr = showStr.rstrip().split("\n")[-1].split("=")[0] 

438 if self._pattern.search(matchStr): 

439 sys.stdout.write(showStr) 

440 

441# ------------------------ 

442# Exported definitions -- 

443# ------------------------ 

444 

445 

class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    # Default timeout (sec) for multiprocessing; effectively "no timeout".
    MP_TIMEOUT = 9999

    def __init__(self):
        pass

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`

        Raises
        ------
        ValueError
            Raised if a pipeline action has an unrecognized action type.
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":

                pipeline.addInstrument(action.value)

            elif action.action == "new_task":

                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":

                pipeline.removeTask(action.label)

            elif action.action == "config":

                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":

                pipeline.addConfigFile(action.label, action.value)

            else:

                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            # Dump the pipeline structure in GraphViz dot format.
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if graph is read from a file.
        args : `argparse.Namespace`
            Parsed command line

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If resulting graph is empty then `None` is returned.

        Raises
        ------
        ValueError
            Raised if both a pipeline and a pre-built quantum graph file are
            given.
        """

        registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)

        if args.qgraph:
            # click passes empty tuple as default value for qgraph_node_id
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(args.qgraph, registry.dimensions,
                                          nodes=nodes, graphID=args.qgraph_id)

            # pipeline can not be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")

        else:

            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(registry,
                                        skipExisting=args.skip_existing,
                                        clobberOutputs=args.clobber_outputs)
            # accumulate metadata
            metadata = {"input": args.input, "output": args.output, "butler_argument": args.butler_config,
                        "output_run": args.output_run, "extend_run": args.extend_run,
                        "skip_existing": args.skip_existing, "data_query": args.data_query,
                        "user": getpass.getuser(), "time": f"{datetime.datetime.now()}"}
            qgraph = graphBuilder.makeGraph(pipeline, collections, run, args.data_query, metadata=metadata)

        # Count quanta in graph and give a warning if it's empty and return
        # None.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info("QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
                      nQuanta, len(qgraph.taskGraph), qgraph.graphID)

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            # Save each quantum as its own single-node graph; the option value
            # is a format template taking the node number.
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode.nodeId.number)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            # Deep-copy args so the shim below can redirect butler_config
            # without affecting the caller's namespace.
            newArgs = copy.deepcopy(args)

            def builderShim(butler):
                newArgs.butler_config = butler._config
                # Calling makeWriteButler is done for the side effects of
                # calling that method, mainly parsing all the args into
                # collection names, creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            buildExecutionButler(butler, qgraph, args.execution_butler_location, run,
                                 butlerModifier=builderShim, collections=args.input,
                                 clobber=args.clobber_execution_butler)

        return qgraph

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory
        args : `argparse.Namespace`
            Parsed command line
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance, if not defined then new instance is made
            using command line options.
        """
        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                # NOTE(review): Logger.warn is a deprecated alias of warning.
                _LOG.warn("No 'debug' module found.")

        # --skip-existing should have no effect unless --extend-run is passed
        # so we make PreExecInit's skipExisting depend on the latter as well.
        preExecInit = PreExecInit(butler, taskFactory, skipExisting=(args.skip_existing and args.extend_run))
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types,
                               saveVersions=not args.no_versions)

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(taskFactory,
                                                    skipExisting=args.skip_existing,
                                                    clobberOutputs=args.clobber_outputs,
                                                    enableLsstDebug=args.enableLsstDebug,
                                                    exitOnKnownError=args.fail_fast)
            # Fall back to the class-level default when no timeout given.
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(numProc=args.processes, timeout=timeout,
                                       startMethod=args.start_method,
                                       quantumExecutor=quantumExecutor,
                                       failFast=args.fail_fast,
                                       executionGraphFixup=graphFixup)
            # Optionally profile the whole execution.
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler)

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line
        pipeline : `Pipeline`
            Pipeline definition
        graph : `QuantumGraph`, optional
            Execution graph
        """
        showOpts = args.show
        for what in showOpts:
            # Each --show value has the form "command" or "command=args".
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow", "uri"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "uri":
                if graph:
                    self._showUri(graph, args)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what, "', '".join("pipeline config[=XXX] history=XXX tasks graph".split())),
                      file=sys.stderr)
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show
        dumpFullConfig : `bool`
            If true then dump complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            # NOTE(review): the "config." in the regex has an unescaped dot,
            # so it also matches e.g. "configX"; harmless for typical input.
            matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show
        """

        taskName = None
        pattern = None
        # Argument has the form [TaskLabel::][config.]pattern
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        found = False
        for taskDef in tasks:

            config = taskDef.config

            # Look for any matches in the config hierarchy for this name
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    print("")

                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # looking for top-level field
                        hconfig = taskDef.config
                    else:
                        # NOTE(review): eval on a config path; input comes
                        # from the command line, not external untrusted data.
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                          file=sys.stderr)
                    hconfig = None

                # Sometimes we end up with a non-Config so skip those
                if isinstance(hconfig, (pexConfig.Config, pexConfig.ConfigurableInstance)) and \
                        hasattr(hconfig, cname):
                    print(f"### Configuration field for task `{taskDef.label}'")
                    print(pexConfig.history.format(hconfig, cname))
                    found = True

        if not found:
            print(f"None of the tasks has field matching {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout

        Parameters
        ----------
        pipeline: `Pipeline`
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNode in graph.taskGraph:
            print(taskNode)

            for iq, quantum in enumerate(graph.getQuantaForTask(taskNode)):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.inputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependency to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `argparse.Namespace`
            Parsed command line
        """
        for node in graph:
            print(f"Quantum {node.nodeId.number}: {node.taskDef.taskName}")
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId.number} - Child Quantum {node.nodeId.number}")

    def _showUri(self, graph, args):
        """Print input and predicted output URIs to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph
        args : `argparse.Namespace`
            Parsed command line
        """
        def dumpURIs(thisRef):
            # Outputs do not exist yet, so predict their URIs; the run name
            # is a placeholder for display purposes only.
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"        {primary}")
            else:
                print("        (disassembled artifact)")
                for compName, compUri in components.items():
                    print(f"            {compName}: {compUri}")

        butler = _ButlerFactory.makeReadButler(args)
        for node in graph:
            print(f"Quantum {node.nodeId.number}: {node.taskDef.taskName}")
            print("  inputs:")
            for key, refs in node.quantum.inputs.items():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:")
            for key, refs in node.quantum.outputs.items():
                for ref in refs:
                    dumpURIs(ref)

    def _importGraphFixup(self, args):
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            `None` when ``args.graph_fixup`` is not set.

        Raises
        ------
        ValueError
            Raised if import fails, method call raises exception, or returned
            instance has unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImport(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup