# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

__all__ = ['CmdLineFwk']

# -------------------------------
# Imports of standard modules --
# -------------------------------
import argparse
import datetime
import fnmatch
import logging
import pickle
import re
import sys
from typing import List, Optional, Tuple
import warnings

# -----------------------------
# Imports for other modules --
# -----------------------------
from lsst.daf.butler import (
    Butler,
    CollectionSearch,
    CollectionType,
    DatasetRef,
    DatasetTypeRestriction,
    Registry,
)
from lsst.daf.butler.registry import MissingCollectionError
import lsst.log
import lsst.pex.config as pexConfig
from lsst.pipe.base import GraphBuilder, Pipeline, QuantumGraph
from .cmdLineParser import makeParser
from .dotTools import graph2dot, pipeline2dot
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .taskFactory import TaskFactory
from . import util

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

# logging properties
_LOG_PROP = """\
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern={}
"""

_LOG = logging.getLogger(__name__.partition(".")[2])
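
# Illustrative note (not part of the original module): ``configLog`` below
# fills the ``ConversionPattern`` placeholder with a message format, e.g.
#
#     lsst.log.configure_prop(_LOG_PROP.format("%c %p: %m%n"))
#
# which configures log4j with the short console message layout.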


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = list(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = []
            self.exists = False

    def __str__(self):
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: List[Tuple[str, DatasetTypeRestriction]]
    """The definition of the collection, if it already exists (`list`).

    Empty if the collection does not already exist.
    """
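
# Illustrative note (not part of the original module): for an existing chained
# collection, ``chain`` holds ``(name, restriction)`` tuples in search order,
# e.g. ``[("u/user/run1", restriction), ("calib", restriction)]`` with
# hypothetical names. ``chain[0]`` is the most recent output run, which is why
# _ButlerFactory below extends or replaces that element.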


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `argparse.Namespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``input``
            Input collections of any type; may be any expression handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """
    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                runName, _ = self.output.chain[0]
            else:
                runName = "{}/{:%Y%m%dT%Hh%Mm%Ss}".format(self.output, datetime.datetime.now())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        self.inputs = list(CollectionSearch.fromExpression(args.input))

    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            raise ValueError("Cannot use --inputs with an existing --output collection.")
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(f"Cannot --extend-run; output collection "
                             f"'{self.outputRun.name}' does not exist.")
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(f"Output run '{self.outputRun.name}' already exists, but "
                             f"--extend-run was not given.")
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")
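
    # Illustrative summary (not part of the original module) of combinations
    # rejected by check():
    #
    #     --extend-run with a not-yet-existing output run    -> ValueError
    #     an existing output run without --extend-run        -> ValueError
    #     --prune-replaced without --replace-run             -> ValueError
    #     --replace-run without an existing --output chain   -> ValueError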

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced, _ = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug("Simulating collection search in '%s' after removing '%s'.",
                           self.output.name, replaced)
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self

    @classmethod
    def makeReadButler(cls, args: argparse.Namespace):
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeRegistryAndCollections(cls, args: argparse.Namespace
                                   ) -> Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler.registry, inputs, run

    @classmethod
    def makeWriteButler(cls, args: argparse.Namespace) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced, _ = chainDefinition.pop(0)
                if args.prune_replaced:
                    # TODO: DM-23671: need a butler API for pruning an
                    # entire RUN collection, then apply it to 'replaced'
                    # here.
                    raise NotImplementedError("Support for --prune-replaced is not yet implemented.")
            chainDefinition.insert(0, self.outputRun.name)
            chainDefinition = CollectionSearch.fromExpression(chainDefinition)
            _LOG.debug("Preparing butler to write to '%s' and read from '%s'=%s",
                       self.outputRun.name, self.output.name, chainDefinition)
            return Butler(butler=butler, run=self.outputRun.name, collections=self.output.name,
                          chains={self.output.name: chainDefinition})
        else:
            inputs = CollectionSearch.fromExpression([self.outputRun.name] + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            return Butler(butler=butler, run=self.outputRun.name, collections=inputs)
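
    # Illustrative usage (not part of the original module): the three factory
    # entry points used elsewhere in this module are
    #
    #     butler = _ButlerFactory.makeReadButler(args)           # _showWorkflow
    #     registry, collections, run = \
    #         _ButlerFactory.makeRegistryAndCollections(args)    # makeGraph
    #     butler = _ButlerFactory.makeWriteButler(args)          # runPipeline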

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: List[Tuple[str, DatasetTypeRestriction]]
    """Input collections, including those also used for outputs, and any
    restrictions on dataset types (`list`).
    """


class _FilteredStream:
    """A file-like object that filters some config fields.

    Notes
    -----
    This class depends on implementation details of ``Config.saveToStream``
    methods, in particular that that method uses a single call to ``write()``
    to save information about a single config field, and that that call
    combines the comment string(s) for a field with the field path and value.
    This class will not work reliably on "import" strings, so imports should
    be disabled by passing ``skipImports=True`` to ``saveToStream()``.
    """
    def __init__(self, pattern):
        # obey case if pattern isn't lowercase or requests NOIGNORECASE
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(f"Matching \"{pattern}\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)
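
# Illustrative usage (not part of the original module): a _FilteredStream can
# stand in for ``sys.stdout`` when saving a config, keeping only fields whose
# final ``field=value`` line matches the glob pattern ("threshold" here is a
# hypothetical field name):
#
#     stream = _FilteredStream("*.threshold")   # case-insensitive glob
#     taskDef.config.saveToStream(stream, root="config", skipImports=True)
#
# Appending ":NOIGNORECASE" to the pattern makes the match case-sensitive.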

# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 9999  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass

    def parseAndRun(self, argv=None):
        """Parse the command line and execute all commands.

        This method is the main entry point for this class.

        Parameters
        ----------
        argv : `list` of `str`, optional
            List of command line arguments; if not specified then
            ``sys.argv[1:]`` is used.
        """

        if argv is None:
            argv = sys.argv[1:]

        # start with parsing command line, only do partial parsing now as
        # the tasks can add more arguments later
        parser = makeParser()
        args = parser.parse_args(argv)

        # First thing to do is to setup logging.
        self.configLog(args.longlog, args.loglevel)

        taskFactory = TaskFactory()

        # make pipeline out of command line arguments (can return empty pipeline)
        try:
            pipeline = self.makePipeline(args)
        except Exception as exc:
            print("Failed to build pipeline: {}".format(exc), file=sys.stderr)
            raise

        if args.subcommand == "build":
            # stop here but process --show option first
            self.showInfo(args, pipeline)
            return 0

        # make quantum graph
        try:
            qgraph = self.makeGraph(pipeline, args)
        except Exception as exc:
            print("Failed to build graph: {}".format(exc), file=sys.stderr)
            raise

        # optionally dump some info
        self.showInfo(args, pipeline, qgraph)

        if qgraph is None:
            # No need to raise an exception here, code that makes graph
            # should have printed warning message already.
            return 2

        if args.subcommand == "qgraph":
            # stop here
            return 0

        # execute
        if args.subcommand == "run":
            return self.runPipeline(qgraph, taskFactory, args)
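
    # Illustrative usage (not part of the original module; option spellings
    # are defined in ``cmdLineParser.makeParser``):
    #
    #     fwk = CmdLineFwk()
    #     status = fwk.parseAndRun(["build", ...])   # or "qgraph" / "run"
    #
    # The return value is 0 on success and 2 when the quantum graph is empty.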

    @staticmethod
    def configLog(longlog, logLevels):
        """Configure logging system.

        Parameters
        ----------
        longlog : `bool`
            If `True` then make log messages appear in "long format".
        logLevels : `list` of `tuple`
            Per-component logging levels; each item in the list is a tuple
            ``(component, level)`` where ``component`` is a logger name or
            `None` for the root logger and ``level`` is a logging level name
            ('DEBUG', 'INFO', etc.).
        """
        if longlog:
            message_fmt = "%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n"
        else:
            message_fmt = "%c %p: %m%n"

        # global logging config
        lsst.log.configure_prop(_LOG_PROP.format(message_fmt))

        # Forward all Python logging to lsst.log
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        lgr.addHandler(lsst.log.LogHandler())

        # also capture warnings and send them to logging
        logging.captureWarnings(True)

        # configure individual loggers
        for component, level in logLevels:
            level = getattr(lsst.log.Log, level.upper(), None)
            if level is not None:
                # set logging level for lsst.log
                logger = lsst.log.Log.getLogger(component or "")
                logger.setLevel(level)
                # set logging level for Python logging
                pyLevel = lsst.log.LevelTranslator.lsstLog2logging(level)
                logging.getLogger(component).setLevel(pyLevel)
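
    # Illustrative usage (not part of the original module):
    #
    #     CmdLineFwk.configLog(False, [(None, "WARN"), ("lsst.daf.butler", "DEBUG")])
    #
    # sets the root logger to WARN and the butler component to DEBUG, in both
    # lsst.log and Python logging.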

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            The constructed pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.fromFile(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)
            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)
            elif action.action == "delete_task":
                pipeline.removeTask(action.label)
            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)
            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)
            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.toFile(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
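
    # Illustrative note (not part of the original module): the action loop
    # above maps command-line actions onto Pipeline calls; a "new_task" action
    # followed by a "config" action is equivalent to (names hypothetical):
    #
    #     pipeline.addTask("lsst.some.module.SomeTask", "someLabel")
    #     pipeline.addConfigOverride("someLabel", "field", "value")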

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if graph is read from a pickle
            file.
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
        """

        if args.qgraph:

            with open(args.qgraph, 'rb') as pickleFile:
                qgraph = pickle.load(pickleFile)
            if not isinstance(qgraph, QuantumGraph):
                raise TypeError("QuantumGraph pickle file has incorrect object type: {}".format(
                    type(qgraph)))

            # pipeline can not be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")

        else:

            registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)

            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(registry,
                                        skipExisting=args.skip_existing)
            qgraph = graphBuilder.makeGraph(pipeline, collections, run, args.data_query)

        # Count quanta in graph; give a warning and return None if it's empty.
        nQuanta = qgraph.countQuanta()
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info("QuantumGraph contains %d quanta for %d tasks",
                      nQuanta, len(qgraph))

        if args.save_qgraph:
            with open(args.save_qgraph, "wb") as pickleFile:
                pickle.dump(qgraph, pickleFile)

        if args.save_single_quanta:
            for iq, sqgraph in enumerate(qgraph.quantaAsQgraph()):
                filename = args.save_single_quanta.format(iq)
                with open(filename, "wb") as pickleFile:
                    pickle.dump(sqgraph, pickleFile)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        return qgraph
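
    # Illustrative note (not part of the original module): a graph saved via
    # ``args.save_qgraph`` is a plain pickle and can be handed back through
    # ``args.qgraph`` on a later run, or loaded directly:
    #
    #     with open("saved_graph.pickle", "rb") as pickleFile:   # hypothetical name
    #         qgraph = pickle.load(pickleFile)
    #     assert isinstance(qgraph, QuantumGraph)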

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `argparse.Namespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        preExecInit = PreExecInit(butler, taskFactory, args.skip_existing)
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types)

        if not args.init_only:
            executor = MPGraphExecutor(numProc=args.processes, timeout=self.MP_TIMEOUT,
                                       skipExisting=args.skip_existing,
                                       enableLsstDebug=args.enableLsstDebug)
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler, taskFactory)
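
    # Illustrative usage (not part of the original module): tests can bypass
    # command-line butler construction by supplying one directly:
    #
    #     fwk.runPipeline(qgraph, TaskFactory(), args, butler=testButler)
    #
    # where ``testButler`` is a hypothetical pre-built Butler instance.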

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "dump-config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what, "', '".join("pipeline config[=XXX] dump-config[=XXX] history=XXX "
                                         "tasks graph workflow".split())),
                      file=sys.stderr)
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True` then dump complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)

770 

771 def _showConfigHistory(self, pipeline, showArgs): 

772 """Show history for task configuration 

773 

774 Parameters 

775 ---------- 

776 pipeline : `Pipeline` 

777 Pipeline definition 

778 showArgs : `str` 

779 Defines what to show 

780 """ 

781 

782 taskName = None 

783 pattern = None 

784 matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs) 

785 if matHistory: 

786 taskName = matHistory.group(1) 

787 pattern = matHistory.group(2) 

788 if not pattern: 

789 print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr) 

790 sys.exit(1) 

791 

792 tasks = util.filterTasks(pipeline, taskName) 

793 if not tasks: 

794 print(f"Pipeline has no tasks named {taskName}", file=sys.stderr) 

795 sys.exit(1) 

796 

797 cpath, _, cname = pattern.rpartition(".") 

798 found = False 

799 for taskDef in tasks: 

800 try: 

801 if not cpath: 

802 # looking for top-level field 

803 hconfig = taskDef.config 

804 else: 

805 hconfig = eval("config." + cpath, {}, {"config": taskDef.config}) 

806 except AttributeError: 

807 # Means this config object has no such field, but maybe some other task has it. 

808 continue 

809 except Exception: 

810 # Any other exception probably means some error in the expression. 

811 print(f"ERROR: Failed to evaluate field expression `{pattern}'", file=sys.stderr) 

812 sys.exit(1) 

813 

814 if hasattr(hconfig, cname): 

815 print(f"### Configuration field for task `{taskDef.label}'") 

816 print(pexConfig.history.format(hconfig, cname)) 

817 found = True 

818 

819 if not found: 

820 print(f"None of the tasks has field named {pattern}", file=sys.stderr) 

821 sys.exit(1) 

822 
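
    # Illustrative usage (not part of the original module), following the
    # ``history=Task::param`` form mentioned in the error message above
    # (label and field are hypothetical):
    #
    #     --show history=someLabel::subComponent.doWrite
    #
    # prints where each value of ``config.subComponent.doWrite`` was set for
    # the task labelled "someLabel".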

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNodes in graph:
            print(taskNodes.taskDef)

            for iq, quantum in enumerate(taskNodes.quanta):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.predictedInputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependency to stdout

        The input and predicted output URIs based on the Butler repo are
        printed.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `argparse.Namespace`
            Parsed command line.
        """
        butler = _ButlerFactory.makeReadButler(args)
        hashToParent = {}
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            shortname = taskDef.taskName.split('.')[-1]
            print("Quantum {}: {}".format(iq, shortname))
            print("  inputs:")
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    if butler.datastore.exists(ref):
                        print("    {}".format(butler.datastore.getUri(ref)))
                    else:
                        fakeRef = DatasetRef(ref.datasetType, ref.dataId)
                        print("    {}".format(butler.datastore.getUri(fakeRef, predict=True)))
            print("  outputs:")
            for key, refs in quantum.outputs.items():
                for ref in refs:
                    if butler.datastore.exists(ref):
                        print("    {}".format(butler.datastore.getUri(ref)))
                    else:
                        fakeRef = DatasetRef(ref.datasetType, ref.dataId)
                        print("    {}".format(butler.datastore.getUri(fakeRef, predict=True)))
                    # Store hash to figure out dependency
                    dhash = hash((key, ref.dataId))
                    hashToParent[dhash] = iq

        uses = set()
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    dhash = hash((key, ref.dataId))
                    if dhash in hashToParent and (iq, hashToParent[dhash]) not in uses:
                        parentIq = hashToParent[dhash]
                        uses.add((iq, parentIq))  # iq uses parentIq
                        print("Parent Quantum {} - Child Quantum {}".format(parentIq, iq))