# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

__all__ = ['CmdLineFwk']

# -------------------------------
#  Imports of standard modules --
# -------------------------------
import argparse
import datetime
import fnmatch
import logging
import pickle
import re
import sys
from typing import List, Optional, Tuple
import warnings

# -----------------------------
#  Imports for other modules --
# -----------------------------
from lsst.daf.butler import (
    Butler,
    CollectionSearch,
    CollectionType,
    DatasetTypeRestriction,
    Registry,
)
from lsst.daf.butler.registry import MissingCollectionError
import lsst.log
import lsst.pex.config as pexConfig
from lsst.pipe.base import GraphBuilder, Pipeline, QuantumGraph
from lsst.utils import doImport
from .cmdLineParser import makeParser
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor
from .taskFactory import TaskFactory
from . import util

# ----------------------------------
#  Local non-exported definitions --
# ----------------------------------

# logging properties
_LOG_PROP = """\
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern={}
"""

_LOG = logging.getLogger(__name__.partition(".")[2])


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = list(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = []
            self.exists = False

    def __str__(self):
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: List[Tuple[str, DatasetTypeRestriction]]
    """The definition of the collection, if it already exists (`list`).

    Empty if the collection does not already exist.
    """
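
# For illustration (collection names hypothetical): if "u/user/out" is an
# existing CHAINED collection defined as [("u/user/out/run1", restriction),
# ("shared/inputs", restriction)], then ``chain[0]`` names the most recent
# output run, which is what ``--extend-run`` reuses in _ButlerFactory below.
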

class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `argparse.Namespace`
        Parsed command-line arguments.  The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; may be any expression handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                runName, _ = self.output.chain[0]
            else:
                runName = "{}/{:%Y%m%dT%Hh%Mm%Ss}".format(self.output, datetime.datetime.now())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, it is OK to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        self.inputs = list(CollectionSearch.fromExpression(args.input))

    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            raise ValueError("Cannot use --inputs with an existing --output collection.")
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(f"Cannot --extend-run; output collection "
                             f"'{self.outputRun.name}' does not exist.")
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(f"Output run '{self.outputRun.name}' already exists, but "
                             f"--extend-run was not given.")
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")
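
    # For example (flag spellings follow the error messages above, values
    # hypothetical): ``--extend-run`` without ``--output``/``--output-run``
    # is rejected because there is no output run to extend, and
    # ``--prune-replaced`` is rejected unless ``--replace-run`` is also
    # given.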

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced, _ = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug("Simulating collection search in '%s' after removing '%s'.",
                           self.output.name, replaced)
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self

    @classmethod
    def makeReadButler(cls, args: argparse.Namespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeRegistryAndCollections(cls, args: argparse.Namespace
                                   ) -> Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler.registry, inputs, run

    @classmethod
    def makeWriteButler(cls, args: argparse.Namespace) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced, _ = chainDefinition.pop(0)
                if args.prune_replaced:
                    # TODO: DM-23671: need a butler API for pruning an
                    # entire RUN collection, then apply it to 'replaced'
                    # here.
                    raise NotImplementedError("Support for --prune-replaced is not yet implemented.")
            chainDefinition.insert(0, self.outputRun.name)
            chainDefinition = CollectionSearch.fromExpression(chainDefinition)
            _LOG.debug("Preparing butler to write to '%s' and read from '%s'=%s",
                       self.outputRun.name, self.output.name, chainDefinition)
            return Butler(butler=butler, run=self.outputRun.name, collections=self.output.name,
                          chains={self.output.name: chainDefinition})
        else:
            inputs = CollectionSearch.fromExpression([self.outputRun.name] + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            return Butler(butler=butler, run=self.outputRun.name, collections=inputs)

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: List[Tuple[str, DatasetTypeRestriction]]
    """Input collections, including those also used for outputs, and any
    restrictions on dataset types (`list`).
    """


class _FilteredStream:
    """A file-like object that filters some config fields.

    Note
    ----
    This class depends on implementation details of the
    ``Config.saveToStream`` methods, in particular that each config field is
    saved with a single call to ``write()``, and that that call combines the
    comment string(s) for the field with the field path and value.
    This class will not work reliably on the "import" strings, so imports
    should be disabled by passing ``skipImports=True`` to ``saveToStream()``.
    """
    def __init__(self, pattern):
        # obey case if pattern isn't lowercase or requests NOIGNORECASE
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(f"Matching \"{pattern}\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)
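
# A minimal usage sketch (pattern and field names hypothetical): writing the
# output of ``Config.saveToStream`` through ``_FilteredStream("conn*")``
# echoes only fields whose dotted path matches "conn*", ignoring case;
# appending ":NOIGNORECASE", as in ``_FilteredStream("conn*:NOIGNORECASE")``,
# makes the match case-sensitive.
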

# ------------------------
#  Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from the command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 9999  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass

    def parseAndRun(self, argv=None):
        """
        This method is the main entry point for this class; it parses the
        command line and executes all commands.

        Parameters
        ----------
        argv : `list` of `str`, optional
            List of command line arguments; if not specified then
            ``sys.argv[1:]`` is used.
        """

        if argv is None:
            argv = sys.argv[1:]

        # start with parsing command line; only do partial parsing now as
        # the tasks can add more arguments later
        parser = makeParser()
        args = parser.parse_args(argv)

        # First thing to do is to set up logging.
        self.configLog(args.longlog, args.loglevel)

        taskFactory = TaskFactory()

        # make pipeline out of command line arguments (can return empty
        # pipeline)
        try:
            pipeline = self.makePipeline(args)
        except Exception as exc:
            print("Failed to build pipeline: {}".format(exc), file=sys.stderr)
            raise

        if args.subcommand == "build":
            # stop here but process --show option first
            self.showInfo(args, pipeline)
            return 0

        # make quantum graph
        try:
            qgraph = self.makeGraph(pipeline, args)
        except Exception as exc:
            print("Failed to build graph: {}".format(exc), file=sys.stderr)
            raise

        # optionally dump some info
        self.showInfo(args, pipeline, qgraph)

        if qgraph is None:
            # No need to raise an exception here; the code that makes the
            # graph should have printed a warning message already.
            return 2

        if args.subcommand == "qgraph":
            # stop here
            return 0

        # execute
        if args.subcommand == "run":
            return self.runPipeline(qgraph, taskFactory, args)
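
    # A minimal invocation sketch (repo path, pipeline file, and option
    # spellings are illustrative; the exact flags and subcommands are
    # defined by ``makeParser()``):
    #
    #     fwk = CmdLineFwk()
    #     status = fwk.parseAndRun(["qgraph", "-b", "/path/to/repo",
    #                               "-p", "pipeline.yaml"])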

    @staticmethod
    def configLog(longlog, logLevels):
        """Configure logging system.

        Parameters
        ----------
        longlog : `bool`
            If `True` then make log messages appear in "long format".
        logLevels : `list` of `tuple`
            Per-component logging levels; each item in the list is a tuple
            (component, level), where ``component`` is a logger name or
            `None` for the root logger and ``level`` is a logging level name
            ('DEBUG', 'INFO', etc.).
        """
        if longlog:
            message_fmt = "%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n"
        else:
            message_fmt = "%c %p: %m%n"

        # global logging config
        lsst.log.configure_prop(_LOG_PROP.format(message_fmt))

        # Forward all Python logging to lsst.log
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        lgr.addHandler(lsst.log.LogHandler())

        # also capture warnings and send them to logging
        logging.captureWarnings(True)

        # configure individual loggers
        for component, level in logLevels:
            level = getattr(lsst.log.Log, level.upper(), None)
            if level is not None:
                # set logging level for lsst.log
                logger = lsst.log.Log.getLogger(component or "")
                logger.setLevel(level)
                # set logging level for Python logging
                pyLevel = lsst.log.LevelTranslator.lsstLog2logging(level)
                logging.getLogger(component).setLevel(pyLevel)
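
    # For example (component name illustrative):
    #
    #     CmdLineFwk.configLog(False, [(None, "INFO"),
    #                                  ("lsst.daf.butler", "DEBUG")])
    #
    # sets the root logger to INFO and the "lsst.daf.butler" component to
    # DEBUG in both lsst.log and Python logging.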

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
        """
        if args.pipeline:
            pipeline = Pipeline.fromFile(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)
            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)
            elif action.action == "delete_task":
                pipeline.removeTask(action.label)
            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)
            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)
            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.toFile(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if the graph is read from a
            pickle file.
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
        """
        if args.qgraph:
            with open(args.qgraph, 'rb') as pickleFile:
                qgraph = pickle.load(pickleFile)
                if not isinstance(qgraph, QuantumGraph):
                    raise TypeError("QuantumGraph pickle file has incorrect object type: {}".format(
                        type(qgraph)))

            # pipeline cannot be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
        else:
            registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)

            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(registry,
                                        skipExisting=args.skip_existing)
            qgraph = graphBuilder.makeGraph(pipeline, collections, run, args.data_query)

        # count quanta in the graph; if it is empty, give a warning and
        # return None
        nQuanta = qgraph.countQuanta()
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info("QuantumGraph contains %d quanta for %d tasks",
                      nQuanta, len(qgraph))

        if args.save_qgraph:
            with open(args.save_qgraph, "wb") as pickleFile:
                pickle.dump(qgraph, pickleFile)

        if args.save_single_quanta:
            for iq, sqgraph in enumerate(qgraph.quantaAsQgraph()):
                filename = args.save_single_quanta.format(iq)
                with open(filename, "wb") as pickleFile:
                    pickle.dump(sqgraph, pickleFile)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        return qgraph

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `argparse.Namespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses the fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        preExecInit = PreExecInit(butler, taskFactory, args.skip_existing)
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types,
                               saveVersions=not args.no_versions)

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(taskFactory,
                                                    skipExisting=args.skip_existing,
                                                    enableLsstDebug=args.enableLsstDebug)
            executor = MPGraphExecutor(numProc=args.processes, timeout=self.MP_TIMEOUT,
                                       quantumExecutor=quantumExecutor,
                                       executionGraphFixup=graphFixup)
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler)

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what, "', '".join("pipeline config[=XXX] dump-config[=XXX] history=XXX"
                                         " tasks graph workflow".split())),
                      file=sys.stderr)
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True` then dump complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config\.)?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)
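
    # For example (task label and field pattern hypothetical): ``--show
    # config=Task::param*`` dumps only the configuration fields of task
    # "Task" whose names match "param*", case-insensitively unless
    # ":NOIGNORECASE" is appended to the pattern.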

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        """

        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        cpath, _, cname = pattern.rpartition(".")
        found = False
        for taskDef in tasks:
            try:
                if not cpath:
                    # looking for a top-level field
                    hconfig = taskDef.config
                else:
                    hconfig = eval("config." + cpath, {}, {"config": taskDef.config})
            except AttributeError:
                # This config object has no such field, but maybe some other
                # task has it.
                continue
            except Exception:
                # Any other exception probably means some error in the
                # expression.
                print(f"ERROR: Failed to evaluate field expression `{pattern}'", file=sys.stderr)
                sys.exit(1)

            if hasattr(hconfig, cname):
                print(f"### Configuration field for task `{taskDef.label}'")
                print(pexConfig.history.format(hconfig, cname))
                found = True

        if not found:
            print(f"None of the tasks has a field named {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNodes in graph:
            print(taskNodes.taskDef)

            for iq, quantum in enumerate(taskNodes.quanta):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.predictedInputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependencies to stdout

        The input and predicted output URIs based on the Butler repo are
        printed.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `argparse.Namespace`
            Parsed command line.
        """
        def dumpURIs(thisRef):
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}")
            else:
                print("    (disassembled artifact)")
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}")

        butler = _ButlerFactory.makeReadButler(args)
        hashToParent = {}
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            shortname = taskDef.taskName.split('.')[-1]
            print("Quantum {}: {}".format(iq, shortname))
            print("  inputs:")
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:")
            for key, refs in quantum.outputs.items():
                for ref in refs:
                    dumpURIs(ref)
                    # Store hash to figure out dependencies
                    dhash = hash((key, ref.dataId))
                    hashToParent[dhash] = iq

        uses = set()
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    dhash = hash((key, ref.dataId))
                    if dhash in hashToParent and (iq, hashToParent[dhash]) not in uses:
                        parentIq = hashToParent[dhash]
                        uses.add((iq, parentIq))  # iq uses parentIq
                        print("Parent Quantum {} - Child Quantum {}".format(parentIq, iq))

    def _importGraphFixup(self, args):
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            Fixup instance, or `None` if the option was not given.

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImport(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
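
    # For example (module and class names hypothetical): passing
    # ``--graph-fixup mypackage.mymodule.MyFixup`` makes ``doImport`` load
    # ``MyFixup``, which is then called with no arguments and must yield an
    # ``ExecutionGraphFixup`` instance.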