# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

__all__ = ['CmdLineFwk']

# -------------------------------
#  Imports of standard modules --
# -------------------------------
import argparse
import datetime
import fnmatch
import logging
import pickle
import re
import sys
import warnings
from typing import List, Optional, Tuple

# -----------------------------
#  Imports for other modules --
# -----------------------------
from lsst.daf.butler import (
    Butler,
    CollectionSearch,
    CollectionType,
    DatasetRef,
    DatasetTypeRestriction,
    Registry,
)
from lsst.daf.butler.registry import MissingCollectionError
import lsst.log
import lsst.pex.config as pexConfig
from lsst.pipe.base import GraphBuilder, Pipeline, QuantumGraph
from lsst.utils import doImport

from . import util
from .cmdLineParser import makeParser
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor
from .taskFactory import TaskFactory

# ----------------------------------
#  Local non-exported definitions --
# ----------------------------------

# logging properties
_LOG_PROP = """\
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern={}
"""

_LOG = logging.getLogger(__name__.partition(".")[2])


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = list(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = []
            self.exists = False

    def __str__(self):
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: List[Tuple[str, DatasetTypeRestriction]]
    """The definition of the collection, if it already exists (`list`).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `argparse.Namespace`
        Parsed command-line arguments.  The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; may be any type handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """
    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                runName, _ = self.output.chain[0]
            else:
                runName = "{}/{:%Y%m%dT%Hh%Mm%Ss}".format(self.output, datetime.datetime.now())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        self.inputs = list(CollectionSearch.fromExpression(args.input))
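
    # Note (illustrative, not from the original source): when only --output
    # is given, the output run name constructed above combines the chained
    # collection name with a timestamp, e.g. ``--output u/user/proc`` yields
    # a run named like "u/user/proc/20200723T14h30m00s".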


    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            raise ValueError("Cannot use --output with existing collection with --inputs.")
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(f"Cannot --extend-run; output collection "
                             f"'{self.outputRun.name}' does not exist.")
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(f"Output run '{self.outputRun.name}' already exists, but "
                             f"--extend-run was not given.")
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")


    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced, _ = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug("Simulating collection search in '%s' after removing '%s'.",
                           self.output.name, replaced)
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self


    @classmethod
    def makeReadButler(cls, args: argparse.Namespace):
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)


    @classmethod
    def makeRegistryAndCollections(cls, args: argparse.Namespace
                                   ) -> Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN` collection
            if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler.registry, inputs, run


    @classmethod
    def makeWriteButler(cls, args: argparse.Namespace) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced, _ = chainDefinition.pop(0)
                if args.prune_replaced:
                    # TODO: DM-23671: need a butler API for pruning an
                    # entire RUN collection, then apply it to 'replaced'
                    # here.
                    raise NotImplementedError("Support for --prune-replaced is not yet implemented.")
            chainDefinition.insert(0, self.outputRun.name)
            chainDefinition = CollectionSearch.fromExpression(chainDefinition)
            _LOG.debug("Preparing butler to write to '%s' and read from '%s'=%s",
                       self.outputRun.name, self.output.name, chainDefinition)
            return Butler(butler=butler, run=self.outputRun.name, collections=self.output.name,
                          chains={self.output.name: chainDefinition})
        else:
            inputs = CollectionSearch.fromExpression([self.outputRun.name] + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            return Butler(butler=butler, run=self.outputRun.name, collections=inputs)
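
    # Illustrative example (not from the original source): if "out" is an
    # existing chain [run1, in1] and the new run is "out/<timestamp>", the
    # chain written back above is [out/<timestamp>, run1, in1]; with
    # --replace-run the newest run is popped first, giving
    # [out/<timestamp>, in1].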

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: List[Tuple[str, DatasetTypeRestriction]]
    """Input collections, including those also used for outputs and any
    restrictions on dataset types (`list`).
    """


class _FilteredStream:
    """A file-like object that filters which config fields are written.

    Note
    ----
    This class depends on implementation details of ``Config.saveToStream``
    methods, in particular that that method uses a single call to ``write()``
    to save information about a single config field, and that this call
    combines the comment string(s) for a field with the field path and value.
    This class will not work reliably on the "import" strings, so imports
    should be disabled by passing ``skipImports=True`` to ``saveToStream()``.
    """
    def __init__(self, pattern):
        # obey case if pattern isn't lowercase or requests NOIGNORECASE
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(f"Matching \"{pattern}\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)
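
# Illustrative note (not from the original source): the pattern is a shell
# glob, so ``_FilteredStream("*.doWrite")`` lets through only config fields
# whose dotted path matches "*.doWrite"; matching is case-insensitive unless
# the pattern ends with ":NOIGNORECASE".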


# ------------------------
#  Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 9999  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass

    def parseAndRun(self, argv=None):
        """Parse the command line and execute all commands.

        This method is the main entry point for this class.

        Parameters
        ----------
        argv : `list` of `str`, optional
            List of command line arguments; if not specified then
            ``sys.argv[1:]`` is used.
        """

        if argv is None:
            argv = sys.argv[1:]

        # start with parsing command line, only do partial parsing now as
        # the tasks can add more arguments later
        parser = makeParser()
        args = parser.parse_args(argv)

        # First thing to do is to setup logging.
        self.configLog(args.longlog, args.loglevel)

        taskFactory = TaskFactory()

        # make pipeline out of command line arguments (can return empty pipeline)
        try:
            pipeline = self.makePipeline(args)
        except Exception as exc:
            print("Failed to build pipeline: {}".format(exc), file=sys.stderr)
            raise

        if args.subcommand == "build":
            # stop here but process --show option first
            self.showInfo(args, pipeline)
            return 0

        # make quantum graph
        try:
            qgraph = self.makeGraph(pipeline, args)
        except Exception as exc:
            print("Failed to build graph: {}".format(exc), file=sys.stderr)
            raise

        # optionally dump some info
        self.showInfo(args, pipeline, qgraph)

        if qgraph is None:
            # No need to raise an exception here, code that makes graph
            # should have printed warning message already.
            return 2

        if args.subcommand == "qgraph":
            # stop here
            return 0

        # execute
        if args.subcommand == "run":
            return self.runPipeline(qgraph, taskFactory, args)

    @staticmethod
    def configLog(longlog, logLevels):
        """Configure logging system.

        Parameters
        ----------
        longlog : `bool`
            If `True` then make log messages appear in "long format".
        logLevels : `list` of `tuple`
            Per-component logging levels; each item in the list is a tuple
            ``(component, level)``, where ``component`` is a logger name or
            `None` for the root logger, and ``level`` is a logging level name
            ('DEBUG', 'INFO', etc.).
        """
        if longlog:
            message_fmt = "%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n"
        else:
            message_fmt = "%c %p: %m%n"

        # global logging config
        lsst.log.configure_prop(_LOG_PROP.format(message_fmt))

        # Forward all Python logging to lsst.log
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        lgr.addHandler(lsst.log.LogHandler())

        # also capture warnings and send them to logging
        logging.captureWarnings(True)

        # configure individual loggers
        for component, level in logLevels:
            level = getattr(lsst.log.Log, level.upper(), None)
            if level is not None:
                # set logging level for lsst.log
                logger = lsst.log.Log.getLogger(component or "")
                logger.setLevel(level)
                # set logging level for Python logging
                pyLevel = lsst.log.LevelTranslator.lsstLog2logging(level)
                logging.getLogger(component).setLevel(pyLevel)
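
    # Illustrative usage (not from the original source):
    #
    #     CmdLineFwk.configLog(longlog=False,
    #                          logLevels=[(None, "INFO"),
    #                                     ("lsst.daf.butler", "DEBUG")])
    #
    # sets the root logger to INFO and the "lsst.daf.butler" component to
    # DEBUG in both lsst.log and standard Python logging.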

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
        """
        if args.pipeline:
            pipeline = Pipeline.fromFile(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)
            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)
            elif action.action == "delete_task":
                pipeline.removeTask(action.label)
            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)
            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)
            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.toFile(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
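
    # Illustrative mapping (not from the original source; the option names
    # are defined in cmdLineParser): a command line fragment like
    # ``-t mod.MyTask:label -c label.param=2`` arrives here as the actions
    # ("new_task", value="mod.MyTask", label="label") and
    # ("config", label="label", value="param=2"), applied in order above.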

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or `None` if graph is read from pickle
            file.
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If resulting graph is empty then `None` is returned.
        """

        if args.qgraph:

            with open(args.qgraph, 'rb') as pickleFile:
                qgraph = pickle.load(pickleFile)
                if not isinstance(qgraph, QuantumGraph):
                    raise TypeError("QuantumGraph pickle file has incorrect object type: {}".format(
                        type(qgraph)))

            # pipeline can not be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")

        else:

            registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)

            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(registry,
                                        skipExisting=args.skip_existing)
            qgraph = graphBuilder.makeGraph(pipeline, collections, run, args.data_query)

        # count quanta in graph; if the graph is empty, warn and return None
        nQuanta = qgraph.countQuanta()
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info("QuantumGraph contains %d quanta for %d tasks",
                      nQuanta, len(qgraph))

        if args.save_qgraph:
            with open(args.save_qgraph, "wb") as pickleFile:
                pickle.dump(qgraph, pickleFile)

        if args.save_single_quanta:
            for iq, sqgraph in enumerate(qgraph.quantaAsQgraph()):
                filename = args.save_single_quanta.format(iq)
                with open(filename, "wb") as pickleFile:
                    pickle.dump(sqgraph, pickleFile)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        return qgraph
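
    # A minimal sketch (not from the original source) of reloading a graph
    # saved with --save-qgraph, mirroring the loading branch above:
    #
    #     with open("pipeline.qgraph", "rb") as pickleFile:
    #         qgraph = pickle.load(pickleFile)
    #     assert isinstance(qgraph, QuantumGraph)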

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `argparse.Namespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        preExecInit = PreExecInit(butler, taskFactory, args.skip_existing)
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types,
                               saveVersions=not args.no_versions)

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(taskFactory,
                                                    skipExisting=args.skip_existing,
                                                    enableLsstDebug=args.enableLsstDebug)
            executor = MPGraphExecutor(numProc=args.processes, timeout=self.MP_TIMEOUT,
                                       quantumExecutor=quantumExecutor,
                                       executionGraphFixup=graphFixup)
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler)

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what, "', '".join("pipeline config[=XXX] dump-config[=Task] history=XXX "
                                         "tasks graph workflow".split())),
                      file=sys.stderr)
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True` then dump complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        """

        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        cpath, _, cname = pattern.rpartition(".")
        found = False
        for taskDef in tasks:
            try:
                if not cpath:
                    # looking for top-level field
                    hconfig = taskDef.config
                else:
                    hconfig = eval("config." + cpath, {}, {"config": taskDef.config})
            except AttributeError:
                # Means this config object has no such field, but maybe
                # some other task has it.
                continue
            except Exception:
                # Any other exception probably means some error in the
                # expression.
                print(f"ERROR: Failed to evaluate field expression `{pattern}'", file=sys.stderr)
                sys.exit(1)

            if hasattr(hconfig, cname):
                print(f"### Configuration field for task `{taskDef.label}'")
                print(pexConfig.history.format(hconfig, cname))
                found = True

        if not found:
            print(f"None of the tasks has a field named {pattern}", file=sys.stderr)
            sys.exit(1)
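
    # Illustrative argument forms (not from the original source) accepted by
    # the regular expression above:
    #
    #     --show history=param             all tasks, top-level field "param"
    #     --show history=Task::param       restrict to task labelled "Task"
    #     --show history=config.sub.param  nested field "sub.param"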

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNodes in graph:
            print(taskNodes.taskDef)

            for iq, quantum in enumerate(taskNodes.quanta):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.predictedInputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependencies to stdout.

        The input and predicted output URIs based on the Butler repo are
        printed.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `argparse.Namespace`
            Parsed command line.
        """
        butler = _ButlerFactory.makeReadButler(args)
        hashToParent = {}
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            shortname = taskDef.taskName.split('.')[-1]
            print("Quantum {}: {}".format(iq, shortname))
            print("  inputs:")
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    if butler.datastore.exists(ref):
                        print("    {}".format(butler.datastore.getUri(ref)))
                    else:
                        fakeRef = DatasetRef(ref.datasetType, ref.dataId)
                        print("    {}".format(butler.datastore.getUri(fakeRef, predict=True)))
            print("  outputs:")
            for key, refs in quantum.outputs.items():
                for ref in refs:
                    if butler.datastore.exists(ref):
                        print("    {}".format(butler.datastore.getUri(ref)))
                    else:
                        fakeRef = DatasetRef(ref.datasetType, ref.dataId)
                        print("    {}".format(butler.datastore.getUri(fakeRef, predict=True)))
                    # Store hash to figure out dependency
                    dhash = hash((key, ref.dataId))
                    hashToParent[dhash] = iq

        uses = set()
        for iq, (taskDef, quantum) in enumerate(graph.quanta()):
            for key, refs in quantum.predictedInputs.items():
                for ref in refs:
                    dhash = hash((key, ref.dataId))
                    if dhash in hashToParent and (iq, hashToParent[dhash]) not in uses:
                        parentIq = hashToParent[dhash]
                        uses.add((iq, parentIq))  # iq uses parentIq
                        print("Parent Quantum {} - Child Quantum {}".format(parentIq, iq))

    def _importGraphFixup(self, args):
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            Instantiated fixup object, or `None` if no fixup was requested.

        Raises
        ------
        ValueError
            Raised if import fails, method call raises exception, or returned
            instance has unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImport(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None
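
# Illustrative usage (not from the original source): the fixup is named on
# the command line as a dotted path to an ExecutionGraphFixup subclass or a
# no-argument factory, e.g. ``--graph-fixup mypkg.fixups.MyFixup`` with
# ``MyFixup`` a hypothetical subclass; _importGraphFixup imports it with
# doImport, calls it, and type-checks the result before use.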