# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

__all__ = ['CmdLineFwk']

# -------------------------------
# Imports of standard modules --
# -------------------------------
import argparse
import datetime
import fnmatch
import logging
import os
import re
import sys
from typing import List, Optional, Tuple
import warnings

# -----------------------------
# Imports for other modules --
# -----------------------------
from lsst.daf.butler import (
    Butler,
    CollectionSearch,
    CollectionType,
    DatasetTypeRestriction,
    Registry,
)
from lsst.daf.butler.registry import MissingCollectionError
import lsst.log
import lsst.pex.config as pexConfig
from lsst.pipe.base import GraphBuilder, Pipeline, QuantumGraph
from lsst.utils import doImport
from .cmdLineParser import makeParser
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor
from .taskFactory import TaskFactory
from . import util

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

# logging properties
_LOG_PROP = """\
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern={}
"""

_LOG = logging.getLogger(__name__.partition(".")[2])


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = list(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = []
            self.exists = False

    def __str__(self):
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: List[Tuple[str, DatasetTypeRestriction]]
    """The definition of the collection, if it already exists (`list`).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `argparse.Namespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; may be any type handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """
    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                runName, _ = self.output.chain[0]
            else:
                runName = "{}/{:%Y%m%dT%Hh%Mm%Ss}".format(self.output, datetime.datetime.now())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        self.inputs = list(CollectionSearch.fromExpression(args.input)) if args.input else []
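    # Illustrative sketch (values hypothetical): for arguments equivalent to
    # ``--input calib,raw --output u/user/proc`` with no ``--output-run``,
    # ``self.inputs`` becomes ``["calib", "raw"]`` and, if "u/user/proc" does
    # not exist yet, __init__ above generates a timestamped output run:
    #
    #     runName = "{}/{:%Y%m%dT%Hh%Mm%Ss}".format(
    #         "u/user/proc", datetime.datetime(2020, 1, 1, 12, 0, 0))
    #     # -> "u/user/proc/20200101T12h00m00s"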

    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            raise ValueError("Cannot use --inputs with an existing --output collection.")
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(f"Cannot --extend-run; output collection "
                             f"'{self.outputRun.name}' does not exist.")
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(f"Output run '{self.outputRun.name}' already exists, but "
                             f"--extend-run was not given.")
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced, _ = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug("Simulating collection search in '%s' after removing '%s'.",
                           self.output.name, replaced)
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self

    @classmethod
    def makeReadButler(cls, args: argparse.Namespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeRegistryAndCollections(cls, args: argparse.Namespace) -> \
            Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler.registry, inputs, run

    @classmethod
    def makeWriteButler(cls, args: argparse.Namespace) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced, _ = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore
                    with butler.transaction():
                        refs = butler.registry.queryDatasets(..., collections=replaced)
                        butler.pruneDatasets(refs, unstore=True, run=replaced, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase entire collection and all datasets; need to remove
                    # the collection from its chain collection first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(
                        f"Unsupported --prune-replaced option '{args.prune_replaced}'."
                    )
            chainDefinition.insert(0, self.outputRun.name)
            chainDefinition = CollectionSearch.fromExpression(chainDefinition)
            _LOG.debug("Preparing butler to write to '%s' and read from '%s'=%s",
                       self.outputRun.name, self.output.name, chainDefinition)
            return Butler(butler=butler, run=self.outputRun.name, collections=self.output.name,
                          chains={self.output.name: chainDefinition})
        else:
            inputs = CollectionSearch.fromExpression([self.outputRun.name] + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            return Butler(butler=butler, run=self.outputRun.name, collections=inputs)
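    # Chain-manipulation sketch for --replace-run (collection names
    # hypothetical): given an existing chain definition such as
    #
    #     chainDefinition = [("run1", restriction), ("calib", restriction)]
    #     replaced, _ = chainDefinition.pop(0)   # drop the old output run
    #     chainDefinition.insert(0, "run2")      # prepend the new output run
    #
    # reads through the chained collection then resolve against "run2" first
    # and "calib" second, while "run1" is optionally pruned as above.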

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: List[Tuple[str, DatasetTypeRestriction]]
    """Input collections, including those also used for outputs and any
    restrictions on dataset types (`list`).
    """


class _FilteredStream:
    """A file-like object that filters some config fields.

    Note
    ----
    This class depends on implementation details of the
    ``Config.saveToStream`` method, in particular that it uses a single call
    to ``write()`` to save the information about a single config field, and
    that the call combines the comment string(s) for a field with the field
    path and value. This class will not work reliably on the "import"
    strings, so imports should be disabled by passing ``skipImports=True``
    to ``saveToStream()``.
    """
    def __init__(self, pattern):
        # obey case if pattern isn't lowercase or requests NOIGNORECASE
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(f"Matching \"{pattern}\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)
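# Usage sketch for _FilteredStream (field names illustrative): an instance
# stands in for sys.stdout in _showConfig below and only passes through
# config fields whose dotted path matches the glob pattern:
#
#     stream = _FilteredStream("doWrite*")               # case-insensitive
#     stream = _FilteredStream("doWrite*:NOIGNORECASE")  # case-sensitive
#     config.saveToStream(stream, root="config", skipImports=True)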

# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks, this activator provides additional
    methods for task management, such as dumping the configuration or the
    execution chain.
    """

    MP_TIMEOUT = 9999  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass
    def parseAndRun(self, argv=None):
        """Parse the command line and execute all commands.

        This method is the main entry point for this class.

        Parameters
        ----------
        argv : `list` of `str`, optional
            List of command line arguments; if not specified then
            `sys.argv[1:]` is used.
        """
        if argv is None:
            argv = sys.argv[1:]

        # start with parsing command line; only do partial parsing now as
        # the tasks can add more arguments later
        parser = makeParser()
        args = parser.parse_args(argv)

        # First thing to do is to setup logging.
        self.configLog(args.longlog, args.loglevel)

        taskFactory = TaskFactory()

        # make pipeline out of command line arguments (can return empty pipeline)
        try:
            pipeline = self.makePipeline(args)
        except Exception as exc:
            print("Failed to build pipeline: {}".format(exc), file=sys.stderr)
            raise

        if args.subcommand == "build":
            # stop here but process --show option first
            self.showInfo(args, pipeline)
            return 0

        # make quantum graph
        try:
            qgraph = self.makeGraph(pipeline, args)
        except Exception as exc:
            print("Failed to build graph: {}".format(exc), file=sys.stderr)
            raise

        # optionally dump some info
        self.showInfo(args, pipeline, qgraph)

        if qgraph is None:
            # No need to raise an exception here; the code that makes the
            # graph should have printed a warning message already.
            return 2

        if args.subcommand == "qgraph":
            # stop here
            return 0

        # execute
        if args.subcommand == "run":
            return self.runPipeline(qgraph, taskFactory, args)
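    # Entry-point sketch (wrapper script hypothetical): command-line scripts
    # typically delegate straight to this method, e.g.
    #
    #     if __name__ == "__main__":
    #         sys.exit(CmdLineFwk().parseAndRun())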

    @staticmethod
    def configLog(longlog, logLevels):
        """Configure logging system.

        Parameters
        ----------
        longlog : `bool`
            If `True` then make log messages appear in "long format".
        logLevels : `list` of `tuple`
            Per-component logging levels; each item in the list is a tuple
            ``(component, level)``, where ``component`` is a logger name or
            `None` for the root logger, and ``level`` is a logging level name
            ('DEBUG', 'INFO', etc.).
        """
        if longlog:
            message_fmt = "%-5p %d{yyyy-MM-ddTHH:mm:ss.SSSZ} %c (%X{LABEL})(%F:%L)- %m%n"
        else:
            message_fmt = "%c %p: %m%n"

        # Initialize global logging config. Skip if the env var
        # LSST_LOG_CONFIG exists; the file it points to would already
        # configure lsst.log.
        if not os.path.isfile(os.environ.get("LSST_LOG_CONFIG", "")):
            lsst.log.configure_prop(_LOG_PROP.format(message_fmt))

        # Forward all Python logging to lsst.log
        lgr = logging.getLogger()
        lgr.setLevel(logging.INFO)  # same as in log4cxx config above
        lgr.addHandler(lsst.log.LogHandler())

        # also capture warnings and send them to logging
        logging.captureWarnings(True)

        # configure individual loggers
        for component, level in logLevels:
            level = getattr(lsst.log.Log, level.upper(), None)
            if level is not None:
                # set logging level for lsst.log
                logger = lsst.log.Log.getLogger(component or "")
                logger.setLevel(level)
                # set logging level for Python logging
                pyLevel = lsst.log.LevelTranslator.lsstLog2logging(level)
                logging.getLogger(component).setLevel(pyLevel)
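    # Example sketch of the ``logLevels`` format (component names
    # illustrative):
    #
    #     CmdLineFwk.configLog(longlog=False,
    #                          logLevels=[(None, "WARN"),
    #                                     ("lsst.daf.butler", "DEBUG")])
    #
    # sets the root logger to WARN and the butler component to DEBUG, in
    # both lsst.log and standard Python logging.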

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            The constructed pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.fromFile(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)
            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)
            elif action.action == "delete_task":
                pipeline.removeTask(action.label)
            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)
            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)
            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.toFile(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
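    # Action mapping sketch (option spellings illustrative; see cmdLineParser
    # for the authoritative flags): a ``new_task`` action corresponds to
    # requesting a task such as ``-t module.TaskClass:label``, a ``config``
    # action to an override such as ``-c label:field=value``, and a
    # ``configfile`` action to an external overrides file.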

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline; can be empty or `None` if the graph is read from a
            file.
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            The generated graph, or `None` if the resulting graph is empty.
        """
617 registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args) 

618 

619 if args.qgraph: 

620 

621 with open(args.qgraph, 'rb') as pickleFile: 

622 qgraph = QuantumGraph.load(pickleFile, registry.dimensions) 

623 

624 # pipeline can not be provided in this case 

625 if pipeline: 

626 raise ValueError("Pipeline must not be given when quantum graph is read from file.") 

627 

628 else: 

629 

630 # make execution plan (a.k.a. DAG) for pipeline 

631 graphBuilder = GraphBuilder(registry, 

632 skipExisting=args.skip_existing) 

633 qgraph = graphBuilder.makeGraph(pipeline, collections, run, args.data_query) 

634 

635 # count quanta in graph and give a warning if it's empty and return None 

636 nQuanta = qgraph.countQuanta() 

637 if nQuanta == 0: 

638 warnings.warn("QuantumGraph is empty", stacklevel=2) 

639 return None 

640 else: 

641 _LOG.info("QuantumGraph contains %d quanta for %d tasks", 

642 nQuanta, len(qgraph)) 

643 

644 if args.save_qgraph: 

645 with open(args.save_qgraph, "wb") as pickleFile: 

646 qgraph.save(pickleFile) 

647 

648 if args.save_single_quanta: 

649 for iq, sqgraph in enumerate(qgraph.quantaAsQgraph()): 

650 filename = args.save_single_quanta.format(iq) 

651 with open(filename, "wb") as pickleFile: 

652 sqgraph.save(pickleFile) 

653 

654 if args.qgraph_dot: 

655 graph2dot(qgraph, args.qgraph_dot) 

656 

657 return qgraph 
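    # Round-trip sketch: a graph saved with ``--save-qgraph graph.pickle``
    # can be reused by a later invocation via ``--qgraph graph.pickle``,
    # which takes the load branch above:
    #
    #     with open("graph.pickle", "rb") as pickleFile:
    #         qgraph = QuantumGraph.load(pickleFile, registry.dimensions)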

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `argparse.Namespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not provided then a new instance is
            made using command line options.
        """
        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        preExecInit = PreExecInit(butler, taskFactory, args.skip_existing)
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types,
                               saveVersions=not args.no_versions)

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(taskFactory,
                                                    skipExisting=args.skip_existing,
                                                    clobberPartialOutputs=args.clobber_partial_outputs,
                                                    enableLsstDebug=args.enableLsstDebug)
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(numProc=args.processes, timeout=timeout,
                                       quantumExecutor=quantumExecutor,
                                       failFast=args.fail_fast,
                                       executionGraphFixup=graphFixup)
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler)

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what, "', '".join("pipeline config[=XXX] dump-config[=Task]"
                                         " history=XXX tasks graph workflow".split())),
                      file=sys.stderr)
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True` then dump complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)
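    # Pattern sketch (task label and field names hypothetical):
    # ``--show config=isr::doBias*`` limits the match to the task labelled
    # "isr", and appending ``:NOIGNORECASE`` makes the glob case-sensitive,
    # mirroring the ``[TaskLabel::][pattern:NOIGNORECASE]`` form parsed above.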

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        """
        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        cpath, _, cname = pattern.rpartition(".")
        found = False
        for taskDef in tasks:
            try:
                if not cpath:
                    # looking for top-level field
                    hconfig = taskDef.config
                else:
                    hconfig = eval("config." + cpath, {}, {"config": taskDef.config})
            except AttributeError:
                # This config object has no such field, but maybe some other
                # task has it.
                continue
            except Exception:
                # Any other exception probably means some error in the
                # expression.
                print(f"ERROR: Failed to evaluate field expression `{pattern}'", file=sys.stderr)
                sys.exit(1)

            if hasattr(hconfig, cname):
                print(f"### Configuration field for task `{taskDef.label}'")
                print(pexConfig.history.format(hconfig, cname))
                found = True

        if not found:
            print(f"None of the tasks has a field named {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNodes in graph:
            print(taskNodes.taskDef)

            for iq, quantum in enumerate(taskNodes.quanta):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.predictedInputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependencies to stdout.

        The input and predicted output URIs based on the Butler repo are
        printed.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `argparse.Namespace`
            Parsed command line.
        """
        def dumpURIs(thisRef):
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}")
            else:
                print("    (disassembled artifact)")
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}")

        butler = _ButlerFactory.makeReadButler(args)
        for qdata in graph.traverse():
            shortname = qdata.taskDef.taskName.split('.')[-1]
            print("Quantum {}: {}".format(qdata.index, shortname))
            print("  inputs:")
            for key, refs in qdata.quantum.predictedInputs.items():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:")
            for key, refs in qdata.quantum.outputs.items():
                for ref in refs:
                    dumpURIs(ref)
            for parent in qdata.dependencies:
                print("Parent Quantum {} - Child Quantum {}".format(parent, qdata.index))

    def _importGraphFixup(self, args):
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `argparse.Namespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImport(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
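    # Fixup sketch (dotted name and class hypothetical):
    # ``--graph-fixup mypkg.fixups.MyFixup`` is resolved with ``doImport``
    # and called with no arguments; the result must be an
    # ``ExecutionGraphFixup`` instance, roughly:
    #
    #     class MyFixup(ExecutionGraphFixup):
    #         def fixupQuanta(self, quanta):
    #             # adjust ordering/dependencies, then return the quanta
    #             return quanta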