# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

__all__ = ['CmdLineFwk']

# -------------------------------
# Imports of standard modules --
# -------------------------------
import argparse
import copy
import datetime
import fnmatch
import getpass
import logging
import re
import sys
from typing import Iterable, Optional, Tuple
import warnings

# -----------------------------
# Imports for other modules --
# -----------------------------
from lsst.daf.butler import (
    Butler,
    CollectionSearch,
    CollectionType,
    Registry,
)
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
import lsst.pex.config as pexConfig
from lsst.pipe.base import (buildExecutionButler, GraphBuilder, Pipeline,
                            PipelineDatasetTypes, QuantumGraph, TaskDef)
from lsst.obs.base import Instrument
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor
from . import util
from lsst.utils import doImport

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__.partition(".")[2])



class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self):
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: Tuple[str, ...]
    """The definition of the collection, if it already exists
    (`tuple` [`str`]).

    Empty if the collection does not already exist.
    """
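
# Illustrative sketch (not part of the module; the repo path and collection
# name below are hypothetical): the helper probes the registry once at
# construction and records what it finds.
#
#     butler = Butler("/repo/main", writeable=False)
#     info = _OutputChainedCollectionInfo(butler.registry, "u/alice/demo")
#     if info.exists:
#         # e.g. ("u/alice/demo/20210101T120000Z", "HSC/defaults")
#         print(info.chain)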


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments.  The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that, if `True`, ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; may be any expression handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                runName = self.output.chain[0]
            else:
                runName = "{}/{}".format(self.output, Instrument.makeCollectionTimestamp())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections.  We do this up
        # front so we can tell if the user passes the same inputs on
        # subsequent calls, even though we also flatten when we define the
        # output CHAINED collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1]):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(f"Cannot --extend-run; output collection "
                             f"'{self.outputRun.name}' does not exist.")
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(f"Output run '{self.outputRun.name}' already exists, but "
                             f"--extend-run was not given.")
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug("Simulating collection search in '%s' after removing '%s'.",
                           self.output.name, replaced)
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self

    @classmethod
    def makeReadButler(cls, args: argparse.Namespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)
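
    # Usage sketch (illustrative, not part of the module; the repo path and
    # collection names are hypothetical, and ``args`` normally comes from the
    # command-line parser rather than being built by hand):
    #
    #     import types
    #     args = types.SimpleNamespace(
    #         butler_config="/repo/main", input=("HSC/defaults",),
    #         output="u/alice/demo", output_run=None, extend_run=False,
    #         replace_run=False, prune_replaced=None)
    #     butler = _ButlerFactory.makeReadButler(args)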

    @classmethod
    def makeRegistryAndCollections(cls, args: argparse.Namespace) -> \
            Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler.registry, inputs, run

    @classmethod
    def makeWriteButler(cls, args: argparse.Namespace,
                        taskDefs: Optional[Iterable[TaskDef]] = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline.  This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore
                    with butler.transaction():
                        refs = butler.registry.queryDatasets(..., collections=replaced)
                        # we want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, run=replaced, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all its datasets; the
                    # collection must be removed from its parent chain first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(
                        f"Unsupported --prune-replaced option '{args.prune_replaced}'."
                    )
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug("Preparing butler to write to '%s' and read from '%s'=%s",
                       self.outputRun.name, self.output.name, chainDefinition)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = CollectionSearch.fromExpression((self.outputRun.name,) + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler
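
    # Sketch of the resulting collection layout for a fresh run (names
    # hypothetical): with ``--output u/alice/demo`` and input "HSC/defaults",
    # the method registers a new RUN collection and defines the chain as
    #
    #     u/alice/demo = ["u/alice/demo/<timestamp>", "HSC/defaults"]
    #
    # so the newest run is searched first and shadows older outputs.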

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: Tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


class _FilteredStream:
    """A file-like object that filters some config fields.

    Note
    ----
    This class depends on implementation details of ``Config.saveToStream``
    methods, in particular that that method uses a single call to ``write()``
    to save the information about a single config field, and that this call
    combines the comment string(s) for a field with the field path and value.
    This class will not work reliably on the "import" strings, so imports
    should be disabled by passing ``skipImports=True`` to ``saveToStream()``.
    """
    def __init__(self, pattern):
        # obey case if pattern isn't lowercase or requests NOIGNORECASE
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(f"Matching \"{pattern}\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)
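
# Usage sketch (assumes ``config`` is an `lsst.pex.config.Config` instance;
# the pattern is illustrative): print only config fields whose name matches
# "doWrite*", ignoring case:
#
#     stream = _FilteredStream("doWrite*")
#     config.saveToStream(stream, root="config", skipImports=True)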

# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 9999  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass
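
    # Typical driver flow (sketch; ``args`` is the parsed command line and
    # ``taskFactory`` an `lsst.pipe.base.TaskFactory` instance):
    #
    #     fwk = CmdLineFwk()
    #     pipeline = fwk.makePipeline(args)
    #     qgraph = fwk.makeGraph(pipeline, args)
    #     if qgraph is not None:
    #         fwk.runPipeline(qgraph, taskFactory, args)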

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)
            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)
            elif action.action == "delete_task":
                pipeline.removeTask(action.label)
            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)
            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)
            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
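
    # Sketch of how pipeline actions map onto Pipeline calls (the task name,
    # label, and command-line syntax here are hypothetical): actions built
    # from something like ``-t lsst.ip.isr.IsrTask:isr -c isr.doWrite=False``
    # would result in
    #
    #     pipeline.addTask("lsst.ip.isr.IsrTask", "isr")
    #     pipeline.addConfigOverride("isr", "doWrite", "False")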

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If resulting graph is empty then `None` is returned.
        """
        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes empty tuple as default value for qgraph_node_id
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(args.qgraph, registry.dimensions,
                                          nodes=nodes, graphID=args.qgraph_id)

            # pipeline cannot be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
        else:
            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(registry,
                                        skipExistingIn=args.skip_existing_in,
                                        clobberOutputs=args.clobber_outputs)
            # accumulate metadata
            metadata = {"input": args.input, "output": args.output, "butler_argument": args.butler_config,
                        "output_run": args.output_run, "extend_run": args.extend_run,
                        "skip_existing_in": args.skip_existing_in, "skip_existing": args.skip_existing,
                        "data_query": args.data_query, "user": getpass.getuser(),
                        "time": f"{datetime.datetime.now()}"}
            qgraph = graphBuilder.makeGraph(pipeline, collections, run, args.data_query, metadata=metadata)

        # Count quanta in the graph; warn and return None if it is empty.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info("QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
                      nQuanta, len(qgraph.taskGraph), qgraph.graphID)

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode.nodeId.number)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler):
                newArgs.butler_config = butler._config
                # makeWriteButler is called for its side effects: parsing all
                # the args into collection names, creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            buildExecutionButler(butler, qgraph, args.execution_butler_location, run,
                                 butlerModifier=builderShim, collections=args.input,
                                 clobber=args.clobber_execution_butler)

        return qgraph
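
    # Sketch (continuing the driver-flow example above; the URI is
    # hypothetical): re-running from a previously saved graph skips the
    # graph-building step entirely, and the pipeline argument must be empty:
    #
    #     args.qgraph = "/path/to/saved.qgraph"
    #     qgraph = fwk.makeGraph(None, args)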

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance, if not defined then new instance is made
            using command line options.
        """
        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run, )

        # Enable lsstDebug debugging.  Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run)
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types,
                               saveVersions=not args.no_versions)

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(taskFactory,
                                                    skipExistingIn=args.skip_existing_in,
                                                    clobberOutputs=args.clobber_outputs,
                                                    enableLsstDebug=args.enableLsstDebug,
                                                    exitOnKnownError=args.fail_fast)
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(numProc=args.processes, timeout=timeout,
                                       startMethod=args.start_method,
                                       quantumExecutor=quantumExecutor,
                                       failFast=args.fail_fast,
                                       executionGraphFixup=graphFixup)
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler)
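
    # Sketch: parallelism is controlled by the parsed command line; e.g.
    # ``args.processes = 4`` with ``args.start_method = "spawn"`` makes
    # MPGraphExecutor run quanta in four spawned worker processes, each
    # executing individual quanta through SingleQuantumExecutor.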

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow", "uri"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "uri":
                if graph:
                    self._showUri(graph, args)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what, "', '".join("pipeline config[=XXX] dump-config[=Task] history=XXX "
                                         "tasks graph workflow uri".split())),
                      file=sys.stderr)
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True` then dump complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config\.)?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)
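
    # Examples of accepted ``--show config`` arguments (the task label and
    # patterns are hypothetical):
    #
    #     config                            all fields for all tasks
    #     config=isr::doWrite*              fields matching "doWrite*" in task "isr"
    #     config=*threshold*:NOIGNORECASE   case-sensitive glob across tasks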

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        """
        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        found = False
        for taskDef in tasks:
            config = taskDef.config

            # Look for any matches in the config hierarchy for this name
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    print("")

                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # looking for top-level field
                        hconfig = taskDef.config
                    else:
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                          file=sys.stderr)
                    hconfig = None

                # Sometimes we end up with a non-Config so skip those
                if isinstance(hconfig, (pexConfig.Config, pexConfig.ConfigurableInstance)) and \
                        hasattr(hconfig, cname):
                    print(f"### Configuration field for task `{taskDef.label}'")
                    print(pexConfig.history.format(hconfig, cname))
                    found = True

        if not found:
            print(f"None of the tasks has a field matching {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNode in graph.taskGraph:
            print(taskNode)

            for iq, quantum in enumerate(graph.getQuantaForTask(taskNode)):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.inputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependencies to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line.
        """
        for node in graph:
            print(f"Quantum {node.nodeId.number}: {node.taskDef.taskName}")
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId.number} - Child Quantum {node.nodeId.number}")

    def _showUri(self, graph, args):
        """Print input and predicted output URIs to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line.
        """
        def dumpURIs(thisRef):
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}")
            else:
                print("    (disassembled artifact)")
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}")

        butler = _ButlerFactory.makeReadButler(args)
        for node in graph:
            print(f"Quantum {node.nodeId.number}: {node.taskDef.taskName}")
            print("  inputs:")
            for key, refs in node.quantum.inputs.items():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:")
            for key, refs in node.quantum.outputs.items():
                for ref in refs:
                    dumpURIs(ref)

    def _importGraphFixup(self, args):
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`

        Raises
        ------
        ValueError
            Raised if import fails, method call raises exception, or returned
            instance has unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImport(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
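
# Sketch of a fixup usable with ``--graph-fixup`` (the module path, class
# name, and hook method shown are hypothetical; all that this module actually
# requires is that the imported callable return an `ExecutionGraphFixup`
# instance):
#
#     # mypackage/fixups.py
#     from lsst.ctrl.mpexec import ExecutionGraphFixup
#
#     class NoopFixup(ExecutionGraphFixup):
#         def fixupQuanta(self, quanta):
#             # return quanta unchanged; a real fixup could add dependencies
#             return quanta
#
# selected on the command line as ``--graph-fixup mypackage.fixups.NoopFixup``.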