Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 13%


403 statements  

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Module defining the CmdLineFwk class and related methods.
"""

__all__ = ['CmdLineFwk']

# -------------------------------
#  Imports of standard modules --
# -------------------------------
import argparse
import copy
import datetime
import fnmatch
import getpass
import logging
import re
import sys
import warnings
from typing import Iterable, Optional, Tuple

# -----------------------------
#  Imports for other modules --
# -----------------------------
from lsst.daf.butler import (
    Butler,
    CollectionSearch,
    CollectionType,
    Registry,
)
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
import lsst.pex.config as pexConfig
from lsst.pipe.base import (buildExecutionButler, GraphBuilder, Pipeline,
                            PipelineDatasetTypes, QuantumGraph, TaskDef)
from lsst.obs.base import Instrument
from lsst.utils import doImport
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor
from . import util

# ----------------------------------
#  Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self):
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: Tuple[str, ...]
    """The definition of the collection, if it already exists
    (`tuple` [`str`]).

    Empty if the collection does not already exist.
    """
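
# A minimal usage sketch for the helper above (the repository path and
# collection name are hypothetical; this is not part of the module):
#
#     registry = Butler("/path/to/repo").registry
#     info = _OutputChainedCollectionInfo(registry, "u/user/output")
#     if info.exists:
#         print(info.chain)   # the chain's current definition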


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """
    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments.  The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that, if `True`, ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; may be any expression handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is needed.

    Raises
    ------
    ValueError
        Raised if ``writeable`` is `True` but there are no output collections.
    """
    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = "{}/{}".format(self.output, Instrument.makeCollectionTimestamp())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, it is OK to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections.  We do this up
        # front so we can tell if the user passes the same inputs on
        # subsequent calls, even though we also flatten when we define the
        # output CHAINED collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()
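
    # Construction sketch (values are hypothetical; SimpleNamespace stands in
    # for the parsed command line; this is not part of the module):
    #
    #     from types import SimpleNamespace
    #     args = SimpleNamespace(output="u/user/out", output_run=None,
    #                            extend_run=False, input=("HSC/raw",))
    #     factory = _ButlerFactory(butler.registry, args, writeable=False)
    #     # factory.outputRun.name is "u/user/out/<timestamp>"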

    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1]):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(f"Cannot --extend-run; output collection "
                             f"'{self.outputRun.name}' does not exist.")
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(f"Output run '{self.outputRun.name}' already exists, but "
                             "--extend-run was not given.")
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug("Simulating collection search in '%s' after removing '%s'.",
                           self.output.name, replaced)
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self
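
    # Resulting search order, sketched for a hypothetical existing chain
    # "u/user/out" defined as ["u/user/out/run1", "HSC/raw"]:
    #
    #     default:        ["u/user/out"]
    #     --replace-run:  ["HSC/raw"]  (the front run is simulated away)
    #     --extend-run:   ["u/user/out/run1", "u/user/out"]  (run comes first)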

    @classmethod
    def makeReadButler(cls, args: argparse.Namespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeRegistryAndCollections(cls, args: argparse.Namespace) -> \
            Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler.registry, inputs, run

    @classmethod
    def makeWriteButler(cls, args: argparse.Namespace,
                        taskDefs: Optional[Iterable[TaskDef]] = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline.  This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from the datastore.
                    with butler.transaction():
                        refs = butler.registry.queryDatasets(..., collections=replaced)
                        # We want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, run=replaced, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all of its datasets; the
                    # collection must be removed from its parent chain first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(
                        f"Unsupported --prune-replaced option '{args.prune_replaced}'."
                    )
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug("Preparing butler to write to '%s' and read from '%s'=%s",
                       self.outputRun.name, self.output.name, chainDefinition)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = CollectionSearch.fromExpression((self.outputRun.name,) + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler
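
    # Sketch of the output CHAINED collection's evolution (names are
    # hypothetical): each new RUN is pushed to the front, so "u/user/out"
    # goes from
    #
    #     ["u/user/out/20210101", "HSC/raw"]
    #
    # to
    #
    #     ["u/user/out/20210202", "u/user/out/20210101", "HSC/raw"]
    #
    # after a second invocation without --extend-run.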

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: Tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """



class _FilteredStream:
    """A file-like object that filters some config fields.

    Note
    ----
    This class depends on implementation details of ``Config.saveToStream``
    methods, in particular that that method uses a single call to ``write()``
    to save information about a single config field, and that that call
    combines the comment string(s) for a field with the field path and value.
    This class will not work reliably on the "import" strings, so imports
    should be disabled by passing ``skipImports=True`` to ``saveToStream()``.
    """
    def __init__(self, pattern):
        # Obey case if the pattern isn't lowercase or requests NOIGNORECASE.
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(f"Matching \"{pattern}\" without regard to case "
                      "(append :NOIGNORECASE to prevent this)", file=sys.stdout)
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        # Strip off doc string line(s) and cut off at "=" for string matching.
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)
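
# Example of the filtering behavior (field names are hypothetical; this is
# not part of the module):
#
#     stream = _FilteredStream("*.threshold")
#     stream.write("# doc...\nconfig.detection.threshold=5.0\n")  # echoed
#     stream.write("config.detection.nSigma=3.0\n")               # suppressed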


# ------------------------
#  Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from the command line.

    In addition to executing tasks, this activator provides additional methods
    for task management, like dumping the configuration or the execution
    chain.
    """

    MP_TIMEOUT = 9999  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            The resulting pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # Loop over all pipeline actions and apply them in order.
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)
            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)
            elif action.action == "delete_task":
                pipeline.removeTask(action.label)
            elif action.action == "config":
                # The action value string is "field=value"; split it at the
                # first '='.
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)
            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)
            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
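
    # Sketch of how a "config" action value splits (the override string is
    # hypothetical; this is not part of the module):
    #
    #     field, _, value = "psf.fwhm=1.2".partition("=")
    #     # field == "psf.fwhm", value == "1.2"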

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or `None` if the graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            `None` is returned if the resulting graph is empty.
        """
        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes an empty tuple as the default value for
            # qgraph_node_id.
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(args.qgraph, registry.dimensions,
                                          nodes=nodes, graphID=args.qgraph_id)

            # A pipeline cannot be provided in this case.
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            # Make the execution plan (a.k.a. DAG) for the pipeline.
            graphBuilder = GraphBuilder(registry,
                                        skipExistingIn=args.skip_existing_in,
                                        clobberOutputs=args.clobber_outputs)
            # Accumulate metadata.
            metadata = {"input": args.input, "output": args.output, "butler_argument": args.butler_config,
                        "output_run": args.output_run, "extend_run": args.extend_run,
                        "skip_existing_in": args.skip_existing_in, "skip_existing": args.skip_existing,
                        "data_query": args.data_query, "user": getpass.getuser(),
                        "time": f"{datetime.datetime.now()}"}
            qgraph = graphBuilder.makeGraph(pipeline, collections, run, args.data_query, metadata=metadata,
                                            datasetQueryConstraint=args.dataset_query_constraint)
            if args.show_qgraph_header:
                print(qgraph.buildAndPrintHeader())

        # Count quanta in the graph; give a warning and return None if it is
        # empty.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info("QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
                      nQuanta, len(qgraph.taskGraph), qgraph.graphID)

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler):
                newArgs.butler_config = butler._config
                # makeWriteButler is called for its side effects: parsing all
                # the args into collection names, creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include the output collection in the collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                try:
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)
                except MissingCollectionError:
                    pass

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(butler, qgraph, args.execution_butler_location, run,
                                 butlerModifier=builderShim, collections=all_inputs,
                                 clobber=args.clobber_execution_butler)

        return qgraph

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute a complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined, a new instance is made
            using command line options.
        """
        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        # Make a butler instance.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging.  Note that this is done once in the
        # main process before PreExecInit, and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses the fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run)
        preExecInit.initialize(graph,
                               saveInitOutputs=not args.skip_init_writes,
                               registerDatasetTypes=args.register_dataset_types,
                               saveVersions=not args.no_versions)

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(taskFactory,
                                                    skipExistingIn=args.skip_existing_in,
                                                    clobberOutputs=args.clobber_outputs,
                                                    enableLsstDebug=args.enableLsstDebug,
                                                    exitOnKnownError=args.fail_fast)
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(numProc=args.processes, timeout=timeout,
                                       startMethod=args.start_method,
                                       quantumExecutor=quantumExecutor,
                                       failFast=args.fail_fast,
                                       executionGraphFixup=graphFixup)
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler)

    def showInfo(self, args, pipeline, graph=None):
        """Display useful information about the pipeline and the environment.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "dump-config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow", "uri"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "uri":
                if graph:
                    self._showUri(graph, args)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                print("Unknown value for show: %s (choose from '%s')" %
                      (what, "', '".join("pipeline config[=XXX] dump-config[=Task] history=XXX "
                                         "tasks graph workflow uri".split())),
                      file=sys.stderr)
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True` then dump the complete task configuration with all
            imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option.
            taskName = showArgs
        else:
            # The argument can have the form
            # [TaskLabel::][pattern:NOIGNORECASE].
            matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)
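
    # Sketch of how the --show config argument parses (values are
    # hypothetical; this is not part of the module):
    #
    #     mat = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", "isr::doWrite")
    #     # mat.group(1) == "isr", mat.group(2) == "doWrite"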

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        """
        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        found = False
        for taskDef in tasks:

            config = taskDef.config

            # Look for any matches in the config hierarchy for this name.
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    print("")

                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # Looking for a top-level field.
                        hconfig = taskDef.config
                    else:
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                          file=sys.stderr)
                    hconfig = None

                # Sometimes we end up with a non-Config, so skip those.
                if isinstance(hconfig, (pexConfig.Config, pexConfig.ConfigurableInstance)) \
                        and hasattr(hconfig, cname):
                    print(f"### Configuration field for task `{taskDef.label}'")
                    print(pexConfig.history.format(hconfig, cname))
                    found = True

        if not found:
            print(f"None of the tasks has a field matching {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline):
        """Print the task hierarchy to stdout.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNode in graph.taskGraph:
            print(taskNode)

            for iq, quantum in enumerate(graph.getQuantaForTask(taskNode)):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.inputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependencies to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line.
        """
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}")
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId} - Child Quantum {node.nodeId}")

    def _showUri(self, graph, args):
        """Print input and predicted output URIs to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line.
        """
        def dumpURIs(thisRef):
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}")
            else:
                print("    (disassembled artifact)")
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}")

        butler = _ButlerFactory.makeReadButler(args)
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}")
            print("  inputs:")
            for key, refs in node.quantum.inputs.items():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:")
            for key, refs in node.quantum.outputs.items():
                for ref in refs:
                    dumpURIs(ref)

    def _importGraphFixup(self, args):
        """Import and instantiate a graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImport(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make an instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup