Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 13%

414 statements  

coverage.py v6.4, created at 2022-06-01 12:18 +0000

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

__all__ = ["CmdLineFwk"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
import argparse
import copy
import datetime
import fnmatch
import getpass
import logging
import re
import sys
import warnings
from typing import Iterable, Optional, Tuple

import lsst.pex.config as pexConfig
import lsst.pex.config.history as pexConfigHistory

# -----------------------------
# Imports for other modules --
# -----------------------------
from lsst.daf.butler import Butler, CollectionSearch, CollectionType, Registry
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.pipe.base import (
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    TaskDef,
    buildExecutionButler,
)
from lsst.utils import doImport

from . import util
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)

class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self):
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: Tuple[str, ...]
    """The definition of the collection, if it already exists (`tuple` [`str`]).

    Empty if the collection does not already exist.
    """
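
# Illustrative sketch, not part of the original module: how the helper above
# reports chain state ("butler" and the collection name are assumptions).
#
#     info = _OutputChainedCollectionInfo(butler.registry, "u/jdoe/output")
#     if info.exists:
#         print("existing chain:", info.chain)  # tuple of child collections
#     else:
#         print(info.name, "does not exist yet")  # info.chain == ()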

class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should already
            exist but will be removed from the output chained collection and
            replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; may be any expression handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = "{}/{}".format(self.output, Instrument.makeCollectionTimestamp())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, it is OK to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections. We do this up
        # front so we can tell if the user passes the same inputs on subsequent
        # calls, even though we also flatten when we define the output CHAINED
        # collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1]):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(
                f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
            )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")
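
    # Hedged summary of the consistency rules enforced by check() above,
    # expressed with the pipetask option spellings:
    #
    #     --extend-run with no --output/--output-run        -> ValueError
    #     --extend-run when the output run does not exist   -> ValueError
    #     existing output run without --extend-run          -> ValueError
    #     --prune-replaced without --replace-run            -> ValueError
    #     --replace-run without an existing --output chain  -> ValueError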

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self

    @classmethod
    def makeReadButler(cls, args: argparse.Namespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeButlerAndCollections(
        cls, args: argparse.Namespace
    ) -> Tuple[Butler, CollectionSearch, Optional[str]]:
        """Return a read-only butler, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN` collection
            if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @classmethod
    def makeWriteButler(
        cls, args: argparse.Namespace, taskDefs: Optional[Iterable[TaskDef]] = None
    ) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline. This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from the datastore.
                    with butler.transaction():
                        refs = butler.registry.queryDatasets(..., collections=replaced)
                        # We want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, run=replaced, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all its datasets; the
                    # collection must be removed from its parent chain first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = CollectionSearch.fromExpression((self.outputRun.name,) + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: Tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """
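
# Hedged sketch of how the three public entry points above divide the work
# ("args" is assumed to be the parsed pipetask command line):
#
#     butler = _ButlerFactory.makeReadButler(args)              # inspection/URIs
#     butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)
#                                                               # graph building
#     butler = _ButlerFactory.makeWriteButler(args, taskDefs)   # execution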

class _FilteredStream:
    """A file-like object that filters some config fields.

    Notes
    -----
    This class depends on implementation details of the ``Config.saveToStream``
    method, in particular that it uses a single call to the ``write()`` method
    to save information about a single config field, and that this call
    combines the comment string(s) for a field with the field path and value.
    This class will not work reliably on the "import" strings, so imports
    should be disabled by passing ``skipImports=True`` to ``saveToStream()``.
    """

    def __init__(self, pattern):
        # Obey case if the pattern isn't lowercase or requests NOIGNORECASE.
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(
                    f'Matching "{pattern}" without regard to case (append :NOIGNORECASE to prevent this)',
                    file=sys.stdout,
                )
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr):
        # Strip off doc string line(s) and cut off at "=" for string matching.
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)
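
# Illustrative use of _FilteredStream (not part of the module): show only
# config fields whose names match a glob, case-insensitively unless
# :NOIGNORECASE is appended.
#
#     stream = _FilteredStream("*doWrite*")              # case-insensitive
#     stream = _FilteredStream("doWrite:NOIGNORECASE")   # exact case
#     config.saveToStream(stream, root="config", skipImports=True)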

# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from the command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # Loop over all pipeline actions and apply them in order.
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)
            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)
            elif action.action == "delete_task":
                pipeline.removeTask(action.label)
            elif action.action == "config":
                # The action value string is "field=value"; split it at '='.
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)
            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)
            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
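
    # Hedged mapping from the action names handled above to typical pipetask
    # command-line spellings (orientation only; consult the pipetask help for
    # the authoritative forms):
    #
    #     add_instrument -> --instrument INSTRUMENT
    #     new_task       -> -t/--task TASK[:LABEL]
    #     delete_task    -> --delete LABEL
    #     config         -> -c/--config LABEL:FIELD=VALUE
    #     configfile     -> -C/--config-file LABEL:PATH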

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if the graph is read from a
            file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
        """

        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes an empty tuple as the default value for
            # qgraph_node_id.
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(
                args.qgraph, butler.registry.dimensions, nodes=nodes, graphID=args.qgraph_id
            )

            # A pipeline cannot be provided in this case.
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            # Make the execution plan (a.k.a. DAG) for the pipeline.
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler.datastore if args.qgraph_datastore_records else None,
            )
            # Accumulate metadata.
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": args.output_run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            qgraph = graphBuilder.makeGraph(
                pipeline,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
            )
            if args.show_qgraph_header:
                print(qgraph.buildAndPrintHeader())

        # Count quanta in the graph; give a warning and return None if it is
        # empty.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info(
                "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
                nQuanta,
                len(qgraph.taskGraph),
                qgraph.graphID,
            )

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler):
                newArgs.butler_config = butler._config
                # makeWriteButler is called for its side effects: parsing all
                # the args into collection names, creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include the output collection in the collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                try:
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)
                except MissingCollectionError:
                    pass

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
            )

        return qgraph
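
    # Summary of makeGraph above: with --qgraph the graph is loaded from a
    # file (in which case no pipeline may be given); otherwise GraphBuilder
    # constructs one from the pipeline, input collections, and data query.
    # An empty graph produces a warning and a None return.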

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute a complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        # Make a butler instance.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses the fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run, mock=args.mock)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                mock=args.mock,
                mock_configs=args.mock_configs,
            )
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph, butler)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))
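
    # Illustrative end-to-end driver sequence (a sketch; "args" is the parsed
    # pipetask command line and "taskFactory" an lsst.pipe.base.TaskFactory
    # instance):
    #
    #     fwk = CmdLineFwk()
    #     pipeline = fwk.makePipeline(args)
    #     qgraph = fwk.makeGraph(pipeline, args)
    #     if qgraph is not None:
    #         fwk.runPipeline(qgraph, taskFactory, args)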

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow", "uri"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "uri":
                if graph:
                    self._showUri(graph, args)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                choices = "pipeline config[=XXX] dump-config[=Task] history=XXX tasks graph workflow uri"
                print(
                    "Unknown value for show: %s (choose from '%s')" % (what, "', '".join(choices.split())),
                    file=sys.stderr,
                )
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True` then dump the complete task configuration with all
            imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # A task label can be given with this option.
            taskName = showArgs
        else:
            # The argument can have the form [TaskLabel::][pattern:NOIGNORECASE].
            matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)
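
    # Hedged examples of the --show argument forms parsed above:
    #
    #     --show config                       all fields of all tasks
    #     --show config=calibrate::*astrom*   glob match within task "calibrate"
    #     --show config=*Sigma*:NOIGNORECASE  case-sensitive glob match
    #     --show dump-config=calibrate        full dump, imports included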

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        """

        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        found = False
        for taskDef in tasks:
            config = taskDef.config

            # Look for any matches in the config hierarchy for this name.
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    print("")

                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # Looking for a top-level field.
                        hconfig = taskDef.config
                    else:
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(
                        f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                        file=sys.stderr,
                    )
                    hconfig = None

                # Sometimes we end up with a non-Config object, so skip those.
                if isinstance(hconfig, (pexConfig.Config, pexConfig.ConfigurableInstance)) and hasattr(
                    hconfig, cname
                ):
                    print(f"### Configuration field for task `{taskDef.label}'")
                    print(pexConfigHistory.format(hconfig, cname))
                    found = True

        if not found:
            print(f"None of the tasks has a field matching {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNode in graph.taskGraph:
            print(taskNode)

            for iq, quantum in enumerate(graph.getQuantaForTask(taskNode)):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.inputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependency to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line.
        """
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}")
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId} - Child Quantum {node.nodeId}")

    def _showUri(self, graph, args):
        """Print input and predicted output URIs to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line.
        """

        def dumpURIs(thisRef):
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}")
            else:
                print("    (disassembled artifact)")
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}")

        butler = _ButlerFactory.makeReadButler(args)
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}")
            print("  inputs:")
            for key, refs in node.quantum.inputs.items():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:")
            for key, refs in node.quantum.outputs.items():
                for ref in refs:
                    dumpURIs(ref)

    def _importGraphFixup(self, args):
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImport(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make an instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
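
    # Hedged example of the --graph-fixup value consumed above: an importable
    # name resolving to an ExecutionGraphFixup subclass or to a no-argument
    # factory returning one (the module path here is hypothetical):
    #
    #     pipetask run ... --graph-fixup mypkg.fixups.MyFixup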