Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 13%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

403 statements  

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Module defining CmdLineFwk class and related methods. 

23""" 

24 

25__all__ = ["CmdLineFwk"] 

26 

27# ------------------------------- 

28# Imports of standard modules -- 

29# ------------------------------- 

30import argparse 

31import copy 

32import datetime 

33import fnmatch 

34import getpass 

35import logging 

36import re 

37import sys 

38import warnings 

39from typing import Iterable, Optional, Tuple 

40 

41import lsst.pex.config as pexConfig 

42 

43# ----------------------------- 

44# Imports for other modules -- 

45# ----------------------------- 

46from lsst.daf.butler import Butler, CollectionSearch, CollectionType, Registry 

47from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults 

48from lsst.obs.base import Instrument 

49from lsst.pipe.base import ( 

50 GraphBuilder, 

51 Pipeline, 

52 PipelineDatasetTypes, 

53 QuantumGraph, 

54 TaskDef, 

55 buildExecutionButler, 

56) 

57from lsst.utils import doImport 

58 

59from . import util 

60from .dotTools import graph2dot, pipeline2dot 

61from .executionGraphFixup import ExecutionGraphFixup 

62from .mpGraphExecutor import MPGraphExecutor 

63from .preExecInit import PreExecInit 

64from .singleQuantumExecutor import SingleQuantumExecutor 

65 

66# ---------------------------------- 

67# Local non-exported definitions -- 

68# ---------------------------------- 

69 

70_LOG = logging.getLogger(__name__) 

71 

72 

class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: Tuple[str, ...]
    """The definition of the collection, if it already exists (`tuple`[`str`]).

    Empty if the collection does not already exist.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            # Query the existing chain definition; this both records the
            # definition and tells us whether the collection exists at all.
            self.chain = tuple(registry.getCollectionChain(name))
        except MissingCollectionError:
            self.chain = ()
            self.exists = False
        else:
            self.exists = True

    def __str__(self):
        return self.name

110 

111 

class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
        except MissingCollectionError:
            self.exists = False
        else:
            # The collection exists; it is an error for it to be anything
            # other than a RUN collection.
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True

141 

142 

class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments.  The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should already
            exist but will be removed from the output chained collection and
            replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; may be any type handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is required; if `False`,
        it is acceptable for no output run to be defined.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: argparse.Namespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                # --extend-run reuses the newest run in the existing output
                # chain, which by construction is the first chain element.
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                # New run named after the output chain plus a timestamp.
                runName = "{}/{}".format(self.output, Instrument.makeCollectionTimestamp())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections.  We do this up
        # front so we can tell if the user passes the same inputs on subsequent
        # calls, even though we also flatten when we define the output CHAINED
        # collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

    def check(self, args: argparse.Namespace):
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Raises
        ------
        ValueError
            Raised if any combination of options is inconsistent with the
            state of the repository or with each other.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1]):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                # Any extra inputs beyond the existing chain length would be
                # new inputs, which cannot be added after the fact.
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run and self.outputRun is None:
            raise ValueError("Cannot --extend-run when no output collection is given.")
        if args.extend_run and not self.outputRun.exists:
            raise ValueError(
                f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
            )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: argparse.Namespace):
        """Common implementation for `makeReadButler` and
        `makeRegistryAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                # Pretend the newest run (first chain element) is gone; the
                # write-side code will actually remove and replace it later.
                replaced = self.output.chain[0]
                inputs = self.output.chain[1:]
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            # The run being extended must be searched first so its existing
            # outputs are found.
            inputs.insert(0, self.outputRun.name)
        inputs = CollectionSearch.fromExpression(inputs)
        return butler, inputs, self

    @classmethod
    def makeReadButler(cls, args: argparse.Namespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeRegistryAndCollections(
        cls, args: argparse.Namespace
    ) -> Tuple[Registry, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        registry : `lsst.daf.butler.Registry`
            Butler registry that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN` collection
            if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run = self.outputRun.name if args.extend_run else None
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler.registry, inputs, run

    @classmethod
    def makeWriteButler(
        cls, args: argparse.Namespace, taskDefs: Optional[Iterable[TaskDef]] = None
    ) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline.  This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                # Remove the newest run from the chain; it may be pruned
                # below, and a fresh run is pushed to the front afterwards.
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore
                    with butler.transaction():
                        refs = butler.registry.queryDatasets(..., collections=replaced)
                        # we want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, run=replaced, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase entire collection and all datasets, need to remove
                    # collection from its chain collection first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                # New run: register it and push it to the front of the chain.
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            # No output chain; read from the run plus the given inputs.
            inputs = CollectionSearch.fromExpression((self.outputRun.name,) + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: Tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """

434 

435 

436class _FilteredStream: 

437 """A file-like object that filters some config fields. 

438 

439 Note 

440 ---- 

441 This class depends on implementation details of ``Config.saveToStream`` 

442 methods, in particular that that method uses single call to write() 

443 method to save information about single config field, and that call 

444 combines comments string(s) for a field and field path and value. 

445 This class will not work reliably on the "import" strings, so imports 

446 should be disabled by passing ``skipImports=True`` to ``saveToStream()``. 

447 """ 

448 

449 def __init__(self, pattern): 

450 # obey case if pattern isn't lowercase or requests NOIGNORECASE 

451 mat = re.search(r"(.*):NOIGNORECASE$", pattern) 

452 

453 if mat: 

454 pattern = mat.group(1) 

455 self._pattern = re.compile(fnmatch.translate(pattern)) 

456 else: 

457 if pattern != pattern.lower(): 

458 print( 

459 f'Matching "{pattern}" without regard to case ' "(append :NOIGNORECASE to prevent this)", 

460 file=sys.stdout, 

461 ) 

462 self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE) 

463 

464 def write(self, showStr): 

465 # Strip off doc string line(s) and cut off at "=" for string matching 

466 matchStr = showStr.rstrip().split("\n")[-1].split("=")[0] 

467 if self._pattern.search(matchStr): 

468 sys.stdout.write(showStr) 

469 

470 

471# ------------------------ 

472# Exported definitions -- 

473# ------------------------ 

474 

475 

class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing

    def __init__(self):
        pass

    def makePipeline(self, args):
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`

        Raises
        ------
        ValueError
            Raised if a pipeline action has an unrecognized action type.
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)
            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)
            elif action.action == "delete_task":
                pipeline.removeTask(action.label)
            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)
            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)
            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline

    def makeGraph(self, pipeline, args):
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If resulting graph is empty then `None` is returned.

        Raises
        ------
        ValueError
            Raised if a pipeline is given together with a pre-built quantum
            graph file.
        """
        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        registry, collections, run = _ButlerFactory.makeRegistryAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes empty tuple as default value for qgraph_node_id
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(
                args.qgraph, registry.dimensions, nodes=nodes, graphID=args.qgraph_id
            )

            # pipeline can not be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(
                registry, skipExistingIn=args.skip_existing_in, clobberOutputs=args.clobber_outputs
            )
            # accumulate metadata
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": args.output_run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            qgraph = graphBuilder.makeGraph(
                pipeline,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
            )
            if args.show_qgraph_header:
                print(qgraph.buildAndPrintHeader())

        # Count quanta in graph and give a warning if it's empty and return
        # None.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info(
                "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
                nQuanta,
                len(qgraph.taskGraph),
                qgraph.graphID,
            )

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            # Save one single-quantum graph file per node; the URI template
            # is formatted with the node to produce a unique name.
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler):
                newArgs.butler_config = butler._config
                # Calling makeWriteButler is done for the side effects of
                # calling that method, mainly parsing all the args into
                # collection names, creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include output collection in collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                try:
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)
                except MissingCollectionError:
                    pass

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
            )

        return qgraph

    def runPipeline(self, graph, taskFactory, args, butler=None):
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory
        args : `types.SimpleNamespace`
            Parsed command line
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance, if not defined then new instance is made
            using command line options.
        """
        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                # Use warning(); Logger.warn is a deprecated alias and has
                # been removed in newer Python versions.
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run, mock=args.mock)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                mock=args.mock,
                mock_configs=args.mock_configs,
            )
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                executionGraphFixup=graphFixup,
            )
            with util.profile(args.profile, _LOG):
                executor.execute(graph, butler)

    def showInfo(self, args, pipeline, graph=None):
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line
        pipeline : `Pipeline`
            Pipeline definition
        graph : `QuantumGraph`, optional
            Execution graph
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            # "dump-config" also needs a pipeline (it is handled by
            # _showConfig), so it must be guarded here as well.
            if showCommand in ["pipeline", "config", "dump-config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow", "uri"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "uri":
                if graph:
                    self._showUri(graph, args)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                # List every supported command, not just a subset.
                print(
                    "Unknown value for show: %s (choose from '%s')"
                    % (
                        what,
                        "', '".join(
                            "pipeline config[=XXX] dump-config[=Task] history=XXX"
                            " tasks graph workflow uri".split()
                        ),
                    ),
                    file=sys.stderr,
                )
                sys.exit(1)

    def _showConfig(self, pipeline, showArgs, dumpFullConfig):
        """Show task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show
        dumpFullConfig : `bool`
            If true then dump complete task configuration with all imports.
        """
        stream = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)

    def _showConfigHistory(self, pipeline, showArgs):
        """Show history for task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition
        showArgs : `str`
            Defines what to show
        """

        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        found = False
        for taskDef in tasks:

            config = taskDef.config

            # Look for any matches in the config hierarchy for this name
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    print("")

                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # looking for top-level field
                        hconfig = taskDef.config
                    else:
                        # NOTE(review): eval on an attribute path derived
                        # from a user-supplied pattern; the path has already
                        # been restricted by fnmatch against config.names().
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(
                        f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                        file=sys.stderr,
                    )
                    hconfig = None

                # Sometimes we end up with a non-Config so skip those
                if isinstance(hconfig, (pexConfig.Config, pexConfig.ConfigurableInstance)) and hasattr(
                    hconfig, cname
                ):
                    print(f"### Configuration field for task `{taskDef.label}'")
                    print(pexConfig.history.format(hconfig, cname))
                    found = True

        if not found:
            print(f"None of the tasks has field matching {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline):
        """Print task hierarchy to stdout

        Parameters
        ----------
        pipeline: `Pipeline`
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph):
        """Print quanta information to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNode in graph.taskGraph:
            print(taskNode)

            for iq, quantum in enumerate(graph.getQuantaForTask(taskNode)):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.inputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph, args):
        """Print quanta information and dependency to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line
        """
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}")
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId} - Child Quantum {node.nodeId}")

    def _showUri(self, graph, args):
        """Print input and predicted output URIs to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph
        args : `types.SimpleNamespace`
            Parsed command line
        """

        def dumpURIs(thisRef):
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}")
            else:
                print("    (disassembled artifact)")
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}")

        butler = _ButlerFactory.makeReadButler(args)
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}")
            print("  inputs:")
            for key, refs in node.quantum.inputs.items():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:")
            for key, refs in node.quantum.outputs.items():
                for ref in refs:
                    dumpURIs(ref)

    def _importGraphFixup(self, args):
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            `None` is returned when ``args.graph_fixup`` is empty or not set.

        Raises
        ------
        ValueError
            Raised if import fails, method call raises exception, or returned
            instance has unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImport(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None