Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 16%

278 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-08-31 04:21 -0700

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Module defining CmdLineFwk class and related methods. 

23""" 

24 

25from __future__ import annotations 

26 

27__all__ = ["CmdLineFwk"] 

28 

29# ------------------------------- 

30# Imports of standard modules -- 

31# ------------------------------- 

32import copy 

33import datetime 

34import getpass 

35import logging 

36import warnings 

37from types import SimpleNamespace 

38from typing import Iterable, Optional, Tuple 

39 

40from lsst.daf.butler import Butler, CollectionSearch, CollectionType, DatasetRef, Registry 

41from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults 

42from lsst.pipe.base import ( 

43 GraphBuilder, 

44 Instrument, 

45 Pipeline, 

46 PipelineDatasetTypes, 

47 QuantumGraph, 

48 TaskDef, 

49 TaskFactory, 

50 buildExecutionButler, 

51) 

52from lsst.utils import doImportType 

53 

54from . import util 

55from .dotTools import graph2dot, pipeline2dot 

56from .executionGraphFixup import ExecutionGraphFixup 

57from .mpGraphExecutor import MPGraphExecutor 

58from .preExecInit import PreExecInit 

59from .singleQuantumExecutor import SingleQuantumExecutor 

60 

61# ---------------------------------- 

62# Local non-exported definitions -- 

63# ---------------------------------- 

64 

65_LOG = logging.getLogger(__name__) 

66 

67 

class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            chain = registry.getCollectionChain(name)
        except MissingCollectionError:
            # Collection is not registered yet; record an empty definition.
            self.exists = False
            self.chain = ()
        else:
            self.exists = True
            self.chain = tuple(chain)

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: Tuple[str, ...]
    """The definition of the collection, if it already exists (`tuple`[`str`]).

    Empty if the collection does not already exist.
    """

105 

106 

class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
        except MissingCollectionError:
            # Not registered yet; it may be created later.
            self.exists = False
        else:
            # A collection with this name exists; it must really be a RUN.
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

136 

137 

class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should already
            exist but will be removed from the output chained collection and
            replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; may be any type handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                # New runs are pushed to the front of the chain, so the first
                # element is the most recent run and the one to extend.
                runName = self.output.chain[0]
            else:
                runName = "{}/{}".format(self.output, Instrument.makeCollectionTimestamp())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections. We do this up
        # front so we can tell if the user passes the same inputs on subsequent
        # calls, even though we also flatten when we define the output CHAINED
        # collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Raises
        ------
        ValueError
            Raised if the given options are inconsistent with each other or
            with the existing collections in the repository.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1]):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                # More inputs than the existing chain can account for; the
                # extra (new) ones would silently change provenance.
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, CollectionSearch, _ButlerFactory]:
        """Common implementation for `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                # The front of the chain is the run --replace-run will drop;
                # search the rest of the chain instead.
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            # The run being extended must be searched first so that its
            # existing outputs are visible.
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionSearch.fromExpression(inputs)
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeButlerAndCollections(
        cls, args: SimpleNamespace
    ) -> Tuple[Butler, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN` collection
            if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: Optional[str] = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Optional[Iterable[TaskDef]] = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline. This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # we want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase entire collection and all datasets, need to remove
                    # collection from its chain collection first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                # A fresh run goes to the front of the chain so it is searched
                # before any older runs.
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            # No CHAINED output collection; read from the run plus raw inputs.
            inputs = CollectionSearch.fromExpression((self.outputRun.name,) + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: Tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """

433 

434 

435# ------------------------ 

436# Exported definitions -- 

437# ------------------------ 

438 

439 

class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing

    def __init__(self) -> None:
        pass

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            Newly-built pipeline instance.

        Raises
        ------
        ValueError
            Raised if a pipeline action has an unrecognized action type.
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)
            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)
            elif action.action == "delete_task":
                pipeline.removeTask(action.label)
            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)
            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)
            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline

    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> Optional[QuantumGraph]:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If resulting graph is empty then `None` is returned.

        Raises
        ------
        ValueError
            Raised if a non-empty pipeline is given while the quantum graph
            is read from a file.
        """
        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes empty tuple as default value for qgraph_node_id
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(
                args.qgraph, butler.registry.dimensions, nodes=nodes, graphID=args.qgraph_id
            )

            # pipeline can not be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler.datastore if args.qgraph_datastore_records else None,
            )
            # accumulate metadata to record provenance of this graph build
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": args.output_run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            qgraph = graphBuilder.makeGraph(
                pipeline,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
            )
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        # Count quanta in graph; give a warning and return None if it's empty.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info(
                "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
                nQuanta,
                len(qgraph.taskGraph),
                qgraph.graphID,
            )

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                newArgs.butler_config = butler._config
                # Calling makeWriteButler is done for the side effects of
                # calling that method, mainly parsing all the args into
                # collection names, creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include output collection in collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                try:
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)
                except MissingCollectionError:
                    # Output collection not created yet; nothing to add.
                    pass

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
                datastoreRoot=args.target_datastore_root,
                transfer=args.transfer,
            )

        return qgraph

    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Optional[Butler] = None,
    ) -> None:
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance, if not defined then new instance is made
            using command line options.
        """
        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                # Logger.warn is deprecated; use Logger.warning.
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run, mock=args.mock)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                mock=args.mock,
                mock_configs=args.mock_configs,
            )
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph, butler)
            finally:
                # Write the execution report even when execution failed.
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))

    def _importGraphFixup(self, args: SimpleNamespace) -> Optional[ExecutionGraphFixup]:
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            Instantiated fixup object, or `None` if ``args.graph_fixup`` is
            not set.

        Raises
        ------
        ValueError
            Raised if import fails, method call raises exception, or returned
            instance has unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None