Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 15%

276 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-10-01 01:56 -0700

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Module defining CmdLineFwk class and related methods. 

23""" 

24 

25from __future__ import annotations 

26 

27__all__ = ["CmdLineFwk"] 

28 

29# ------------------------------- 

30# Imports of standard modules -- 

31# ------------------------------- 

32import copy 

33import datetime 

34import getpass 

35import logging 

36from types import SimpleNamespace 

37from typing import Iterable, Optional, Tuple 

38 

39from lsst.daf.butler import Butler, CollectionSearch, CollectionType, DatasetRef, Registry 

40from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults 

41from lsst.pipe.base import ( 

42 GraphBuilder, 

43 Instrument, 

44 Pipeline, 

45 PipelineDatasetTypes, 

46 QuantumGraph, 

47 TaskDef, 

48 TaskFactory, 

49 buildExecutionButler, 

50) 

51from lsst.utils import doImportType 

52 

53from . import util 

54from .dotTools import graph2dot, pipeline2dot 

55from .executionGraphFixup import ExecutionGraphFixup 

56from .mpGraphExecutor import MPGraphExecutor 

57from .preExecInit import PreExecInit 

58from .singleQuantumExecutor import SingleQuantumExecutor 

59 

60# ---------------------------------- 

61# Local non-exported definitions -- 

62# ---------------------------------- 

63 

64_LOG = logging.getLogger(__name__) 

65 

66 

67class _OutputChainedCollectionInfo: 

68 """A helper class for handling command-line arguments related to an output 

69 `~lsst.daf.butler.CollectionType.CHAINED` collection. 

70 

71 Parameters 

72 ---------- 

73 registry : `lsst.daf.butler.Registry` 

74 Butler registry that collections will be added to and/or queried from. 

75 name : `str` 

76 Name of the collection given on the command line. 

77 """ 

78 

79 def __init__(self, registry: Registry, name: str): 

80 self.name = name 

81 try: 

82 self.chain = tuple(registry.getCollectionChain(name)) 

83 self.exists = True 

84 except MissingCollectionError: 

85 self.chain = () 

86 self.exists = False 

87 

88 def __str__(self) -> str: 

89 return self.name 

90 

91 name: str 

92 """Name of the collection provided on the command line (`str`). 

93 """ 

94 

95 exists: bool 

96 """Whether this collection already exists in the registry (`bool`). 

97 """ 

98 

99 chain: Tuple[str, ...] 

100 """The definition of the collection, if it already exists (`tuple`[`str`]). 

101 

102 Empty if the collection does not already exist. 

103 """ 

104 

105 

class _OutputRunCollectionInfo:
    """A helper describing the state of an output
    `~lsst.daf.butler.CollectionType.RUN` collection named on the command
    line.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
        except MissingCollectionError:
            self.exists = False
        else:
            # The collection exists; it is only usable as an output run if it
            # really is a RUN collection.
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

135 

136 

class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments.  The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should already
            exist but will be removed from the output chained collection and
            replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; may be any type handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is required; if `False`,
        it is acceptable to have no output run at all.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                # With --extend-run and no explicit --output-run, reuse the
                # most recent run, which is at the front of the output chain.
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                # Derive a fresh timestamped run name under the output chain.
                runName = "{}/{}".format(self.output, Instrument.makeCollectionTimestamp())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections.  We do this up
        # front so we can tell if the user passes the same inputs on subsequent
        # calls, even though we also flatten when we define the output CHAINED
        # collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Raises
        ------
        ValueError
            Raised on any inconsistent combination of options and existing
            collection state.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1]):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                # Any extra inputs (beyond the existing chain length) would
                # change the chain's meaning, which is not allowed after the
                # chain has been created.
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, CollectionSearch, _ButlerFactory]:
        """Common implementation for `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                # Pretend the head run has already been removed from the
                # chain, since a later write step will actually replace it.
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            # The existing output run is searched first so already-produced
            # outputs are found.
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionSearch.fromExpression(inputs)
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeButlerAndCollections(
        cls, args: SimpleNamespace
    ) -> Tuple[Butler, CollectionSearch, Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or queried
            from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN` collection
            if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: Optional[str] = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Optional[Iterable[TaskDef]] = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline.  This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                # Drop the most recent run from the chain; what happens to its
                # datasets depends on --prune-replaced below.
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # we want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase entire collection and all datasets, need to remove
                    # collection from its chain collection first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                # A new run: register it and push it onto the front of the
                # output chain.
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            # No CHAINED output collection: read from the run plus the raw
            # inputs directly.
            inputs = CollectionSearch.fromExpression((self.outputRun.name,) + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: Tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """

432 

433 

434# ------------------------ 

435# Exported definitions -- 

436# ------------------------ 

437 

438 

class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing

    def __init__(self) -> None:
        pass

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)
            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)
            elif action.action == "delete_task":
                pipeline.removeTask(action.label)
            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)
            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)
            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline

    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> Optional[QuantumGraph]:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If resulting graph is empty then `None` is returned.
        """
        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes empty tuple as default value for qgraph_node_id
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(
                args.qgraph, butler.registry.dimensions, nodes=nodes, graphID=args.qgraph_id
            )

            # pipeline can not be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler.datastore if args.qgraph_datastore_records else None,
            )
            # accumulate metadata
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": args.output_run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            qgraph = graphBuilder.makeGraph(
                pipeline,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
            )
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        # Count quanta in graph and give a warning if it's empty and return
        # None.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            return None
        else:
            _LOG.info(
                "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
                nQuanta,
                len(qgraph.taskGraph),
                qgraph.graphID,
            )

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                newArgs.butler_config = butler._config
                # Calling makeWriteButler is done for the side effects of
                # calling that method, mainly parsing all the args into
                # collection names, creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include output collection in collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                try:
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)
                except MissingCollectionError:
                    pass

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
                datastoreRoot=args.target_datastore_root,
                transfer=args.transfer,
            )

        return qgraph

    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Optional[Butler] = None,
    ) -> None:
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance, if not defined then new instance is made
            using command line options.
        """
        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                # Fixed deprecated Logger.warn alias; use Logger.warning.
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run, mock=args.mock)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                mock=args.mock,
                mock_configs=args.mock_configs,
            )
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph, butler)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))

    def _importGraphFixup(self, args: SimpleNamespace) -> Optional[ExecutionGraphFixup]:
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            `None` when ``args.graph_fixup`` is not set.

        Raises
        ------
        ValueError
            Raised if import fails, method call raises exception, or returned
            instance has unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None

748 return None