# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

from __future__ import annotations

__all__ = ["CmdLineFwk"]

import atexit
import copy
import datetime
import getpass
import logging
import shutil
from collections.abc import Iterable, Sequence
from types import SimpleNamespace
from typing import Optional, Tuple

from lsst.daf.butler import Butler, CollectionType, DatasetRef, DatastoreCacheManager, Registry
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.daf.butler.registry.wildcards import CollectionWildcard
from lsst.pipe.base import (
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    TaskDef,
    TaskFactory,
    buildExecutionButler,
)
from lsst.utils import doImportType

from . import util
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: Tuple[str, ...]
    """The definition of the collection, if it already exists (`tuple` [`str`]).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``input``
            Input collections of any type; see
            :ref:`daf_butler_ordered_collection_searches` for details.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
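
    Examples
    --------
    Typical use goes through the class methods rather than direct
    construction; a minimal sketch, assuming ``args`` is a parsed
    ``pipetask`` command-line namespace::

        butler = _ButlerFactory.makeReadButler(args)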

    """

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = "{}/{}".format(self.output, Instrument.makeCollectionTimestamp())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections. We do this up
        # front so we can tell if the user passes the same inputs on
        # subsequent calls, even though we also flatten when we define the
        # output CHAINED collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1]):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], _ButlerFactory]:
        """Common implementation for `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `Sequence` [ `str` ]
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionWildcard.from_expression(inputs).require_ordered()
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeButlerAndCollections(cls, args: SimpleNamespace) -> Tuple[Butler, Sequence[str], Optional[str]]:
        """Return a read-only butler, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `Sequence` [ `str` ]
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: Optional[str] = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @staticmethod
    def defineDatastoreCache() -> None:
        """Define where datastore cache directories should be found.

        Notes
        -----
        All the jobs should share a datastore cache if applicable. This
        method asks for a shared fallback cache to be defined and then
        configures an exit handler to clean it up.
        """
        defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
        if defined:
            atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
            _LOG.debug("Defining shared datastore cache directory to %s", cache_dir)

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Optional[Iterable[TaskDef]] = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline. This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore.
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # We want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase entire collection and all datasets; need to remove
                    # the collection from its chain collection first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = (self.outputRun.name,) + self.inputs
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: Tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
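
    Examples
    --------
    A minimal sketch of programmatic use, assuming ``args`` is a fully
    populated namespace such as the one produced by the ``pipetask``
    command-line parser, and ``taskFactory`` is a concrete
    `~lsst.pipe.base.TaskFactory` implementation::

        fwk = CmdLineFwk()
        pipeline = fwk.makePipeline(args)
        qgraph = fwk.makeGraph(pipeline, args)
        if qgraph is not None:  # the graph may be empty
            fwk.runPipeline(qgraph, taskFactory, args)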

    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing: 30 days

    def __init__(self) -> None:
        pass

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            Newly-constructed pipeline.
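
        Examples
        --------
        The action loop below is equivalent to calling
        `~lsst.pipe.base.Pipeline` methods directly; a hypothetical sketch of
        what a ``config`` action amounts to (the URI and override values are
        illustrative)::

            pipeline = Pipeline.from_uri("pipeline.yaml")
            pipeline.addConfigOverride("isr", "doDark", "False")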

        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # Loop over all pipeline actions and apply them in order.
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)
            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)
            elif action.action == "delete_task":
                pipeline.removeTask(action.label)
            elif action.action == "config":
                # Action value string is "field=value", split it at '='.
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)
            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)
            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline

    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> Optional[QuantumGraph]:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
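
        Examples
        --------
        A minimal sketch of the two supported modes, assuming a parsed
        ``args`` namespace (the file name is illustrative)::

            # Load a previously saved graph; pipeline must be empty.
            args.qgraph = "saved.qgraph"
            qgraph = fwk.makeGraph(None, args)

            # Or build a new graph from a pipeline.
            args.qgraph = None
            qgraph = fwk.makeGraph(pipeline, args)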

        """

        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes an empty tuple as default value for qgraph_node_id.
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(
                args.qgraph, butler.registry.dimensions, nodes=nodes, graphID=args.qgraph_id
            )

            # Pipeline cannot be provided in this case.
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            # Make execution plan (a.k.a. DAG) for pipeline.
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler.datastore if args.qgraph_datastore_records else None,
            )
            # Accumulate metadata.
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": args.output_run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            qgraph = graphBuilder.makeGraph(
                pipeline,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
            )
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        # Count quanta in the graph; return None if it is empty.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            return None
        else:
            _LOG.info(
                "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
                nQuanta,
                len(qgraph.taskGraph),
                qgraph.graphID,
            )

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                newArgs.butler_config = butler._config
                # Calling makeWriteButler is done for the side effects of
                # calling that method, mainly parsing all the args into
                # collection names, creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include output collection in collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                try:
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)
                except MissingCollectionError:
                    pass

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
                datastoreRoot=args.target_datastore_root,
                transfer=args.transfer,
            )

        return qgraph

    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Optional[Butler] = None,
    ) -> None:
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
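
        Examples
        --------
        A minimal sketch, assuming ``graph`` was produced by `makeGraph` and
        ``taskFactory`` is a concrete factory such as the one provided by
        this package::

            fwk.runPipeline(graph, taskFactory, args)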

        """
        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        # Make butler instance.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run, mock=args.mock)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                mock=args.mock,
                mock_configs=args.mock_configs,
            )
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph, butler)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))

    def _importGraphFixup(self, args: SimpleNamespace) -> Optional[ExecutionGraphFixup]:
        """Import and instantiate the graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            Fixup instance, or `None` if ``args.graph_fixup`` is not set.

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
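
        Examples
        --------
        ``args.graph_fixup`` is expected to name an importable class or
        no-argument factory; a hypothetical sketch of a conforming class,
        assuming the `ExecutionGraphFixup.fixupQuanta` interface::

            class NoopFixup(ExecutionGraphFixup):
                def fixupQuanta(self, graph):
                    # Return the graph unchanged; a real fixup could add
                    # execution dependencies between quanta here.
                    return graph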

        """
        if args.graph_fixup:
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None