# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

from __future__ import annotations

__all__ = ["CmdLineFwk"]

import atexit
import copy
import datetime
import getpass
import logging
import shutil
from collections.abc import Iterable, Sequence
from types import SimpleNamespace
from typing import Optional, Tuple

from astropy.table import Table
from lsst.daf.butler import Butler, CollectionType, DatasetRef, DatastoreCacheManager, Registry
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.daf.butler.registry.wildcards import CollectionWildcard
from lsst.pipe.base import (
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    TaskDef,
    TaskFactory,
    buildExecutionButler,
)
from lsst.utils import doImportType

from . import util
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)

class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: Tuple[str, ...]
    """The definition of the collection, if it already exists
    (`tuple` [ `str` ]).

    Empty if the collection does not already exist.
    """
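# Illustration of the bookkeeping above (collection names hypothetical): for
# an existing chain where
#
#     registry.getCollectionChain("u/someone/out")
#     # -> ["u/someone/out/run2", "u/someone/out/run1", "HSC/raw/all"]
#
# the helper records exists=True and chain=("u/someone/out/run2",
# "u/someone/out/run1", "HSC/raw/all"); for a missing collection it records
# exists=False and chain=().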

class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    args : `types.SimpleNamespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; see
            :ref:`daf_butler_ordered_collection_searches` for details.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = "{}/{}".format(self.output, Instrument.makeCollectionTimestamp())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, it is OK to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections. We do this up
        # front so we can tell if the user passes the same inputs on
        # subsequent calls, even though we also flatten when we define the
        # output CHAINED collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()
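    # Illustrative sketch (attribute values hypothetical): a hand-built
    # namespace carrying the attributes documented in the class docstring is
    # enough to drive this factory, e.g.
    #
    #     args = SimpleNamespace(
    #         butler_config="/repo/main",
    #         input=("HSC/raw/all", "HSC/calib"),
    #         output="u/someone/processing",
    #         output_run=None,
    #         extend_run=False,
    #         replace_run=False,
    #         prune_replaced=None,
    #     )
    #     butler = _ButlerFactory.makeReadButler(args)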

    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1]):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")
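    # Example of the tail-matching rule enforced in `check` above (collection
    # names hypothetical): new runs are pushed to the front of the chain, so
    # inputs given on later invocations must match the tail of the existing
    # chain.
    #
    #     output.chain = ("run1", "in_a", "in_b")
    #     inputs = ("in_a", "in_b")               # accepted: matches the tail
    #     inputs = ("in_c", "in_b")               # rejected: tail differs
    #     inputs = ("x", "run1", "in_a", "in_b")  # rejected: adds a new input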

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], _ButlerFactory]:
        """Common implementation for `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `Sequence` [ `str` ]
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionWildcard.from_expression(inputs).require_ordered()
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeButlerAndCollections(cls, args: SimpleNamespace) -> Tuple[Butler, Sequence[str], Optional[str]]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `Sequence` [ `str` ]
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: Optional[str] = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @staticmethod
    def defineDatastoreCache() -> None:
        """Define where datastore cache directories should be found.

        Notes
        -----
        All the jobs should share a datastore cache if applicable. This
        method asks for a shared fallback cache to be defined and then
        configures an exit handler to clean it up.
        """
        defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
        if defined:
            atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
            _LOG.debug("Defining shared datastore cache directory to %s", cache_dir)

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Optional[Iterable[TaskDef]] = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline. This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from the datastore.
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # We want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all its datasets; the
                    # collection must be removed from its parent chain first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = (self.outputRun.name,) + self.inputs
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler
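    # Sketch of the chain bookkeeping performed by makeWriteButler (names
    # hypothetical): a newly registered run is prepended to the output chain,
    # and with --replace-run the previous front element is popped first.
    #
    #     before:  "u/someone/out" -> ["u/someone/out/run1", "HSC/raw/all"]
    #     after:   "u/someone/out" -> ["u/someone/out/run2",
    #                                  "u/someone/out/run1",  # dropped if --replace-run
    #                                  "HSC/raw/all"]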

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: Tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing

    def __init__(self) -> None:
        pass

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # Loop over all pipeline actions and apply them in order.
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)
            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)
            elif action.action == "delete_task":
                pipeline.removeTask(action.label)
            elif action.action == "config":
                # Action value string is "field=value"; split it at '='.
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)
            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)
            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
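    # The pipeline actions consumed in makePipeline map one-to-one onto
    # `Pipeline` method calls, so the equivalent direct construction is
    # (task, label, and config names hypothetical):
    #
    #     pipeline = Pipeline("anonymous")
    #     pipeline.addInstrument("lsst.obs.subaru.HyperSuprimeCam")
    #     pipeline.addTask("lsst.ip.isr.IsrTask", "isr")
    #     pipeline.addConfigOverride("isr", "doDark", "False")
    #     pipeline.addConfigFile("isr", "isrConfig.py")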

    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> Optional[QuantumGraph]:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline; can be empty or `None` if the graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
        """
        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes an empty tuple as the default for qgraph_node_id.
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(
                args.qgraph, butler.registry.dimensions, nodes=nodes, graphID=args.qgraph_id
            )

            # A pipeline cannot be provided in this case.
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            # Make the execution plan (a.k.a. DAG) for the pipeline.
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler.datastore if args.qgraph_datastore_records else None,
            )
            # Accumulate metadata.
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": args.output_run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            qgraph = graphBuilder.makeGraph(
                pipeline,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
            )
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        # Count quanta in the graph; if it is empty, return None.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            return None
        else:
            if _LOG.isEnabledFor(logging.INFO):
                qg_task_table = self._generateTaskTable(qgraph)
                _LOG.info(
                    "QuantumGraph contains %d quanta for %d tasks, graph ID: %r\n%s",
                    nQuanta,
                    len(qgraph.taskGraph),
                    qgraph.graphID,
                    str(qg_task_table),
                )

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                newArgs.butler_config = butler._config
                # makeWriteButler is called for its side effects, mainly
                # parsing all the args into collection names, creating
                # collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include the output collection in the collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                try:
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)
                except MissingCollectionError:
                    pass

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
                datastoreRoot=args.target_datastore_root,
                transfer=args.transfer,
            )

        return qgraph

    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Optional[Butler] = None,
    ) -> None:
        """Execute a complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        # Make a butler instance.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses the fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run, mock=args.mock)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                mock=args.mock,
                mock_configs=args.mock_configs,
            )
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph, butler)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))

    def _generateTaskTable(self, qgraph: QuantumGraph) -> Table:
        """Generate an astropy table listing the number of quanta per task
        for a given quantum graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.graph.graph.QuantumGraph`
            A QuantumGraph object.

        Returns
        -------
        qg_task_table : `astropy.table.table.Table`
            An astropy table containing columns: Quanta and Tasks.
        """
        qg_quanta, qg_tasks = [], []
        for task_def in qgraph.iterTaskGraph():
            num_qnodes = qgraph.getNumberOfQuantaForTask(task_def)
            qg_quanta.append(num_qnodes)
            qg_tasks.append(task_def.label)
        qg_task_table = Table(dict(Quanta=qg_quanta, Tasks=qg_tasks))
        return qg_task_table
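    # For example (task labels and quanta counts hypothetical), the table
    # built by _generateTaskTable renders roughly as
    #
    #     Quanta   Tasks
    #     ------ ---------
    #         92       isr
    #         92 calibrate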

    def _importGraphFixup(self, args: SimpleNamespace) -> Optional[ExecutionGraphFixup]:
        """Import and instantiate a graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None
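# End-to-end sketch (illustrative: ``args`` is a hypothetical parsed-argument
# namespace and ``taskFactory`` a concrete `lsst.pipe.base.TaskFactory`
# implementation) of how the methods above are combined by the command-line
# layer:
#
#     fwk = CmdLineFwk()
#     pipeline = fwk.makePipeline(args)       # apply pipeline actions
#     qgraph = fwk.makeGraph(pipeline, args)  # build or load a QuantumGraph
#     if qgraph is not None:                  # empty graphs come back as None
#         fwk.runPipeline(qgraph, taskFactory, args)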