# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""
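
# This module backs the ``pipetask`` command-line tool.  A typical
# invocation, as a sketch (repository path, collection names, and data
# query are illustrative), is:
#
#   pipetask run -b /path/to/repo -i HSC/raw/all -o u/someone/demo \
#       -p pipeline.yaml -d "instrument = 'HSC'" -j 4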

from __future__ import annotations

__all__ = ["CmdLineFwk"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
import copy
import datetime
import fnmatch
import getpass
import logging
import re
import sys
import warnings
from types import SimpleNamespace
from typing import Any, Iterable, Optional, Tuple

import lsst.pex.config as pexConfig
import lsst.pex.config.history as pexConfigHistory
from lsst.daf.butler import Butler, CollectionSearch, CollectionType, DatasetRef, Registry
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.pipe.base import (
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    TaskDef,
    TaskFactory,
    buildExecutionButler,
)
from lsst.utils import doImportType

from . import util
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: Tuple[str, ...]
    """The definition of the collection, if it already exists (`tuple` [`str`]).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments.  The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            Whether and how to prune the replaced run: `None`, ``"unstore"``,
            or ``"purge"`` (requires ``replace_run``).

        ``inputs``
            Input collections of any type; may be any expression handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """
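    # Typical use, as a sketch (``args`` is the parsed pipetask command
    # line; see the class docstring for the attributes it must provide):
    #
    #     butler = _ButlerFactory.makeReadButler(args)
    #     butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)
    #     butler = _ButlerFactory.makeWriteButler(args, taskDefs)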

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = "{}/{}".format(self.output, Instrument.makeCollectionTimestamp())
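                # Produces run names like "u/someone/demo/20221106T210001Z"
                # (illustrative; the exact format is up to
                # makeCollectionTimestamp).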
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections.  We do this up
        # front so we can tell if the user passes the same inputs on
        # subsequent calls, even though we also flatten when we define the
        # output CHAINED collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1]):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, CollectionSearch, _ButlerFactory]:
        """Common implementation for `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionSearch.fromExpression(inputs)
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeButlerAndCollections(
        cls, args: SimpleNamespace
    ) -> Tuple[Butler, CollectionSearch, Optional[str]]:
        """Return a read-only butler, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: Optional[str] = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Optional[Iterable[TaskDef]] = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline.  This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # we want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all its datasets; the
                    # collection must be removed from its parent CHAINED
                    # collection first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = CollectionSearch.fromExpression((self.outputRun.name,) + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: Tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


class _FilteredStream:
    """A file-like object that filters some config fields.

    Note
    ----
    This class depends on implementation details of ``Config.saveToStream``
    methods, in particular that that method uses a single call to ``write()``
    to save information about a single config field, and that the call
    combines the comment string(s) for a field with the field path and value.
    This class will not work reliably on the "import" strings, so imports
    should be disabled by passing ``skipImports=True`` to ``saveToStream()``.
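
    Examples
    --------
    A sketch of the intended use (the glob pattern is illustrative)::

        stream = _FilteredStream("calibrate*")
        taskDef.config.saveToStream(stream, root="config", skipImports=True)

    Only fields whose dotted name matches the pattern (here, fields under
    ``config.calibrate``) are echoed to ``sys.stdout``.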
    """

    def __init__(self, pattern: str):
        # obey case if pattern isn't lowercase or requests NOIGNORECASE
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(
                    f'Matching "{pattern}" without regard to case '
                    "(append :NOIGNORECASE to prevent this)",
                    file=sys.stdout,
                )
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr: str) -> None:
        # Strip off doc string line(s) and cut off at "=" for string matching
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)


# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
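
    Examples
    --------
    Normally this class is driven by the ``pipetask`` command-line tool,
    which roughly does (a sketch; ``args`` and ``taskFactory`` come from
    the CLI layer)::

        fwk = CmdLineFwk()
        pipeline = fwk.makePipeline(args)
        qgraph = fwk.makeGraph(pipeline, args)
        if qgraph is not None:
            fwk.runPipeline(qgraph, taskFactory, args)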
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing (30 days)

    def __init__(self) -> None:
        pass

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            The constructed pipeline.
        """
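        # pipeline_actions is built up by the pipetask CLI from options such
        # as -t/--task, -c/--config, and -C/--config-file (a sketch of the
        # mapping; the authoritative list lives in the CLI definitions).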
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)
            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)
            elif action.action == "delete_task":
                pipeline.removeTask(action.label)
            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)
            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)
            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline

    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> Optional[QuantumGraph]:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline; can be empty or ``None`` if the graph is read from a
            file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            The constructed graph, or `None` if the resulting graph is empty.
        """

        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)
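
        # --skip-existing is shorthand for searching the output run itself,
        # so add it to the --skip-existing-in collections.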
        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes empty tuple as default value for qgraph_node_id
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(
                args.qgraph, butler.registry.dimensions, nodes=nodes, graphID=args.qgraph_id
            )

            # pipeline can not be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler.datastore if args.qgraph_datastore_records else None,
            )
            # accumulate metadata
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": args.output_run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            qgraph = graphBuilder.makeGraph(
                pipeline,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
            )
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        # Count quanta in the graph; if it is empty, give a warning and
        # return None.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info(
                "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
                nQuanta,
                len(qgraph.taskGraph),
                qgraph.graphID,
            )

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                newArgs.butler_config = butler._config
                # makeWriteButler is called here for its side effects:
                # parsing all the args into collection names, creating
                # collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include the output collection in the input collections if it
            # already exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                try:
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)
                except MissingCollectionError:
                    pass

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
            )

        return qgraph

    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Optional[Butler] = None,
    ) -> None:
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        # make butler instance
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging.  Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses the fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run, mock=args.mock)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                mock=args.mock,
                mock_configs=args.mock_configs,
            )
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph, butler)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))

    def showInfo(
        self, args: SimpleNamespace, pipeline: Pipeline, graph: Optional[QuantumGraph] = None
    ) -> None:
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow", "uri"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "uri":
                if graph:
                    self._showUri(graph, args)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                choices = "pipeline config[=XXX] dump-config[=Task] history=XXX tasks graph workflow uri"
                print(
                    "Unknown value for show: %s (choose from '%s')" % (what, "', '".join(choices.split())),
                    file=sys.stderr,
                )
                sys.exit(1)

    def _showConfig(self, pipeline: Pipeline, showArgs: str, dumpFullConfig: bool) -> None:
        """Show task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True` then dump complete task configuration with all imports.
        """
        stream: Any = sys.stdout
        if dumpFullConfig:
            # Task label can be given with this option
            taskName = showArgs
        else:
            # The argument can have form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
            assert matConfig is not None, "regex always matches"
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)

    def _showConfigHistory(self, pipeline: Pipeline, showArgs: str) -> None:
        """Show history for task configuration

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        """
        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        found = False
        for taskDef in tasks:
            config = taskDef.config

            # Look for any matches in the config hierarchy for this name
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    print("")

                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # looking for top-level field
                        hconfig = taskDef.config
                    else:
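                        # Walk down the dotted config path, e.g. a cpath of
                        # "calibrate.astrometry" (illustrative) evaluates
                        # config.calibrate.astrometry.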
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(
                        f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                        file=sys.stderr,
                    )
                    hconfig = None

                # Sometimes we end up with a non-Config so skip those
                if isinstance(hconfig, (pexConfig.Config, pexConfig.ConfigurableInstance)) and hasattr(
                    hconfig, cname
                ):
                    print(f"### Configuration field for task `{taskDef.label}'")
                    print(pexConfigHistory.format(hconfig, cname))
                    found = True

        if not found:
            print(f"None of the tasks has a field matching {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline: Pipeline) -> None:
        """Print task hierarchy to stdout

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph: QuantumGraph) -> None:
        """Print quanta information to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNode in graph.taskGraph:
            print(taskNode)

            for iq, quantum in enumerate(graph.getQuantaForTask(taskNode)):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.inputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph: QuantumGraph, args: SimpleNamespace) -> None:
        """Print quanta information and dependencies to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line.
        """
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}")
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId} - Child Quantum {node.nodeId}")

    def _showUri(self, graph: QuantumGraph, args: SimpleNamespace) -> None:
        """Print input and predicted output URIs to stdout

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line.
        """

        def dumpURIs(thisRef: DatasetRef) -> None:
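            # predict=True asks the butler for the URI a dataset *would*
            # get; "TBD" is a placeholder for the not-yet-created output run.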
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"        {primary}")
            else:
                print("        (disassembled artifact)")
                for compName, compUri in components.items():
                    print(f"            {compName}: {compUri}")

        butler = _ButlerFactory.makeReadButler(args)
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}")
            print("  inputs:")
            for key, refs in node.quantum.inputs.items():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:")
            for key, refs in node.quantum.outputs.items():
                for ref in refs:
                    dumpURIs(ref)

    def _importGraphFixup(self, args: SimpleNamespace) -> Optional[ExecutionGraphFixup]:
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            Fixup instance, or `None` if no fixup was requested.

        Raises
        ------
        ValueError
            Raised if import fails, method call raises exception, or returned
            instance has unexpected type.
        """
        if args.graph_fixup:
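            # args.graph_fixup holds a fully-qualified name such as
            # "mypackage.fixups.MyFixup" (illustrative), naming a class or
            # factory callable that returns an ExecutionGraphFixup.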
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None