Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 16%

365 statements  

coverage.py v7.2.7, created at 2023-08-06 02:30 +0000

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

from __future__ import annotations

__all__ = ["CmdLineFwk"]

import atexit
import contextlib
import copy
import datetime
import getpass
import logging
import shutil
from collections.abc import Iterable, Mapping, Sequence
from types import SimpleNamespace
from typing import TYPE_CHECKING

import astropy.units as u
from astropy.table import Table
from lsst.daf.butler import (
    Butler,
    CollectionType,
    DatasetId,
    DatasetRef,
    DatastoreCacheManager,
    QuantumBackedButler,
)
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.daf.butler.registry.wildcards import CollectionWildcard
from lsst.pipe.base import (
    ExecutionResources,
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    buildExecutionButler,
)
from lsst.utils import doImportType
from lsst.utils.threads import disable_implicit_threading

from . import util
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit, PreExecInitLimited
from .singleQuantumExecutor import SingleQuantumExecutor

if TYPE_CHECKING:
    from lsst.daf.butler import (
        Config,
        DatasetType,
        DatastoreRecordData,
        DimensionUniverse,
        LimitedButler,
        Quantum,
        Registry,
    )
    from lsst.pipe.base import TaskDef, TaskFactory


# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: tuple[str, ...]
    """The definition of the collection, if it already exists
    (`tuple` [`str`]).

    Empty if the collection does not already exist.
    """
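
# Illustrative usage (added comment, not in the original source; the
# collection name is hypothetical): constructing this helper records whether
# the CHAINED collection is already registered and, if so, its definition.
#
#     info = _OutputChainedCollectionInfo(butler.registry, "u/user/output")
#     info.exists  # True if the CHAINED collection is already registered
#     info.chain   # its current definition, or () if it does not exist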


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; see
            :ref:`daf_butler_ordered_collection_searches` for details.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `~lsst.daf.butler.Butler` is being initialized in a
        context where actual writes should happen, and hence an output run
        is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = f"{self.output}/{Instrument.makeCollectionTimestamp()}"
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, it is OK to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections. We do this up
        # front so we can tell if the user passes the same inputs on
        # subsequent calls, even though we also flatten when we define the
        # output CHAINED collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1], strict=False):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")
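
    # Worked example of the "ends with" rule above (added comment, not in the
    # original source; collection names are hypothetical): if the existing
    # output chain is ("run2", "run1", "calib", "raw"), then
    # inputs=("calib", "raw") passes the check, while inputs=("other", "raw")
    # raises ValueError, because the chain must end with exactly the given
    # inputs once new runs have been pushed onto its front.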

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], _ButlerFactory]:
        """Parse arguments to support implementations of `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `~collections.abc.Sequence` [ `str` ]
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionWildcard.from_expression(inputs).require_ordered()
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)
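
    # Illustrative call sequence (added comment, not in the original source;
    # attribute values are hypothetical and only the key attributes are
    # shown, a real parsed-args namespace carries more):
    #
    #     args = SimpleNamespace(butler_config="repo", input=("raw",),
    #                            output="u/user/out", output_run=None,
    #                            extend_run=False, replace_run=False,
    #                            prune_replaced=None)
    #     butler = _ButlerFactory.makeReadButler(args)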

    @classmethod
    def makeButlerAndCollections(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], str | None]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `~collections.abc.Sequence` [ `str` ]
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: str | None = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
        if self.outputRun is not None:
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @staticmethod
    def defineDatastoreCache() -> None:
        """Define where datastore cache directories should be found.

        Notes
        -----
        All the jobs should share a datastore cache if applicable. This
        method asks for a shared fallback cache to be defined and then
        configures an exit handler to clean it up.
        """
        defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
        if defined:
            atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
            _LOG.debug("Defining shared datastore cache directory to %s", cache_dir)

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Iterable[TaskDef] | None = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline. This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore.
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # We want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all its datasets; the
                    # collection must be removed from its parent chain first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.removeRuns([replaced], unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = (self.outputRun.name,) + self.inputs
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: _OutputChainedCollectionInfo | None
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: _OutputRunCollectionInfo | None
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """

474 

475class _QBBFactory: 

476 """Class which is a callable for making QBB instances.""" 

477 

478 def __init__( 

479 self, butler_config: Config, dimensions: DimensionUniverse, dataset_types: Mapping[str, DatasetType] 

480 ): 

481 self.butler_config = butler_config 

482 self.dimensions = dimensions 

483 self.dataset_types = dataset_types 

484 

485 def __call__(self, quantum: Quantum) -> LimitedButler: 

486 """Return freshly initialized `~lsst.daf.butler.QuantumBackedButler`. 

487 

488 Factory method to create QuantumBackedButler instances. 

489 """ 

490 return QuantumBackedButler.initialize( 

491 config=self.butler_config, 

492 quantum=quantum, 

493 dimensions=self.dimensions, 

494 dataset_types=self.dataset_types, 

495 ) 
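
    # Note (added comment, not in the original source): this factory is a
    # plain class with simple attributes rather than a closure or lambda,
    # presumably so that it can be pickled and shipped to worker processes
    # when quanta are executed via multiprocessing.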


# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing: 30 days.

    def __init__(self) -> None:
        pass

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            The constructed pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # Loop over all pipeline actions and apply them in order.
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)

            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":
                pipeline.removeTask(action.label)

            elif action.action == "config":
                # Action value string is "field=value"; split it at '='.
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)

            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
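
    # Illustrative mapping (added comment, not in the original source; the
    # invocation is hypothetical and option spellings should be checked
    # against the pipetask CLI): the pipeline actions above are produced by
    # pipetask options, e.g. something like
    #
    #     pipetask run -p pipeline.yaml -c taskLabel:field=value \
    #         -C taskLabel:overrides.py
    #
    # would yield a "config" action and a "configfile" action for taskLabel.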

    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> QuantumGraph | None:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
        """
        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes an empty tuple as the default value for
            # qgraph_node_id.
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(args.qgraph, butler.dimensions, nodes=nodes, graphID=args.qgraph_id)

            # A pipeline cannot be provided in this case.
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            task_defs = list(pipeline.toExpandedPipeline())
            if args.mock:
                from lsst.pipe.base.tests.mocks import mock_task_defs

                task_defs = mock_task_defs(
                    task_defs,
                    unmocked_dataset_types=args.unmocked_dataset_types,
                    force_failures=args.mock_failure,
                )
            # Make execution plan (a.k.a. DAG) for pipeline.
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler._datastore if args.qgraph_datastore_records else None,
            )
            # Accumulate metadata.
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            assert run is not None, "Butler output run collection must be defined"
            qgraph = graphBuilder.makeGraph(
                task_defs,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
                dataId=pipeline.get_data_id(butler.dimensions),
            )
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        # Count quanta in graph; if the graph is empty return None.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            return None
        else:
            if _LOG.isEnabledFor(logging.INFO):
                qg_task_table = self._generateTaskTable(qgraph)
                qg_task_table_formatted = "\n".join(qg_task_table.pformat_all())
                _LOG.info(
                    "QuantumGraph contains %d quanta for %d tasks, graph ID: %r\n%s",
                    nQuanta,
                    len(qgraph.taskGraph),
                    qgraph.graphID,
                    qg_task_table_formatted,
                )

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                newArgs.butler_config = butler._config
                # Calling makeWriteButler is done for the side effects of
                # calling that method, mainly parsing all the args into
                # collection names, creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include output collection in collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                with contextlib.suppress(MissingCollectionError):
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
                datastoreRoot=args.target_datastore_root,
                transfer=args.transfer,
            )

        return qgraph

    def _make_execution_resources(self, args: SimpleNamespace) -> ExecutionResources:
        """Construct the execution resource class from arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum.
        """
        return ExecutionResources(
            num_cores=args.cores_per_quantum, max_mem=args.memory_per_quantum, default_mem_units=u.MB
        )
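
    # Illustrative values (added comment, not in the original source; the
    # interpretation is an assumption based on default_mem_units=u.MB): with
    # cores_per_quantum=4 and memory_per_quantum="2048", a bare number for
    # --memory-per-quantum is presumably read as megabytes, while a value
    # with an explicit unit suffix would override that default.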

    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Butler | None = None,
    ) -> None:
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # Check that output run defined on command line is consistent with
        # quantum graph.
        if args.output_run and graph.metadata:
            graph_output_run = graph.metadata.get("output_run", args.output_run)
            if graph_output_run != args.output_run:
                raise ValueError(
                    f"Output run defined on command line ({args.output_run}) has to be "
                    f"identical to graph metadata ({graph_output_run}). "
                    "To update graph metadata run `pipetask update-graph-run` command."
                )

        # Make sure that --extend-run always enables --skip-existing;
        # clobbering should be disabled if --extend-run is not specified.
        if args.extend_run:
            args.skip_existing = True
        else:
            args.clobber_outputs = False

        if not args.enable_implicit_threading:
            disable_implicit_threading()

        # Make butler instance. QuantumGraph should have an output run
        # defined, but we ignore it here and let command line decide actual
        # output run.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            resources = self._make_execution_resources(args)
            quantumExecutor = SingleQuantumExecutor(
                butler,
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                resources=resources,
            )

            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            # Have to reset connection pool to avoid sharing connections with
            # forked processes.
            butler.registry.resetConnectionPool()
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))

    def _generateTaskTable(self, qgraph: QuantumGraph) -> Table:
        """Generate an astropy table listing the number of quanta per task for
        a given quantum graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.graph.graph.QuantumGraph`
            A QuantumGraph object.

        Returns
        -------
        qg_task_table : `astropy.table.table.Table`
            An astropy table containing columns: Quanta and Tasks.
        """
        qg_quanta, qg_tasks = [], []
        for task_def in qgraph.iterTaskGraph():
            num_qnodes = qgraph.getNumberOfQuantaForTask(task_def)
            qg_quanta.append(num_qnodes)
            qg_tasks.append(task_def.label)
        qg_task_table = Table(dict(Quanta=qg_quanta, Tasks=qg_tasks))
        return qg_task_table
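
    # Example output shape (added comment, not in the original source; task
    # labels and counts are hypothetical):
    #
    #     Quanta   Tasks
    #     ------ ---------
    #        100 isr
    #        100 calibrate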

    def _importGraphFixup(self, args: SimpleNamespace) -> ExecutionGraphFixup | None:
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            The instantiated fixup object, or `None` if ``args.graph_fixup``
            is not set.

        Raises
        ------
        ValueError
            Raised if import fails, method call raises exception, or returned
            instance has unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None

879 def preExecInitQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None: 

880 # Load quantum graph. We do not really need individual Quanta here, 

881 # but we need datastore records for initInputs, and those are only 

882 # available from Quanta, so load the whole thing. 

883 qgraph = QuantumGraph.loadUri(args.qgraph, graphID=args.qgraph_id) 

884 universe = qgraph.universe 

885 

886 # Collect all init input/output dataset IDs. 

887 predicted_inputs: set[DatasetId] = set() 

888 predicted_outputs: set[DatasetId] = set() 

889 for taskDef in qgraph.iterTaskGraph(): 

890 if (refs := qgraph.initInputRefs(taskDef)) is not None: 

891 predicted_inputs.update(ref.id for ref in refs) 

892 if (refs := qgraph.initOutputRefs(taskDef)) is not None: 

893 predicted_outputs.update(ref.id for ref in refs) 

894 predicted_outputs.update(ref.id for ref in qgraph.globalInitOutputRefs()) 

895 # remove intermediates from inputs 

896 predicted_inputs -= predicted_outputs 

897 

898 # Very inefficient way to extract datastore records from quantum graph, 

899 # we have to scan all quanta and look at their datastore records. 

900 datastore_records: dict[str, DatastoreRecordData] = {} 

901 for quantum_node in qgraph: 

902 for store_name, records in quantum_node.quantum.datastore_records.items(): 

903 subset = records.subset(predicted_inputs) 

904 if subset is not None: 

905 datastore_records.setdefault(store_name, DatastoreRecordData()).update(subset) 

906 

907 dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()} 

908 

909 # Make butler from everything. 

910 butler = QuantumBackedButler.from_predicted( 

911 config=args.butler_config, 

912 predicted_inputs=predicted_inputs, 

913 predicted_outputs=predicted_outputs, 

914 dimensions=universe, 

915 datastore_records=datastore_records, 

916 search_paths=args.config_search_path, 

917 dataset_types=dataset_types, 

918 ) 

919 

920 # Save all InitOutputs, configs, etc. 

921 preExecInit = PreExecInitLimited(butler, task_factory) 

922 preExecInit.initialize(qgraph) 

923 

924 def runGraphQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None: 

925 # Load quantum graph. 

926 nodes = args.qgraph_node_id or None 

927 qgraph = QuantumGraph.loadUri(args.qgraph, nodes=nodes, graphID=args.qgraph_id) 

928 

929 if qgraph.metadata is None: 

930 raise ValueError("QuantumGraph is missing metadata, cannot ") 

931 

932 dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()} 

933 

934 _butler_factory = _QBBFactory( 

935 butler_config=args.butler_config, 

936 dimensions=qgraph.universe, 

937 dataset_types=dataset_types, 

938 ) 

939 

940 # make special quantum executor 

941 resources = self._make_execution_resources(args) 

942 quantumExecutor = SingleQuantumExecutor( 

943 butler=None, 

944 taskFactory=task_factory, 

945 enableLsstDebug=args.enableLsstDebug, 

946 exitOnKnownError=args.fail_fast, 

947 limited_butler_factory=_butler_factory, 

948 resources=resources, 

949 ) 

950 

951 timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout 

952 executor = MPGraphExecutor( 

953 numProc=args.processes, 

954 timeout=timeout, 

955 startMethod=args.start_method, 

956 quantumExecutor=quantumExecutor, 

957 failFast=args.fail_fast, 

958 pdb=args.pdb, 

959 ) 

960 try: 

961 with util.profile(args.profile, _LOG): 

962 executor.execute(qgraph) 

963 finally: 

964 if args.summary: 

965 report = executor.getReport() 

966 if report: 

967 with open(args.summary, "w") as out: 

968 # Do not save fields that are not set. 

969 out.write(report.json(exclude_none=True, indent=2))
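
    # Typical QBB workflow (added comment, not in the original source): a
    # saved quantum graph is first initialized once with preExecInitQBB, then
    # the graph (or per-node subsets selected via qgraph_node_id) is executed
    # with runGraphQBB. Both steps work from the datastore records embedded
    # in the graph, presumably so execution needs no registry connection.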