Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 14%

366 statements  


# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods."""

from __future__ import annotations

__all__ = ["CmdLineFwk"]


import atexit
import copy
import datetime
import getpass
import logging
import shutil
from collections.abc import Iterable, Mapping, Sequence
from types import SimpleNamespace
from typing import TYPE_CHECKING

import astropy.units as u
from astropy.table import Table
from lsst.daf.butler import (
    Butler,
    CollectionType,
    DatasetId,
    DatasetRef,
    DatastoreCacheManager,
    QuantumBackedButler,
)
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.daf.butler.registry.wildcards import CollectionWildcard
from lsst.pipe.base import (
    ExecutionResources,
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    buildExecutionButler,
)
from lsst.utils import doImportType
from lsst.utils.threads import disable_implicit_threading

from . import util
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit, PreExecInitLimited
from .singleQuantumExecutor import SingleQuantumExecutor

if TYPE_CHECKING:
    from lsst.daf.butler import (
        Config,
        DatasetType,
        DatastoreRecordData,
        DimensionUniverse,
        LimitedButler,
        Quantum,
        Registry,
    )
    from lsst.pipe.base import TaskDef, TaskFactory


# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: tuple[str, ...]
    """The definition of the collection, if it already exists
    (`tuple` [`str`]).

    Empty if the collection does not already exist.
    """



class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """



class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    args : `types.SimpleNamespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            Whether and how to prune the replaced run; `None`, ``"unstore"``,
            or ``"purge"`` (requires ``replace_run``).

        ``input``
            Input collections of any type; see
            :ref:`daf_butler_ordered_collection_searches` for details.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `~lsst.daf.butler.Butler` is being initialized in a
        context where actual writes should happen, and hence an output run
        is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
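
    Examples
    --------
    A minimal sketch of the ``args`` namespace this helper consumes. The
    attribute names come from the description above; the repository path and
    collection names are hypothetical::

        from types import SimpleNamespace

        from lsst.daf.butler import Butler

        args = SimpleNamespace(
            butler_config="/repo/main",        # hypothetical repo root
            input=("refcats", "HSC/raw/all"),  # hypothetical input collections
            output="u/user/demo",              # CHAINED input/output collection
            output_run=None,                   # derive run name from ``output``
            extend_run=False,
            replace_run=False,
            prune_replaced=None,
        )
        butler = Butler(args.butler_config, writeable=False)
        factory = _ButlerFactory(butler.registry, args, writeable=False)
        factory.check(args)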

210 """ 

211 

212 def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool): 

213 if args.output is not None: 

214 self.output = _OutputChainedCollectionInfo(registry, args.output) 

215 else: 

216 self.output = None 

217 if args.output_run is not None: 

218 self.outputRun = _OutputRunCollectionInfo(registry, args.output_run) 

219 elif self.output is not None: 

220 if args.extend_run: 

221 if not self.output.chain: 

222 raise ValueError("Cannot use --extend-run option with non-existing or empty output chain") 

223 runName = self.output.chain[0] 

224 else: 

225 runName = "{}/{}".format(self.output, Instrument.makeCollectionTimestamp()) 

226 self.outputRun = _OutputRunCollectionInfo(registry, runName) 

227 elif not writeable: 

228 # If we're not writing yet, ok to have no output run. 

229 self.outputRun = None 

230 else: 

231 raise ValueError("Cannot write without at least one of (--output, --output-run).") 

232 # Recursively flatten any input CHAINED collections. We do this up 

233 # front so we can tell if the user passes the same inputs on subsequent 

234 # calls, even though we also flatten when we define the output CHAINED 

235 # collection. 

236 self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else () 


    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1]):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")


    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], _ButlerFactory]:
        """Parse arguments to support implementations of `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `Sequence` [ `str` ]
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed
            version of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionWildcard.from_expression(inputs).require_ordered()
        return butler, collSearch, self


    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)


    @classmethod
    def makeButlerAndCollections(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], str | None]:
        """Return a read-only butler, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `Sequence` [ `str` ]
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: str | None = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
        if self.outputRun is not None:
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run


    @staticmethod
    def defineDatastoreCache() -> None:
        """Define where datastore cache directories should be found.

        Notes
        -----
        All the jobs should share a datastore cache if applicable. This
        method asks for a shared fallback cache to be defined and then
        configures an exit handler to clean it up.
        """
        defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
        if defined:
            atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
            _LOG.debug("Defining shared datastore cache directory to %s", cache_dir)


    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Iterable[TaskDef] | None = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline. This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # we want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase entire collection and all datasets, need to remove
                    # collection from its chain collection first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.removeRuns([replaced], unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = (self.outputRun.name,) + self.inputs
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler


    output: _OutputChainedCollectionInfo | None
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: _OutputRunCollectionInfo | None
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """



class _QBBFactory:
    """Callable factory for making `~lsst.daf.butler.QuantumBackedButler`
    (QBB) instances.
    """

    def __init__(
        self, butler_config: Config, dimensions: DimensionUniverse, dataset_types: Mapping[str, DatasetType]
    ):
        self.butler_config = butler_config
        self.dimensions = dimensions
        self.dataset_types = dataset_types


    def __call__(self, quantum: Quantum) -> LimitedButler:
        """Return a freshly initialized `~lsst.daf.butler.QuantumBackedButler`.

        Factory method to create QuantumBackedButler instances.
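
        Examples
        --------
        A minimal sketch; the butler configuration, dimension universe, and
        dataset-type mapping would normally come from the command line and a
        loaded quantum graph, and ``quantum_node`` stands in for a node of
        that graph::

            qbb_factory = _QBBFactory(
                butler_config=args.butler_config,
                dimensions=qgraph.universe,
                dataset_types={d.name: d for d in qgraph.registryDatasetTypes()},
            )
            limited_butler = qbb_factory(quantum_node.quantum)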

488 """ 

489 return QuantumBackedButler.initialize( 

490 config=self.butler_config, 

491 quantum=quantum, 

492 dimensions=self.dimensions, 

493 dataset_types=self.dataset_types, 

494 ) 

495 

496 

# ------------------------
# Exported definitions --
# ------------------------



class CmdLineFwk:
    """PipelineTask framework which executes tasks from the command line.

    In addition to executing tasks, this activator provides methods for task
    management such as dumping configuration or the execution chain.
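
    Examples
    --------
    A high-level sketch of the typical call sequence; ``args`` stands in for
    a fully parsed command line and ``taskFactory`` for a concrete
    `~lsst.pipe.base.TaskFactory`, both supplied by the ``pipetask`` front
    end in practice::

        fwk = CmdLineFwk()
        pipeline = fwk.makePipeline(args)       # apply pipeline actions
        qgraph = fwk.makeGraph(pipeline, args)  # build the quantum graph
        if qgraph is not None:
            fwk.runPipeline(qgraph, taskFactory, args)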

507 """ 

508 

509 MP_TIMEOUT = 3600 * 24 * 30 # Default timeout (sec) for multiprocessing 

510 

511 def __init__(self) -> None: 

512 pass 

513 

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            The constructed pipeline.
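
        Examples
        --------
        A sketch of the ``args`` attributes consumed here; the pipeline URI
        is hypothetical and the remaining attributes are left at their
        "do nothing" values::

            args.pipeline = "my_pipeline.yaml"  # or None for an empty one
            args.pipeline_actions = []          # add_instrument/config/... actions
            args.save_pipeline = None           # or a URI to write the result to
            args.pipeline_dot = None            # or a path for a DOT dump
            pipeline = CmdLineFwk().makePipeline(args)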

525 """ 

526 if args.pipeline: 

527 pipeline = Pipeline.from_uri(args.pipeline) 

528 else: 

529 pipeline = Pipeline("anonymous") 

530 

531 # loop over all pipeline actions and apply them in order 

532 for action in args.pipeline_actions: 

533 if action.action == "add_instrument": 

534 pipeline.addInstrument(action.value) 

535 

536 elif action.action == "new_task": 

537 pipeline.addTask(action.value, action.label) 

538 

539 elif action.action == "delete_task": 

540 pipeline.removeTask(action.label) 

541 

542 elif action.action == "config": 

543 # action value string is "field=value", split it at '=' 

544 field, _, value = action.value.partition("=") 

545 pipeline.addConfigOverride(action.label, field, value) 

546 

547 elif action.action == "configfile": 

548 pipeline.addConfigFile(action.label, action.value) 

549 

550 else: 

551 raise ValueError(f"Unexpected pipeline action: {action.action}") 

552 

553 if args.save_pipeline: 

554 pipeline.write_to_uri(args.save_pipeline) 

555 

556 if args.pipeline_dot: 

557 pipeline2dot(pipeline, args.pipeline_dot) 

558 

559 return pipeline 


    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> QuantumGraph | None:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline; can be empty or `None` if the graph is read from a
            file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
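
        Examples
        --------
        Two common modes, sketched with a hypothetical file name and with
        ``fwk`` standing in for a `CmdLineFwk` instance: building a graph
        from a pipeline, or loading a graph previously saved with
        ``--save-qgraph`` (in which case the pipeline must be empty)::

            # Build the graph from a pipeline definition.
            args.qgraph = None
            qgraph = fwk.makeGraph(pipeline, args)

            # Re-load a previously saved graph instead.
            args.qgraph = "saved.qgraph"
            qgraph = fwk.makeGraph(Pipeline("anonymous"), args)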

575 """ 

576 # make sure that --extend-run always enables --skip-existing 

577 if args.extend_run: 

578 args.skip_existing = True 

579 

580 butler, collections, run = _ButlerFactory.makeButlerAndCollections(args) 

581 

582 if args.skip_existing and run: 

583 args.skip_existing_in += (run,) 

584 

585 if args.qgraph: 

586 # click passes empty tuple as default value for qgraph_node_id 

587 nodes = args.qgraph_node_id or None 

588 qgraph = QuantumGraph.loadUri(args.qgraph, butler.dimensions, nodes=nodes, graphID=args.qgraph_id) 

589 

590 # pipeline can not be provided in this case 

591 if pipeline: 

592 raise ValueError("Pipeline must not be given when quantum graph is read from file.") 

593 if args.show_qgraph_header: 

594 print(QuantumGraph.readHeader(args.qgraph)) 

595 else: 

596 task_defs = list(pipeline.toExpandedPipeline()) 

597 if args.mock: 

598 from lsst.pipe.base.tests.mocks import mock_task_defs 

599 

600 task_defs = mock_task_defs( 

601 task_defs, 

602 unmocked_dataset_types=args.unmocked_dataset_types, 

603 force_failures=args.mock_failure, 

604 ) 

605 # make execution plan (a.k.a. DAG) for pipeline 

606 graphBuilder = GraphBuilder( 

607 butler.registry, 

608 skipExistingIn=args.skip_existing_in, 

609 clobberOutputs=args.clobber_outputs, 

610 datastore=butler.datastore if args.qgraph_datastore_records else None, 

611 ) 

612 # accumulate metadata 

613 metadata = { 

614 "input": args.input, 

615 "output": args.output, 

616 "butler_argument": args.butler_config, 

617 "output_run": run, 

618 "extend_run": args.extend_run, 

619 "skip_existing_in": args.skip_existing_in, 

620 "skip_existing": args.skip_existing, 

621 "data_query": args.data_query, 

622 "user": getpass.getuser(), 

623 "time": f"{datetime.datetime.now()}", 

624 } 

625 assert run is not None, "Butler output run collection must be defined" 

626 qgraph = graphBuilder.makeGraph( 

627 task_defs, 

628 collections, 

629 run, 

630 args.data_query, 

631 metadata=metadata, 

632 datasetQueryConstraint=args.dataset_query_constraint, 

633 dataId=pipeline.get_data_id(butler.dimensions), 

634 ) 

635 if args.show_qgraph_header: 

636 qgraph.buildAndPrintHeader() 

637 

638 # Count quanta in graph; give a warning if it's empty and return None. 

639 nQuanta = len(qgraph) 

640 if nQuanta == 0: 

641 return None 

642 else: 

643 if _LOG.isEnabledFor(logging.INFO): 

644 qg_task_table = self._generateTaskTable(qgraph) 

645 qg_task_table_formatted = "\n".join(qg_task_table.pformat_all()) 

646 _LOG.info( 

647 "QuantumGraph contains %d quanta for %d tasks, graph ID: %r\n%s", 

648 nQuanta, 

649 len(qgraph.taskGraph), 

650 qgraph.graphID, 

651 qg_task_table_formatted, 

652 ) 

653 

654 if args.save_qgraph: 

655 qgraph.saveUri(args.save_qgraph) 

656 

657 if args.save_single_quanta: 

658 for quantumNode in qgraph: 

659 sqgraph = qgraph.subset(quantumNode) 

660 uri = args.save_single_quanta.format(quantumNode) 

661 sqgraph.saveUri(uri) 

662 

663 if args.qgraph_dot: 

664 graph2dot(qgraph, args.qgraph_dot) 

665 

666 if args.execution_butler_location: 

667 butler = Butler(args.butler_config) 

668 newArgs = copy.deepcopy(args) 

669 

670 def builderShim(butler: Butler) -> Butler: 

671 newArgs.butler_config = butler._config 

672 # Calling makeWriteButler is done for the side effects of 

673 # calling that method, maining parsing all the args into 

674 # collection names, creating collections, etc. 

675 newButler = _ButlerFactory.makeWriteButler(newArgs) 

676 return newButler 

677 

678 # Include output collection in collections for input 

679 # files if it exists in the repo. 

680 all_inputs = args.input 

681 if args.output is not None: 

682 try: 

683 all_inputs += (next(iter(butler.registry.queryCollections(args.output))),) 

684 except MissingCollectionError: 

685 pass 

686 

687 _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs) 

688 buildExecutionButler( 

689 butler, 

690 qgraph, 

691 args.execution_butler_location, 

692 run, 

693 butlerModifier=builderShim, 

694 collections=all_inputs, 

695 clobber=args.clobber_execution_butler, 

696 datastoreRoot=args.target_datastore_root, 

697 transfer=args.transfer, 

698 ) 

699 

700 return qgraph 


    def _make_execution_resources(self, args: SimpleNamespace) -> ExecutionResources:
        """Construct the execution resource class from arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum.
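
        Examples
        --------
        A sketch of the two attributes consumed here; the values are
        illustrative, with plain numbers for ``memory_per_quantum``
        interpreted in megabytes via ``default_mem_units=u.MB``::

            args.cores_per_quantum = 4
            args.memory_per_quantum = 2048  # 2048 MB
            resources = self._make_execution_resources(args)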

714 """ 

715 return ExecutionResources( 

716 num_cores=args.cores_per_quantum, max_mem=args.memory_per_quantum, default_mem_units=u.MB 

717 ) 


    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Butler | None = None,
    ) -> None:
        """Execute a complete QuantumGraph.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.

739 """ 

740 # Check that output run defined on command line is consistent with 

741 # quantum graph. 

742 if args.output_run and graph.metadata: 

743 graph_output_run = graph.metadata.get("output_run", args.output_run) 

744 if graph_output_run != args.output_run: 

745 raise ValueError( 

746 f"Output run defined on command line ({args.output_run}) has to be " 

747 f"identical to graph metadata ({graph_output_run}). " 

748 "To update graph metadata run `pipetask update-graph-run` command." 

749 ) 

750 

751 # Make sure that --extend-run always enables --skip-existing, 

752 # clobbering should be disabled if --extend-run is not specified. 

753 if args.extend_run: 

754 args.skip_existing = True 

755 else: 

756 args.clobber_outputs = False 

757 

758 if not args.enable_implicit_threading: 

759 disable_implicit_threading() 

760 

761 # Make butler instance. QuantumGraph should have an output run defined, 

762 # but we ignore it here and let command line decide actual output run. 

763 if butler is None: 

764 butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph()) 

765 

766 if args.skip_existing: 

767 args.skip_existing_in += (butler.run,) 

768 

769 # Enable lsstDebug debugging. Note that this is done once in the 

770 # main process before PreExecInit and it is also repeated before 

771 # running each task in SingleQuantumExecutor (which may not be 

772 # needed if `multiprocessing` always uses fork start method). 

773 if args.enableLsstDebug: 

774 try: 

775 _LOG.debug("Will try to import debug.py") 

776 import debug # type: ignore # noqa:F401 

777 except ImportError: 

778 _LOG.warn("No 'debug' module found.") 

779 

780 # Save all InitOutputs, configs, etc. 

781 preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run) 

782 preExecInit.initialize( 

783 graph, 

784 saveInitOutputs=not args.skip_init_writes, 

785 registerDatasetTypes=args.register_dataset_types, 

786 saveVersions=not args.no_versions, 

787 ) 

788 

789 if not args.init_only: 

790 graphFixup = self._importGraphFixup(args) 

791 resources = self._make_execution_resources(args) 

792 quantumExecutor = SingleQuantumExecutor( 

793 butler, 

794 taskFactory, 

795 skipExistingIn=args.skip_existing_in, 

796 clobberOutputs=args.clobber_outputs, 

797 enableLsstDebug=args.enableLsstDebug, 

798 exitOnKnownError=args.fail_fast, 

799 resources=resources, 

800 ) 

801 

802 timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout 

803 executor = MPGraphExecutor( 

804 numProc=args.processes, 

805 timeout=timeout, 

806 startMethod=args.start_method, 

807 quantumExecutor=quantumExecutor, 

808 failFast=args.fail_fast, 

809 pdb=args.pdb, 

810 executionGraphFixup=graphFixup, 

811 ) 

812 # Have to reset connection pool to avoid sharing connections with 

813 # forked processes. 

814 butler.registry.resetConnectionPool() 

815 try: 

816 with util.profile(args.profile, _LOG): 

817 executor.execute(graph) 

818 finally: 

819 if args.summary: 

820 report = executor.getReport() 

821 if report: 

822 with open(args.summary, "w") as out: 

823 # Do not save fields that are not set. 

824 out.write(report.json(exclude_none=True, indent=2)) 


    def _generateTaskTable(self, qgraph: QuantumGraph) -> Table:
        """Generate an astropy table listing the number of quanta per task
        for a given quantum graph.

        Parameters
        ----------
        qgraph : `~lsst.pipe.base.QuantumGraph`
            A QuantumGraph object.

        Returns
        -------
        qg_task_table : `astropy.table.Table`
            An astropy table containing columns: Quanta and Tasks.
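
        Examples
        --------
        Illustrative output for a hypothetical two-task graph (labels and
        counts are made up; alignment is approximate)::

            Quanta   Tasks
            ------ -------
                10     isr
                 5 measure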

839 """ 

840 qg_quanta, qg_tasks = [], [] 

841 for task_def in qgraph.iterTaskGraph(): 

842 num_qnodes = qgraph.getNumberOfQuantaForTask(task_def) 

843 qg_quanta.append(num_qnodes) 

844 qg_tasks.append(task_def.label) 

845 qg_task_table = Table(dict(Quanta=qg_quanta, Tasks=qg_tasks)) 

846 return qg_task_table 


    def _importGraphFixup(self, args: SimpleNamespace) -> ExecutionGraphFixup | None:
        """Import and instantiate a graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            The instantiated fixup object, or `None` if ``args.graph_fixup``
            is not set.

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
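
        Examples
        --------
        ``args.graph_fixup`` is a dotted import path to an
        `ExecutionGraphFixup` subclass or to a no-argument callable returning
        an instance of one; the module and class names below are
        hypothetical::

            args.graph_fixup = "mypkg.fixups.MyFixup"
            fixup = self._importGraphFixup(args)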

865 """ 

866 if args.graph_fixup: 

867 try: 

868 factory = doImportType(args.graph_fixup) 

869 except Exception as exc: 

870 raise ValueError("Failed to import graph fixup class/method") from exc 

871 try: 

872 fixup = factory() 

873 except Exception as exc: 

874 raise ValueError("Failed to make instance of graph fixup") from exc 

875 if not isinstance(fixup, ExecutionGraphFixup): 

876 raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class") 

877 return fixup 

878 return None 


    def preExecInitQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
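        """Perform pre-execution initialization using a quantum-backed
        butler, writing InitOutputs, configs, and package versions without
        requiring access to a full butler registry.

        Parameters
        ----------
        task_factory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        """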

        # Load quantum graph. We do not really need individual Quanta here,
        # but we need datastore records for initInputs, and those are only
        # available from Quanta, so load the whole thing.
        qgraph = QuantumGraph.loadUri(args.qgraph, graphID=args.qgraph_id)
        universe = qgraph.universe

        # Collect all init input/output dataset IDs.
        predicted_inputs: set[DatasetId] = set()
        predicted_outputs: set[DatasetId] = set()
        for taskDef in qgraph.iterTaskGraph():
            if (refs := qgraph.initInputRefs(taskDef)) is not None:
                predicted_inputs.update(ref.id for ref in refs)
            if (refs := qgraph.initOutputRefs(taskDef)) is not None:
                predicted_outputs.update(ref.id for ref in refs)
        predicted_outputs.update(ref.id for ref in qgraph.globalInitOutputRefs())
        # remove intermediates from inputs
        predicted_inputs -= predicted_outputs

        # Very inefficient way to extract datastore records from quantum
        # graph, we have to scan all quanta and look at their datastore
        # records.
        datastore_records: dict[str, DatastoreRecordData] = {}
        for quantum_node in qgraph:
            for store_name, records in quantum_node.quantum.datastore_records.items():
                subset = records.subset(predicted_inputs)
                if subset is not None:
                    datastore_records.setdefault(store_name, DatastoreRecordData()).update(subset)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        # Make butler from everything.
        butler = QuantumBackedButler.from_predicted(
            config=args.butler_config,
            predicted_inputs=predicted_inputs,
            predicted_outputs=predicted_outputs,
            dimensions=universe,
            datastore_records=datastore_records,
            search_paths=args.config_search_path,
            dataset_types=dataset_types,
        )

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInitLimited(butler, task_factory)
        preExecInit.initialize(qgraph)


    def runGraphQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
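        """Execute a quantum graph using quantum-backed (limited) butlers,
        so that no registry access is needed while quanta run.

        Parameters
        ----------
        task_factory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        """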

        # Load quantum graph.
        nodes = args.qgraph_node_id or None
        qgraph = QuantumGraph.loadUri(args.qgraph, nodes=nodes, graphID=args.qgraph_id)


        if qgraph.metadata is None:
            raise ValueError("QuantumGraph is missing metadata, cannot continue.")


        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        _butler_factory = _QBBFactory(
            butler_config=args.butler_config,
            dimensions=qgraph.universe,
            dataset_types=dataset_types,
        )

        # make special quantum executor
        resources = self._make_execution_resources(args)
        quantumExecutor = SingleQuantumExecutor(
            butler=None,
            taskFactory=task_factory,
            enableLsstDebug=args.enableLsstDebug,
            exitOnKnownError=args.fail_fast,
            limited_butler_factory=_butler_factory,
            resources=resources,
        )

        timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
        executor = MPGraphExecutor(
            numProc=args.processes,
            timeout=timeout,
            startMethod=args.start_method,
            quantumExecutor=quantumExecutor,
            failFast=args.fail_fast,
            pdb=args.pdb,
        )
        try:
            with util.profile(args.profile, _LOG):
                executor.execute(qgraph)
        finally:
            if args.summary:
                report = executor.getReport()
                if report:
                    with open(args.summary, "w") as out:
                        # Do not save fields that are not set.
                        out.write(report.json(exclude_none=True, indent=2))