Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 15%

374 statements  

coverage.py v7.2.7, created at 2023-08-09 09:37 +0000

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

from __future__ import annotations

__all__ = ["CmdLineFwk"]

import atexit
import contextlib
import copy
import datetime
import getpass
import logging
import shutil
from collections.abc import Iterable, Mapping, Sequence
from types import SimpleNamespace
from typing import TYPE_CHECKING

import astropy.units as u
from astropy.table import Table
from lsst.daf.butler import (
    Butler,
    CollectionType,
    DatasetId,
    DatasetRef,
    DatastoreCacheManager,
    DatastoreRecordData,
    QuantumBackedButler,
)
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.daf.butler.registry.wildcards import CollectionWildcard
from lsst.pipe.base import (
    ExecutionResources,
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    buildExecutionButler,
)
from lsst.utils import doImportType
from lsst.utils.threads import disable_implicit_threading

from . import util
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit, PreExecInitLimited
from .singleQuantumExecutor import SingleQuantumExecutor

if TYPE_CHECKING:
    from lsst.daf.butler import (
        Config,
        DatasetType,
        DimensionUniverse,
        LimitedButler,
        Quantum,
        Registry,
    )
    from lsst.pipe.base import TaskDef, TaskFactory


# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: tuple[str, ...]
    """The definition of the collection, if it already exists (`tuple` [`str`]).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``input``
            Input collections of any type; see
            :ref:`daf_butler_ordered_collection_searches` for details.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `~lsst.daf.butler.Butler` is being initialized in a
        context where actual writes should happen, and hence an output run is
        necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = f"{self.output}/{Instrument.makeCollectionTimestamp()}"
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections. We do this up
        # front so we can tell if the user passes the same inputs on
        # subsequent calls, even though we also flatten when we define the
        # output CHAINED collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1], strict=False):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")
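
    # Worked example for the checks above, with hypothetical collection
    # names: if the existing output chain is
    #     "u/me/out" = ["u/me/out/run1", "HSC/defaults"]
    # then re-running with --input HSC/defaults passes, because the chain
    # ends with the same input sequence; --input HSC/other fails the
    # element-wise comparison, and passing more inputs than the chain can
    # absorb triggers the "cannot add new input collections" error.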

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], _ButlerFactory]:
        """Parse arguments to support implementations of `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `~collections.abc.Sequence` [ `str` ]
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionWildcard.from_expression(inputs).require_ordered()
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeButlerAndCollections(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], str | None]:
        """Return a read-only butler, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `~collections.abc.Sequence` [ `str` ]
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: str | None = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
        if self.outputRun is not None:
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @staticmethod
    def defineDatastoreCache() -> None:
        """Define where datastore cache directories should be found.

        Notes
        -----
        All the jobs should share a datastore cache if applicable. This
        method asks for a shared fallback cache to be defined and then
        configures an exit handler to clean it up.
        """
        defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
        if defined:
            atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
            _LOG.debug("Defining shared datastore cache directory to %s", cache_dir)
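
    # Design note (sketch of the intended behavior): the fallback cache is
    # defined once per parent process; subsequent calls in the same process
    # and in its subprocesses see it as already set, so only the first
    # caller registers the atexit cleanup and all jobs share one cache
    # directory.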

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Iterable[TaskDef] | None = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline. This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore.
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # We want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all of its datasets; the
                    # collection must be removed from its parent chain first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.removeRuns([replaced], unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = (self.outputRun.name,) + self.inputs
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: _OutputChainedCollectionInfo | None
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: _OutputRunCollectionInfo | None
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


class _QBBFactory:
    """Class which is a callable for making QBB instances."""

    def __init__(
        self, butler_config: Config, dimensions: DimensionUniverse, dataset_types: Mapping[str, DatasetType]
    ):
        self.butler_config = butler_config
        self.dimensions = dimensions
        self.dataset_types = dataset_types

    def __call__(self, quantum: Quantum) -> LimitedButler:
        """Return a freshly initialized `~lsst.daf.butler.QuantumBackedButler`.

        Factory method to create QuantumBackedButler instances.
        """
        return QuantumBackedButler.initialize(
            config=self.butler_config,
            quantum=quantum,
            dimensions=self.dimensions,
            dataset_types=self.dataset_types,
        )
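
# Illustrative use of _QBBFactory (hypothetical snippet): an instance is
# handed to SingleQuantumExecutor as its ``limited_butler_factory`` and is
# called once per quantum, as in `CmdLineFwk.runGraphQBB` below:
#
#     factory = _QBBFactory(args.butler_config, qgraph.universe, dataset_types)
#     limited_butler = factory(quantum_node.quantum)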


# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from the command line.

    In addition to executing tasks, this activator provides methods for task
    management, such as dumping the configuration or the execution chain.
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing

    def __init__(self) -> None:
        pass
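
    # Typical call sequence, as driven by the pipetask commands (sketch):
    #
    #     fwk = CmdLineFwk()
    #     pipeline = fwk.makePipeline(args)       # pipetask build
    #     qgraph = fwk.makeGraph(pipeline, args)  # pipetask qgraph
    #     if qgraph is not None:
    #         fwk.runPipeline(qgraph, taskFactory, args)  # pipetask run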

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            The constructed pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # Loop over all pipeline actions and apply them in order.
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)

            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":
                pipeline.removeTask(action.label)

            elif action.action == "config":
                # Action value string is "field=value"; split it at '='.
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)

            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
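
    # For example, a hypothetical command line such as
    #     pipetask run -p pipeline.yaml -c task1:doFoo=True -C task1:overrides.py
    # arrives here as pipeline_actions and is applied above as
    #     pipeline.addConfigOverride("task1", "doFoo", "True")
    #     pipeline.addConfigFile("task1", "overrides.py")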

    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> QuantumGraph | None:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            The constructed graph, or `None` if the resulting graph is empty.
        """
        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes an empty tuple as the default value for
            # qgraph_node_id.
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(args.qgraph, butler.dimensions, nodes=nodes, graphID=args.qgraph_id)

            # A pipeline cannot be provided in this case.
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            task_defs = list(pipeline.toExpandedPipeline())
            if args.mock:
                from lsst.pipe.base.tests.mocks import mock_task_defs

                task_defs = mock_task_defs(
                    task_defs,
                    unmocked_dataset_types=args.unmocked_dataset_types,
                    force_failures=args.mock_failure,
                )
            # Make execution plan (a.k.a. DAG) for pipeline.
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler._datastore if args.qgraph_datastore_records else None,
            )
            # Accumulate metadata.
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            assert run is not None, "Butler output run collection must be defined"
            qgraph = graphBuilder.makeGraph(
                task_defs,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
                dataId=pipeline.get_data_id(butler.dimensions),
            )
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        if len(qgraph) == 0:
            # Nothing to do.
            return None
        self._summarize_qgraph(qgraph)

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                newArgs.butler_config = butler._config
                # makeWriteButler is called for its side effects, mainly
                # parsing all the args into collection names, creating
                # collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include output collection in collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                with contextlib.suppress(MissingCollectionError):
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
                datastoreRoot=args.target_datastore_root,
                transfer=args.transfer,
            )

        return qgraph

    def _make_execution_resources(self, args: SimpleNamespace) -> ExecutionResources:
        """Construct the execution resource class from arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum.
        """
        return ExecutionResources(
            num_cores=args.cores_per_quantum, max_mem=args.memory_per_quantum, default_mem_units=u.MB
        )
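
    # For example, with hypothetical option values ``--cores-per-quantum 4
    # --memory-per-quantum 2048``, this returns
    #     ExecutionResources(num_cores=4, max_mem=2048, default_mem_units=u.MB)
    # where a bare number is interpreted in the default units (megabytes),
    # while a value with explicit units, e.g. "2GB", carries its own.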

    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Butler | None = None,
    ) -> None:
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # Check that output run defined on command line is consistent with
        # quantum graph.
        if args.output_run and graph.metadata:
            graph_output_run = graph.metadata.get("output_run", args.output_run)
            if graph_output_run != args.output_run:
                raise ValueError(
                    f"Output run defined on command line ({args.output_run}) has to be "
                    f"identical to graph metadata ({graph_output_run}). "
                    "To update graph metadata run `pipetask update-graph-run` command."
                )

        # Make sure that --extend-run always enables --skip-existing;
        # clobbering should be disabled if --extend-run is not specified.
        if args.extend_run:
            args.skip_existing = True
        else:
            args.clobber_outputs = False

        if not args.enable_implicit_threading:
            disable_implicit_threading()

        # Make butler instance. QuantumGraph should have an output run
        # defined, but we ignore it here and let command line decide actual
        # output run.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            resources = self._make_execution_resources(args)
            quantumExecutor = SingleQuantumExecutor(
                butler,
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                resources=resources,
            )

            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            # Have to reset connection pool to avoid sharing connections with
            # forked processes.
            butler.registry.resetConnectionPool()
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))

    def _generateTaskTable(self, qgraph: QuantumGraph) -> Table:
        """Generate astropy table listing the number of quanta per task for a
        given quantum graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.QuantumGraph`
            A QuantumGraph object.

        Returns
        -------
        qg_task_table : `astropy.table.Table`
            An astropy table containing columns: Quanta and Tasks.
        """
        qg_quanta, qg_tasks = [], []
        for task_def in qgraph.iterTaskGraph():
            num_qnodes = qgraph.getNumberOfQuantaForTask(task_def)
            qg_quanta.append(num_qnodes)
            qg_tasks.append(task_def.label)
        qg_task_table = Table(dict(Quanta=qg_quanta, Tasks=qg_tasks))
        return qg_task_table
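
    # Rendered via pformat_all() in _summarize_qgraph below, the table looks
    # like (hypothetical counts and labels):
    #
    #     Quanta       Tasks
    #     ------ -----------
    #         10         isr
    #         10    makeWarp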

    def _summarize_qgraph(self, qgraph: QuantumGraph) -> int:
        """Report a summary of the quanta in the graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.QuantumGraph`
            The graph to be summarized.

        Returns
        -------
        n_quanta : `int`
            The number of quanta in the graph.
        """
        n_quanta = len(qgraph)
        if n_quanta == 0:
            _LOG.info("QuantumGraph contains no quanta.")
        else:
            if _LOG.isEnabledFor(logging.INFO):
                qg_task_table = self._generateTaskTable(qgraph)
                qg_task_table_formatted = "\n".join(qg_task_table.pformat_all())
                quanta_str = "quantum" if n_quanta == 1 else "quanta"
                n_tasks = len(qgraph.taskGraph)
                n_tasks_plural = "" if n_tasks == 1 else "s"
                _LOG.info(
                    "QuantumGraph contains %d %s for %d task%s, graph ID: %r\n%s",
                    n_quanta,
                    quanta_str,
                    n_tasks,
                    n_tasks_plural,
                    qgraph.graphID,
                    qg_task_table_formatted,
                )
        return n_quanta

    def _importGraphFixup(self, args: SimpleNamespace) -> ExecutionGraphFixup | None:
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            Fixup instance, or `None` if ``args.graph_fixup`` is not set.

        Raises
        ------
        ValueError
            Raised if import fails, method call raises exception, or returned
            instance has unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None

    def preExecInitQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
        """Run pre-execution initialization for quantum-backed butler
        execution, saving all InitOutputs, configs, etc.
        """
        # Load quantum graph. We do not really need individual Quanta here,
        # but we need datastore records for initInputs, and those are only
        # available from Quanta, so load the whole thing.
        qgraph = QuantumGraph.loadUri(args.qgraph, graphID=args.qgraph_id)
        universe = qgraph.universe

        # Collect all init input/output dataset IDs.
        predicted_inputs: set[DatasetId] = set()
        predicted_outputs: set[DatasetId] = set()
        for taskDef in qgraph.iterTaskGraph():
            if (refs := qgraph.initInputRefs(taskDef)) is not None:
                predicted_inputs.update(ref.id for ref in refs)
            if (refs := qgraph.initOutputRefs(taskDef)) is not None:
                predicted_outputs.update(ref.id for ref in refs)
        predicted_outputs.update(ref.id for ref in qgraph.globalInitOutputRefs())
        # Remove intermediates from inputs.
        predicted_inputs -= predicted_outputs

        # Very inefficient way to extract datastore records from the quantum
        # graph: we have to scan all quanta and look at their datastore
        # records.
        datastore_records: dict[str, DatastoreRecordData] = {}
        for quantum_node in qgraph:
            for store_name, records in quantum_node.quantum.datastore_records.items():
                subset = records.subset(predicted_inputs)
                if subset is not None:
                    datastore_records.setdefault(store_name, DatastoreRecordData()).update(subset)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        # Make butler from everything.
        butler = QuantumBackedButler.from_predicted(
            config=args.butler_config,
            predicted_inputs=predicted_inputs,
            predicted_outputs=predicted_outputs,
            dimensions=universe,
            datastore_records=datastore_records,
            search_paths=args.config_search_path,
            dataset_types=dataset_types,
        )

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInitLimited(butler, task_factory)
        preExecInit.initialize(qgraph)
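
    # In the quantum-backed-butler workflow, preExecInitQBB runs once per
    # graph (``pipetask pre-exec-init-qbb``) before the quanta are executed
    # by runGraphQBB (``pipetask run-qbb``), typically from many independent
    # jobs that share no registry connection.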

    def runGraphQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
        """Execute a quantum graph using quantum-backed butlers."""
        # Load quantum graph.
        nodes = args.qgraph_node_id or None
        qgraph = QuantumGraph.loadUri(args.qgraph, nodes=nodes, graphID=args.qgraph_id)

        if qgraph.metadata is None:
            raise ValueError("QuantumGraph is missing metadata, cannot continue.")

        self._summarize_qgraph(qgraph)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        _butler_factory = _QBBFactory(
            butler_config=args.butler_config,
            dimensions=qgraph.universe,
            dataset_types=dataset_types,
        )

        # Make special quantum executor.
        resources = self._make_execution_resources(args)
        quantumExecutor = SingleQuantumExecutor(
            butler=None,
            taskFactory=task_factory,
            enableLsstDebug=args.enableLsstDebug,
            exitOnKnownError=args.fail_fast,
            limited_butler_factory=_butler_factory,
            resources=resources,
        )

        timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
        executor = MPGraphExecutor(
            numProc=args.processes,
            timeout=timeout,
            startMethod=args.start_method,
            quantumExecutor=quantumExecutor,
            failFast=args.fail_fast,
            pdb=args.pdb,
        )
        try:
            with util.profile(args.profile, _LOG):
                executor.execute(qgraph)
        finally:
            if args.summary:
                report = executor.getReport()
                if report:
                    with open(args.summary, "w") as out:
                        # Do not save fields that are not set.
                        out.write(report.json(exclude_none=True, indent=2))