Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 15%

393 statements  

coverage.py v7.5.0, created at 2024-05-01 03:36 -0700

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods."""

from __future__ import annotations

__all__ = ["CmdLineFwk"]

import atexit
import contextlib
import copy
import logging
import shutil
from collections.abc import Mapping, Sequence
from types import SimpleNamespace

import astropy.units as u
import lsst.utils.timer
from astropy.table import Table
from lsst.daf.butler import (
    Butler,
    CollectionType,
    Config,
    DatasetId,
    DatasetType,
    DimensionUniverse,
    LimitedButler,
    Quantum,
    QuantumBackedButler,
    Registry,
)
from lsst.daf.butler.datastore.cache_manager import DatastoreCacheManager
from lsst.daf.butler.datastore.record_data import DatastoreRecordData
from lsst.daf.butler.direct_butler import DirectButler
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.daf.butler.registry.wildcards import CollectionWildcard
from lsst.pipe.base import (
    ExecutionResources,
    Instrument,
    Pipeline,
    PipelineGraph,
    QuantumGraph,
    TaskFactory,
    buildExecutionButler,
)
from lsst.pipe.base.all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder
from lsst.pipe.base.pipeline_graph import NodeType
from lsst.utils import doImportType
from lsst.utils.logging import getLogger
from lsst.utils.threads import disable_implicit_threading

from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit, PreExecInitLimited
from .reports import Report
from .singleQuantumExecutor import SingleQuantumExecutor

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: tuple[str, ...]
    """The definition of the collection, if it already exists (`tuple` [`str`]).

    Empty if the collection does not already exist.
    """



class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    args : `types.SimpleNamespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``rebase``
            A boolean indicating whether to force the ``output`` collection
            to be consistent with ``inputs`` and the output run, such that
            the ``output`` chain lists output run collections first (i.e.
            those that start with the same prefix), then the new inputs, then
            any original inputs not included in the new inputs.

        ``inputs``
            Input collections of any type; see
            :ref:`daf_butler_ordered_collection_searches` for details.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `~lsst.daf.butler.Butler` is being initialized in a
        context where actual writes should happen, and hence an output run
        is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            if args.rebase and self.output and not args.output_run.startswith(self.output.name):
                raise ValueError("Cannot rebase if output run does not start with output collection name.")
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = f"{self.output}/{Instrument.makeCollectionTimestamp()}"
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, it is OK to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections.  We do this up
        # front so we can tell if the user passes the same inputs on
        # subsequent calls, even though we also flatten when we define the
        # output CHAINED collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

        # If things are inconsistent and the user has asked for a rebase then
        # construct the new output chain.
        if args.rebase and self._checkOutputInputConsistency():
            assert self.output is not None
            newOutputChain = [item for item in self.output.chain if item.startswith(self.output.name)]
            newOutputChain.extend([item for item in self.inputs if item not in newOutputChain])
            newOutputChain.extend([item for item in self.output.chain if item not in newOutputChain])
            self.output.chain = tuple(newOutputChain)
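
        # Worked example of the rebase ordering above (hypothetical names):
        # with output name "u/me/out", an existing chain
        # ("u/me/out/run1", "oldcal", "raw") and new inputs ("newcal", "raw"),
        # the rebased chain becomes
        # ("u/me/out/run1", "newcal", "raw", "oldcal"): output-run collections
        # first, then the new inputs in order, then any leftover members of
        # the old chain.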


    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if consistencyError := self._checkOutputInputConsistency():
            raise ValueError(consistencyError)

        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    def _checkOutputInputConsistency(self) -> str | None:
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means the inputs must appear as a
            # contiguous subsequence of outputs (normally they're also at the
            # end, but --rebase will in general put them in the middle).
            for n in reversed(range(1 + len(self.output.chain) - len(self.inputs))):
                if self.inputs == self.output.chain[n : n + len(self.inputs)]:
                    return None
            return (
                f"Output CHAINED collection {self.output.name!r} exists and does not include the "
                f"same sequence of (flattened) input collections {self.inputs} as a contiguous "
                "subsequence. "
                "Use --rebase to ignore this problem and reset the output collection, but note that "
                "this may obfuscate what inputs were actually used to produce these outputs."
            )
        return None
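
    # Illustrative sketch of the contiguous-subsequence check above
    # (hypothetical values): with chain = ("out/run2", "in1", "in2", "extra")
    # and inputs = ("in1", "in2"), the window starting at n=1 matches, so the
    # chain is considered consistent.  With inputs = ("in2", "in1") no window
    # matches and the explanatory error string is returned.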


    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], _ButlerFactory]:
        """Parse arguments to support implementations of `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `~collections.abc.Sequence` [ `str` ]
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler.from_config(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionWildcard.from_expression(inputs).require_ordered()
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler.from_config(butler=butler, collections=inputs)

    @classmethod
    def makeButlerAndCollections(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], str | None]:
        """Return a read-only butler, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `Sequence` [ `str` ]
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: str | None = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
        if self.outputRun is not None:
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @staticmethod
    def defineDatastoreCache() -> None:
        """Define where datastore cache directories should be found.

        Notes
        -----
        All the jobs should share a datastore cache if applicable. This
        method asks for a shared fallback cache to be defined and then
        configures an exit handler to clean it up.
        """
        defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
        if defined:
            atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
            _LOG.debug("Defining shared datastore cache directory to %s", cache_dir)

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, pipeline_graph: PipelineGraph | None = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        pipeline_graph : `lsst.pipe.base.PipelineGraph`, optional
            Definitions for tasks in a pipeline. This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler = Butler.from_config(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from the datastore.
                    with butler.transaction():
                        # We want to remove regular outputs from this
                        # pipeline, but keep initOutputs, configs, and
                        # versions.
                        if pipeline_graph is not None:
                            refs = [
                                ref
                                for ref in butler.registry.queryDatasets(..., collections=replaced)
                                if (
                                    (producer := pipeline_graph.producer_of(ref.datasetType.name)) is not None
                                    and producer.key.node_type is NodeType.TASK  # i.e. not TASK_INIT
                                )
                            ]
                            butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all datasets; the run
                    # must be removed from its chained collection first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.removeRuns([replaced], unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = (self.outputRun.name,) + self.inputs
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: _OutputChainedCollectionInfo | None
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: _OutputRunCollectionInfo | None
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


class _QBBFactory:
    """Class which is a callable for making QBB instances."""

    def __init__(
        self, butler_config: Config, dimensions: DimensionUniverse, dataset_types: Mapping[str, DatasetType]
    ):
        self.butler_config = butler_config
        self.dimensions = dimensions
        self.dataset_types = dataset_types

    def __call__(self, quantum: Quantum) -> LimitedButler:
        """Return a freshly initialized `~lsst.daf.butler.QuantumBackedButler`.

        Factory method to create QuantumBackedButler instances.
        """
        return QuantumBackedButler.initialize(
            config=self.butler_config,
            quantum=quantum,
            dimensions=self.dimensions,
            dataset_types=self.dataset_types,
        )
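
# Illustrative sketch (hypothetical configuration): the factory is handed to
# SingleQuantumExecutor as ``limited_butler_factory`` (see runGraphQBB below)
# so that each quantum gets its own QuantumBackedButler, e.g.
#
#     qbb_factory = _QBBFactory(
#         butler_config=args.butler_config,
#         dimensions=qgraph.universe,
#         dataset_types={t.name: t for t in qgraph.registryDatasetTypes()},
#     )
#     limited_butler = qbb_factory(quantum_node.quantum)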



# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from the command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing.

    def __init__(self) -> None:
        pass

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            Newly-constructed pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # Loop over all pipeline actions and apply them in order.
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)

            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":
                pipeline.removeTask(action.label)

            elif action.action == "config":
                # The action value string is "field=value"; split it at '='.
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)

            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
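
    # Illustrative sketch (hypothetical task and labels): a command line such
    # as "-t lsst.some.module.SomeTask:mytask -c mytask:doFoo=True" arrives
    # here as pipeline actions and is applied roughly as
    #
    #     pipeline.addTask("lsst.some.module.SomeTask", "mytask")
    #     pipeline.addConfigOverride("mytask", "doFoo", "True")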


    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> QuantumGraph | None:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or `None` if the graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
        """
        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes an empty tuple as the default value for
            # qgraph_node_id.
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(args.qgraph, butler.dimensions, nodes=nodes, graphID=args.qgraph_id)

            # A pipeline cannot be provided in this case.
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            pipeline_graph = pipeline.to_graph()
            if args.mock:
                from lsst.pipe.base.tests.mocks import mock_pipeline_graph

                pipeline_graph = mock_pipeline_graph(
                    pipeline_graph,
                    unmocked_dataset_types=args.unmocked_dataset_types,
                    force_failures=args.mock_failure,
                )
            # Make the execution plan (a.k.a. DAG) for the pipeline.
            graph_builder = AllDimensionsQuantumGraphBuilder(
                pipeline_graph,
                butler,
                where=args.data_query,
                skip_existing_in=args.skip_existing_in if args.skip_existing_in is not None else (),
                clobber=args.clobber_outputs,
                dataset_query_constraint=args.dataset_query_constraint,
                input_collections=collections,
                output_run=run,
            )
            # Accumulate metadata.
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
            }
            assert run is not None, "Butler output run collection must be defined"
            qgraph = graph_builder.build(metadata, attach_datastore_records=args.qgraph_datastore_records)
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        if len(qgraph) == 0:
            # Nothing to do.
            return None
        self._summarize_qgraph(qgraph)

        if args.save_qgraph:
            _LOG.verbose("Writing QuantumGraph to %r.", args.save_qgraph)
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            _LOG.verbose("Writing quantum graph DOT visualization to %r.", args.qgraph_dot)
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            _LOG.verbose("Writing execution butler to %r.", args.execution_butler_location)
            butler = Butler.from_config(args.butler_config)
            assert isinstance(butler, DirectButler), "Execution butler needs DirectButler"
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                assert isinstance(butler, DirectButler), "Execution butler needs DirectButler"
                newArgs.butler_config = butler._config
                # makeWriteButler is called for its side effects, mainly
                # parsing all the args into collection names and creating
                # collections.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include the output collection in the collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                with contextlib.suppress(MissingCollectionError):
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
                datastoreRoot=args.target_datastore_root,
                transfer=args.transfer,
            )

        return qgraph

    def _make_execution_resources(self, args: SimpleNamespace) -> ExecutionResources:
        """Construct the execution resource class from arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum.
        """
        return ExecutionResources(
            num_cores=args.cores_per_quantum, max_mem=args.memory_per_quantum, default_mem_units=u.MB
        )
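
    # Illustrative sketch (hypothetical values): "--cores-per-quantum 4
    # --memory-per-quantum 2048" would yield roughly
    #
    #     ExecutionResources(num_cores=4, max_mem=2048, default_mem_units=u.MB)
    #
    # i.e. four cores and 2048 MB per quantum; ``default_mem_units`` applies
    # when the memory limit is given as a bare number.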


    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Butler | None = None,
    ) -> None:
        """Execute a complete QuantumGraph.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        if not args.enable_implicit_threading:
            disable_implicit_threading()

        # Check that the output run defined on the command line is consistent
        # with the quantum graph.
        if args.output_run and graph.metadata:
            graph_output_run = graph.metadata.get("output_run", args.output_run)
            if graph_output_run != args.output_run:
                raise ValueError(
                    f"Output run defined on command line ({args.output_run}) has to be "
                    f"identical to graph metadata ({graph_output_run}). "
                    "To update graph metadata run the `pipetask update-graph-run` command."
                )

        # Make sure that --extend-run always enables --skip-existing;
        # clobbering should be disabled if --extend-run is not specified.
        if args.extend_run:
            args.skip_existing = True
        else:
            args.clobber_outputs = False

        # Make a butler instance. The QuantumGraph should have an output run
        # defined, but we ignore it here and let the command line decide the
        # actual output run.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.pipeline_graph)

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses the fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            resources = self._make_execution_resources(args)
            quantumExecutor = SingleQuantumExecutor(
                butler,
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                resources=resources,
            )

            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            # Have to reset the connection pool to avoid sharing connections
            # with forked processes.
            butler.registry.resetConnectionPool()
            try:
                with lsst.utils.timer.profile(args.profile, _LOG):
                    executor.execute(graph)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.model_dump_json(exclude_none=True, indent=2))

    def _generateTaskTable(self) -> Table:
        """Generate an astropy table listing the number of quanta per task for
        a given quantum graph.

        Returns
        -------
        qg_task_table : `astropy.table.table.Table`
            An astropy table containing columns: Quanta and Tasks.
        """
        qg_quanta, qg_tasks = [], []
        for task_label, task_info in self.report.qgraphSummary.qgraphTaskSummaries.items():
            qg_tasks.append(task_label)
            qg_quanta.append(task_info.numQuanta)

        qg_task_table = Table(dict(Quanta=qg_quanta, Tasks=qg_tasks))
        return qg_task_table
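
    # Illustrative sketch (hypothetical tasks and counts): for a graph with
    # two tasks, the table included in the INFO-level summary looks roughly
    # like
    #
    #     Quanta     Tasks
    #     ------ ---------
    #        100       isr
    #        100 calibrate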


    def _summarize_qgraph(self, qgraph: QuantumGraph) -> int:
        """Report a summary of the quanta in the graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.QuantumGraph`
            The graph to be summarized.

        Returns
        -------
        n_quanta : `int`
            The number of quanta in the graph.
        """
        n_quanta = len(qgraph)
        if n_quanta == 0:
            _LOG.info("QuantumGraph contains no quanta.")
        else:
            self.report = Report(qgraphSummary=qgraph.getSummary())
            if _LOG.isEnabledFor(logging.INFO):
                qg_task_table = self._generateTaskTable()
                qg_task_table_formatted = "\n".join(qg_task_table.pformat_all())
                quanta_str = "quantum" if n_quanta == 1 else "quanta"
                n_tasks = len(qgraph.taskGraph)
                n_tasks_plural = "" if n_tasks == 1 else "s"
                _LOG.info(
                    "QuantumGraph contains %d %s for %d task%s, graph ID: %r\n%s",
                    n_quanta,
                    quanta_str,
                    n_tasks,
                    n_tasks_plural,
                    qgraph.graphID,
                    qg_task_table_formatted,
                )
        return n_quanta

    def _importGraphFixup(self, args: SimpleNamespace) -> ExecutionGraphFixup | None:
        """Import/instantiate a graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None

    def preExecInitQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
        # Load the quantum graph. We do not really need individual Quanta
        # here, but we need datastore records for initInputs, and those are
        # only available from Quanta, so load the whole thing.
        qgraph = QuantumGraph.loadUri(args.qgraph, graphID=args.qgraph_id)
        universe = qgraph.universe

        # Collect all init input/output dataset IDs.
        predicted_inputs: set[DatasetId] = set()
        predicted_outputs: set[DatasetId] = set()
        for taskDef in qgraph.iterTaskGraph():
            if (refs := qgraph.initInputRefs(taskDef)) is not None:
                predicted_inputs.update(ref.id for ref in refs)
            if (refs := qgraph.initOutputRefs(taskDef)) is not None:
                predicted_outputs.update(ref.id for ref in refs)
        predicted_outputs.update(ref.id for ref in qgraph.globalInitOutputRefs())
        # Remove intermediates from inputs.
        predicted_inputs -= predicted_outputs
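
        # Illustrative note on the subtraction above (hypothetical case): if
        # task A writes init-output dataset X and task B lists X among its
        # init-inputs, X appears in both sets; removing the overlap leaves
        # only datasets that must come from outside the graph in
        # predicted_inputs.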


        # Very inefficient way to extract datastore records from the quantum
        # graph: we have to scan all quanta and look at their datastore
        # records.
        datastore_records: dict[str, DatastoreRecordData] = {}
        for quantum_node in qgraph:
            for store_name, records in quantum_node.quantum.datastore_records.items():
                subset = records.subset(predicted_inputs)
                if subset is not None:
                    datastore_records.setdefault(store_name, DatastoreRecordData()).update(subset)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        # Make a butler from everything.
        butler = QuantumBackedButler.from_predicted(
            config=args.butler_config,
            predicted_inputs=predicted_inputs,
            predicted_outputs=predicted_outputs,
            dimensions=universe,
            datastore_records=datastore_records,
            search_paths=args.config_search_path,
            dataset_types=dataset_types,
        )

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInitLimited(butler, task_factory)
        preExecInit.initialize(qgraph)

    def runGraphQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
        if not args.enable_implicit_threading:
            disable_implicit_threading()

        # Load the quantum graph.
        nodes = args.qgraph_node_id or None
        qgraph = QuantumGraph.loadUri(args.qgraph, nodes=nodes, graphID=args.qgraph_id)

        if qgraph.metadata is None:
            raise ValueError("QuantumGraph is missing metadata, cannot continue.")

        self._summarize_qgraph(qgraph)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        _butler_factory = _QBBFactory(
            butler_config=args.butler_config,
            dimensions=qgraph.universe,
            dataset_types=dataset_types,
        )

        # Make a special quantum executor.
        resources = self._make_execution_resources(args)
        quantumExecutor = SingleQuantumExecutor(
            butler=None,
            taskFactory=task_factory,
            enableLsstDebug=args.enableLsstDebug,
            exitOnKnownError=args.fail_fast,
            limited_butler_factory=_butler_factory,
            resources=resources,
            clobberOutputs=True,
            skipExisting=True,
        )

        timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
        executor = MPGraphExecutor(
            numProc=args.processes,
            timeout=timeout,
            startMethod=args.start_method,
            quantumExecutor=quantumExecutor,
            failFast=args.fail_fast,
            pdb=args.pdb,
        )
        try:
            with lsst.utils.timer.profile(args.profile, _LOG):
                executor.execute(qgraph)
        finally:
            if args.summary:
                report = executor.getReport()
                if report:
                    with open(args.summary, "w") as out:
                        # Do not save fields that are not set.
                        out.write(report.model_dump_json(exclude_none=True, indent=2))