Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 15%

388 statements  

coverage.py v7.3.2, created at 2023-12-09 12:06 +0000

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

from __future__ import annotations

__all__ = ["CmdLineFwk"]

import atexit
import contextlib
import copy
import datetime
import getpass
import logging
import shutil
from collections.abc import Iterable, Mapping, Sequence
from types import SimpleNamespace

import astropy.units as u
from astropy.table import Table
from lsst.daf.butler import (
    Butler,
    CollectionType,
    Config,
    DatasetId,
    DatasetRef,
    DatasetType,
    DimensionUniverse,
    LimitedButler,
    Quantum,
    QuantumBackedButler,
    Registry,
)
from lsst.daf.butler.datastore.cache_manager import DatastoreCacheManager
from lsst.daf.butler.datastore.record_data import DatastoreRecordData
from lsst.daf.butler.direct_butler import DirectButler
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.daf.butler.registry.wildcards import CollectionWildcard
from lsst.pipe.base import (
    ExecutionResources,
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    TaskDef,
    TaskFactory,
    buildExecutionButler,
)
from lsst.utils import doImportType
from lsst.utils.threads import disable_implicit_threading

from . import util
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit, PreExecInitLimited
from .singleQuantumExecutor import SingleQuantumExecutor

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: tuple[str, ...]
    """The definition of the collection, if it already exists (`tuple`[`str`]).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should already
            exist but will be removed from the output chained collection and
            replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``rebase``
            A boolean indicating whether to force the ``output`` collection
            to be consistent with ``inputs`` and ``output_run``, such that
            the ``output`` collection lists output run collections first
            (i.e. those whose names start with the output collection's
            prefix), then the new inputs, then any original inputs not
            included in the new inputs.

        ``inputs``
            Input collections of any type; see
            :ref:`daf_butler_ordered_collection_searches` for details.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `~lsst.daf.butler.Butler` is being initialized in a
        context where actual writes should happen, and hence an output run is
        necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
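            # The rebase logic below groups chain members by the output
            # collection's name prefix, so an explicitly given output run
            # must share that prefix.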
            if args.rebase and self.output and not args.output_run.startswith(self.output.name):
                raise ValueError("Cannot rebase if output run does not start with output collection name.")
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = f"{self.output}/{Instrument.makeCollectionTimestamp()}"
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections. We do this up
        # front so we can tell if the user passes the same inputs on subsequent
        # calls, even though we also flatten when we define the output CHAINED
        # collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

        # If things are inconsistent and user has asked for a rebase then
        # construct the new output chain.
        if args.rebase and self._checkOutputInputConsistency():
            assert self.output is not None
            newOutputChain = [item for item in self.output.chain if item.startswith(self.output.name)]
            newOutputChain.extend([item for item in self.inputs if item not in newOutputChain])
            newOutputChain.extend([item for item in self.output.chain if item not in newOutputChain])
            self.output.chain = tuple(newOutputChain)

    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if consistencyError := self._checkOutputInputConsistency():
            raise ValueError(consistencyError)

        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    def _checkOutputInputConsistency(self) -> str | None:
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means the inputs must appear as a
            # contiguous subsequence of outputs (normally they're also at the
            # end, but --rebase will in general put them in the middle).
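            # For example, inputs ("a", "b") are consistent with an existing
            # chain ("out/run2", "a", "b", "c"), since they match chain[1:3].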
            for n in reversed(range(1 + len(self.output.chain) - len(self.inputs))):
                if self.inputs == self.output.chain[n : n + len(self.inputs)]:
                    return None
            return (
                f"Output CHAINED collection {self.output.name!r} exists and does not include the "
                f"same sequence of (flattened) input collections {self.inputs} as a contiguous "
                "subsequence. "
                "Use --rebase to ignore this problem and reset the output collection, but note that "
                "this may obfuscate what inputs were actually used to produce these outputs."
            )
        return None

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], _ButlerFactory]:
        """Parse arguments to support implementations of `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `~collections.abc.Sequence` [ `str` ]
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler.from_config(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionWildcard.from_expression(inputs).require_ordered()
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler.from_config(butler=butler, collections=inputs)

    @classmethod
    def makeButlerAndCollections(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], str | None]:
        """Return a read-only butler, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `~collections.abc.Sequence` [ `str` ]
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection, or `None` if no output run collection is defined.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: str | None = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
        if self.outputRun is not None:
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @staticmethod
    def defineDatastoreCache() -> None:
        """Define where datastore cache directories should be found.

        Notes
        -----
        All the jobs should share a datastore cache if applicable. This
        method asks for a shared fallback cache to be defined and then
        configures an exit handler to clean it up.
        """
        defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
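        # Only the process that actually defined the fallback directory
        # registers the cleanup handler; a cache directory inherited from a
        # parent process is left for that process to clean up.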
        if defined:
            atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
        _LOG.debug("Defining shared datastore cache directory to %s", cache_dir)

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Iterable[TaskDef] | None = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline. This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler = Butler.from_config(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # we want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase entire collection and all datasets, need to remove
                    # collection from its chain collection first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.removeRuns([replaced], unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
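            # Register the chained output collection if needed, and (unless
            # extending an existing run) register the new RUN collection and
            # prepend it to the chain definition.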
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = (self.outputRun.name,) + self.inputs
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: _OutputChainedCollectionInfo | None
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: _OutputRunCollectionInfo | None
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


class _QBBFactory:
    """A callable class for making per-quantum
    `~lsst.daf.butler.QuantumBackedButler` (QBB) instances."""

    def __init__(
        self, butler_config: Config, dimensions: DimensionUniverse, dataset_types: Mapping[str, DatasetType]
    ):
        self.butler_config = butler_config
        self.dimensions = dimensions
        self.dataset_types = dataset_types

    def __call__(self, quantum: Quantum) -> LimitedButler:
        """Return a freshly initialized `~lsst.daf.butler.QuantumBackedButler`.

        Factory method to create QuantumBackedButler instances.
        """
        return QuantumBackedButler.initialize(
            config=self.butler_config,
            quantum=quantum,
            dimensions=self.dimensions,
            dataset_types=self.dataset_types,
        )


# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from the command line.

    In addition to executing tasks, this activator provides methods for task
    management, such as dumping the configuration or the execution chain.
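
    Notes
    -----
    A minimal usage sketch (hypothetical; ``args`` is the parsed command-line
    namespace normally supplied by ``pipetask``, and ``taskFactory`` is a
    `~lsst.pipe.base.TaskFactory` instance)::

        fwk = CmdLineFwk()
        pipeline = fwk.makePipeline(args)
        qgraph = fwk.makeGraph(pipeline, args)
        if qgraph is not None:
            fwk.runPipeline(qgraph, taskFactory, args)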
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing.

    def __init__(self) -> None:
        pass

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            The constructed pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
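        # (E.g. a command line "-t module.TaskClass:label -c label:field=value"
        # arrives here as a "new_task" action followed by a "config" action.)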
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)

            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":
                pipeline.removeTask(action.label)

            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)

            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline

    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> QuantumGraph | None:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline; can be empty or `None` if the graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            The constructed quantum graph, or `None` if the resulting graph
            would be empty.
        """
        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)
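
        # --skip-existing is shorthand for including the output run in the
        # --skip-existing-in search list.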
        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes empty tuple as default value for qgraph_node_id
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(args.qgraph, butler.dimensions, nodes=nodes, graphID=args.qgraph_id)

            # pipeline can not be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            task_defs = list(pipeline.toExpandedPipeline())
            if args.mock:
                from lsst.pipe.base.tests.mocks import mock_task_defs

                task_defs = mock_task_defs(
                    task_defs,
                    unmocked_dataset_types=args.unmocked_dataset_types,
                    force_failures=args.mock_failure,
                )
            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler._datastore if args.qgraph_datastore_records else None,
            )
            # accumulate metadata
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            assert run is not None, "Butler output run collection must be defined"
            qgraph = graphBuilder.makeGraph(
                task_defs,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
                dataId=pipeline.get_data_id(butler.dimensions),
            )
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        if len(qgraph) == 0:
            # Nothing to do.
            return None
        self._summarize_qgraph(qgraph)

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler.from_config(args.butler_config)
            assert isinstance(butler, DirectButler), "Execution butler needs DirectButler"
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                assert isinstance(butler, DirectButler), "Execution butler needs DirectButler"
                newArgs.butler_config = butler._config
                # Calling makeWriteButler is done for the side effects of
                # calling that method, mainly parsing all the args into
                # collection names, creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include output collection in collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                with contextlib.suppress(MissingCollectionError):
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
                datastoreRoot=args.target_datastore_root,
                transfer=args.transfer,
            )

        return qgraph

    def _make_execution_resources(self, args: SimpleNamespace) -> ExecutionResources:
        """Construct the execution resource class from arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum.
        """
        return ExecutionResources(
            num_cores=args.cores_per_quantum, max_mem=args.memory_per_quantum, default_mem_units=u.MB
        )

    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Butler | None = None,
    ) -> None:
        """Execute a complete QuantumGraph.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data butler instance; if not given, a new instance is made from
            the command-line options.
        """
        # Check that output run defined on command line is consistent with
        # quantum graph.
        if args.output_run and graph.metadata:
            graph_output_run = graph.metadata.get("output_run", args.output_run)
            if graph_output_run != args.output_run:
                raise ValueError(
                    f"Output run defined on command line ({args.output_run}) has to be "
                    f"identical to graph metadata ({graph_output_run}). "
                    "To update graph metadata run `pipetask update-graph-run` command."
                )

        # Make sure that --extend-run always enables --skip-existing;
        # clobbering should be disabled if --extend-run is not specified.
        if args.extend_run:
            args.skip_existing = True
        else:
            args.clobber_outputs = False

        if not args.enable_implicit_threading:
            disable_implicit_threading()

        # Make butler instance. QuantumGraph should have an output run defined,
        # but we ignore it here and let command line decide actual output run.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            resources = self._make_execution_resources(args)
            quantumExecutor = SingleQuantumExecutor(
                butler,
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                resources=resources,
            )

            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            # Have to reset connection pool to avoid sharing connections with
            # forked processes.
            butler.registry.resetConnectionPool()
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))

    def _generateTaskTable(self, qgraph: QuantumGraph) -> Table:
        """Generate an astropy table listing the number of quanta per task for
        a given quantum graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.QuantumGraph`
            A QuantumGraph object.

        Returns
        -------
        qg_task_table : `astropy.table.Table`
            An astropy table containing columns: Quanta and Tasks.
        """
        qg_quanta, qg_tasks = [], []
        for task_def in qgraph.iterTaskGraph():
            num_qnodes = qgraph.getNumberOfQuantaForTask(task_def)
            qg_quanta.append(num_qnodes)
            qg_tasks.append(task_def.label)
        qg_task_table = Table(dict(Quanta=qg_quanta, Tasks=qg_tasks))
        return qg_task_table

    def _summarize_qgraph(self, qgraph: QuantumGraph) -> int:
        """Report a summary of the quanta in the graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.QuantumGraph`
            The graph to be summarized.

        Returns
        -------
        n_quanta : `int`
            The number of quanta in the graph.
        """
        n_quanta = len(qgraph)
        if n_quanta == 0:
            _LOG.info("QuantumGraph contains no quanta.")
        else:
            if _LOG.isEnabledFor(logging.INFO):
                qg_task_table = self._generateTaskTable(qgraph)
                qg_task_table_formatted = "\n".join(qg_task_table.pformat_all())
                quanta_str = "quantum" if n_quanta == 1 else "quanta"
                n_tasks = len(qgraph.taskGraph)
                n_tasks_plural = "" if n_tasks == 1 else "s"
                _LOG.info(
                    "QuantumGraph contains %d %s for %d task%s, graph ID: %r\n%s",
                    n_quanta,
                    quanta_str,
                    n_tasks,
                    n_tasks_plural,
                    qgraph.graphID,
                    qg_task_table_formatted,
                )
        return n_quanta

    def _importGraphFixup(self, args: SimpleNamespace) -> ExecutionGraphFixup | None:
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            Fixup instance, or `None` if ``args.graph_fixup`` is not set.

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None

    def preExecInitQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
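        """Run pre-execution initialization for quantum-backed-butler
        execution, writing init-outputs, configs, and package versions via a
        `~lsst.daf.butler.QuantumBackedButler` built from the graph.

        Parameters
        ----------
        task_factory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        """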
        # Load quantum graph. We do not really need individual Quanta here,
        # but we need datastore records for initInputs, and those are only
        # available from Quanta, so load the whole thing.
        qgraph = QuantumGraph.loadUri(args.qgraph, graphID=args.qgraph_id)
        universe = qgraph.universe

        # Collect all init input/output dataset IDs.
        predicted_inputs: set[DatasetId] = set()
        predicted_outputs: set[DatasetId] = set()
        for taskDef in qgraph.iterTaskGraph():
            if (refs := qgraph.initInputRefs(taskDef)) is not None:
                predicted_inputs.update(ref.id for ref in refs)
            if (refs := qgraph.initOutputRefs(taskDef)) is not None:
                predicted_outputs.update(ref.id for ref in refs)
        predicted_outputs.update(ref.id for ref in qgraph.globalInitOutputRefs())
        # remove intermediates from inputs
        predicted_inputs -= predicted_outputs

        # Very inefficient way to extract datastore records from quantum graph,
        # we have to scan all quanta and look at their datastore records.
        datastore_records: dict[str, DatastoreRecordData] = {}
        for quantum_node in qgraph:
            for store_name, records in quantum_node.quantum.datastore_records.items():
                subset = records.subset(predicted_inputs)
                if subset is not None:
                    datastore_records.setdefault(store_name, DatastoreRecordData()).update(subset)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        # Make butler from everything.
        butler = QuantumBackedButler.from_predicted(
            config=args.butler_config,
            predicted_inputs=predicted_inputs,
            predicted_outputs=predicted_outputs,
            dimensions=universe,
            datastore_records=datastore_records,
            search_paths=args.config_search_path,
            dataset_types=dataset_types,
        )

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInitLimited(butler, task_factory)
        preExecInit.initialize(qgraph)

    def runGraphQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
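        """Execute a quantum graph using a quantum-backed butler, i.e.
        without direct registry access.

        Parameters
        ----------
        task_factory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        """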
        # Load quantum graph.
        nodes = args.qgraph_node_id or None
        qgraph = QuantumGraph.loadUri(args.qgraph, nodes=nodes, graphID=args.qgraph_id)

        if qgraph.metadata is None:
            raise ValueError("QuantumGraph is missing metadata, cannot continue.")

        self._summarize_qgraph(qgraph)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        _butler_factory = _QBBFactory(
            butler_config=args.butler_config,
            dimensions=qgraph.universe,
            dataset_types=dataset_types,
        )

        # make special quantum executor
        resources = self._make_execution_resources(args)
        quantumExecutor = SingleQuantumExecutor(
            butler=None,
            taskFactory=task_factory,
            enableLsstDebug=args.enableLsstDebug,
            exitOnKnownError=args.fail_fast,
            limited_butler_factory=_butler_factory,
            resources=resources,
        )

        timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
        executor = MPGraphExecutor(
            numProc=args.processes,
            timeout=timeout,
            startMethod=args.start_method,
            quantumExecutor=quantumExecutor,
            failFast=args.fail_fast,
            pdb=args.pdb,
        )
        try:
            with util.profile(args.profile, _LOG):
                executor.execute(qgraph)
        finally:
            if args.summary:
                report = executor.getReport()
                if report:
                    with open(args.summary, "w") as out:
                        # Do not save fields that are not set.
                        out.write(report.json(exclude_none=True, indent=2))