# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

from __future__ import annotations

__all__ = ["CmdLineFwk"]

import atexit
import contextlib
import copy
import datetime
import getpass
import logging
import shutil
from collections.abc import Iterable, Mapping, Sequence
from types import SimpleNamespace

import astropy.units as u
from astropy.table import Table
from lsst.daf.butler import (
    Butler,
    CollectionType,
    Config,
    DatasetId,
    DatasetRef,
    DatasetType,
    DatastoreCacheManager,
    DatastoreRecordData,
    DimensionUniverse,
    LimitedButler,
    Quantum,
    QuantumBackedButler,
    Registry,
)
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.daf.butler.registry.wildcards import CollectionWildcard
from lsst.pipe.base import (
    ExecutionResources,
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    TaskDef,
    TaskFactory,
    buildExecutionButler,
)
from lsst.utils import doImportType
from lsst.utils.threads import disable_implicit_threading

from . import util
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit, PreExecInitLimited
from .singleQuantumExecutor import SingleQuantumExecutor

# ----------------------------------
#  Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: tuple[str, ...]
    """The definition of the collection, if it already exists
    (`tuple` [ `str` ]).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments.  The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            How to prune the replaced run (requires ``replace_run``); may be
            `None`, "unstore", or "purge".

        ``rebase``
            A boolean indicating whether to force the ``output`` collection
            to be consistent with ``inputs`` and the output run, such that
            the ``output`` chain lists output run collections first (i.e.
            those that start with the ``output`` prefix), then the new
            inputs, then any original chain entries not already included.

        ``inputs``
            Input collections of any type; see
            :ref:`daf_butler_ordered_collection_searches` for details.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `~lsst.daf.butler.Butler` is being initialized in a
        context where actual writes should happen, and hence at least one of
        ``output`` and ``output_run`` must be provided; if `False`, it is
        acceptable to have no output run at all.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """
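    # A minimal sketch of the ``args`` namespace this class consumes, with
    # hypothetical repository and collection names; the real namespace is
    # produced by the pipetask argument parser and carries many more
    # attributes:
    #
    #     args = SimpleNamespace(
    #         butler_config="/repo/main",
    #         input=("HSC/defaults",),
    #         output="u/user/analysis",
    #         output_run=None,
    #         extend_run=False,
    #         replace_run=False,
    #         prune_replaced=None,
    #         rebase=False,
    #     )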

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            if args.rebase and self.output and not args.output_run.startswith(self.output.name):
                raise ValueError("Cannot rebase if output run does not start with output collection name.")
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = f"{self.output}/{Instrument.makeCollectionTimestamp()}"
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, it is OK to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections.  We do this up
        # front so we can tell if the user passes the same inputs on
        # subsequent calls, even though we also flatten when we define the
        # output CHAINED collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

        # If things are inconsistent and the user has asked for a rebase,
        # construct the new output chain.
        if args.rebase and self._checkOutputInputConsistency():
            assert self.output is not None
            newOutputChain = [item for item in self.output.chain if item.startswith(self.output.name)]
            newOutputChain.extend([item for item in self.inputs if item not in newOutputChain])
            newOutputChain.extend([item for item in self.output.chain if item not in newOutputChain])
            self.output.chain = tuple(newOutputChain)
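    # Worked example of the rebase above (hypothetical names): with
    # output.name == "u/user/out",
    # output.chain == ("u/user/out/run2", "u/user/out/run1", "oldIn") and
    # inputs == ("newIn", "oldIn"), the rebased chain becomes
    # ("u/user/out/run2", "u/user/out/run1", "newIn", "oldIn"): output-run
    # collections (sharing the output prefix) first, then the new inputs,
    # then any remaining original entries.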

    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if consistencyError := self._checkOutputInputConsistency():
            raise ValueError(consistencyError)

        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    def _checkOutputInputConsistency(self) -> str | None:
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means the inputs must appear as a
            # contiguous subsequence of outputs (normally they're also at the
            # end, but --rebase will in general put them in the middle).
            for n in reversed(range(1 + len(self.output.chain) - len(self.inputs))):
                if self.inputs == self.output.chain[n : n + len(self.inputs)]:
                    return None
            return (
                f"Output CHAINED collection {self.output.name!r} exists and does not include the "
                f"same sequence of (flattened) input collections {self.inputs} as a contiguous "
                "subsequence. "
                "Use --rebase to ignore this problem and reset the output collection, but note that "
                "this may obfuscate what inputs were actually used to produce these outputs."
            )
        return None
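    # Worked example of the contiguous-subsequence test above (hypothetical
    # names): inputs == ("a", "b") is consistent with an existing chain
    # ("run1", "a", "b") or ("run2", "a", "b", "run1"), because ("a", "b")
    # appears intact and in order; it is inconsistent with ("a", "run1", "b"),
    # where another collection interrupts the inputs.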

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], _ButlerFactory]:
        """Parse arguments to support implementations of `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `~collections.abc.Sequence` [ `str` ]
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionWildcard.from_expression(inputs).require_ordered()
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)
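    # Usage sketch (same hypothetical ``args`` namespace as above): a
    # read-only butler whose default search path reflects the requested
    # output chain would be obtained with
    #
    #     butler = _ButlerFactory.makeReadButler(args)
    #
    # after which datasets are found through the usual butler query methods.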

    @classmethod
    def makeButlerAndCollections(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], str | None]:
        """Return a read-only butler, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `~collections.abc.Sequence` [ `str` ]
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if one is defined, or `None` otherwise.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: str | None = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
        if self.outputRun is not None:
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @staticmethod
    def defineDatastoreCache() -> None:
        """Define where datastore cache directories should be found.

        Notes
        -----
        All the jobs should share a datastore cache if applicable.  This
        method asks for a shared fallback cache to be defined and then
        configures an exit handler to clean it up.
        """
        defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
        if defined:
            atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
            _LOG.debug("Defining shared datastore cache directory to %s", cache_dir)

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Iterable[TaskDef] | None = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline.  This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from the datastore.
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # We want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all datasets; the
                    # collection must be removed from its parent chain first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.removeRuns([replaced], unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = (self.outputRun.name,) + self.inputs
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler
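    # Worked example of --replace-run above (hypothetical names): starting
    # from chainDefinition == ["u/user/out/run1", "inputA"], replace_run pops
    # "u/user/out/run1" as ``replaced``; a new run such as "u/user/out/run2"
    # is then registered and inserted at the front, giving
    # ["u/user/out/run2", "inputA"] as the new chain definition.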

    output: _OutputChainedCollectionInfo | None
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: _OutputRunCollectionInfo | None
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


class _QBBFactory:
    """Class which is a callable for making QuantumBackedButler (QBB)
    instances."""

    def __init__(
        self, butler_config: Config, dimensions: DimensionUniverse, dataset_types: Mapping[str, DatasetType]
    ):
        self.butler_config = butler_config
        self.dimensions = dimensions
        self.dataset_types = dataset_types

    def __call__(self, quantum: Quantum) -> LimitedButler:
        """Return a freshly initialized
        `~lsst.daf.butler.QuantumBackedButler`.

        Factory method to create QuantumBackedButler instances.
        """
        return QuantumBackedButler.initialize(
            config=self.butler_config,
            quantum=quantum,
            dimensions=self.dimensions,
            dataset_types=self.dataset_types,
        )
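# A minimal sketch of how _QBBFactory is used in quantum-backed execution,
# assuming a quantum graph file saved earlier; this function and its
# arguments are hypothetical, and it is never called by the framework
# (CmdLineFwk.runGraphQBB below is the real entry point).
def _example_qbb_roundtrip(qgraph_uri: str, butler_config: Config) -> None:
    qgraph = QuantumGraph.loadUri(qgraph_uri)
    factory = _QBBFactory(
        butler_config=butler_config,
        dimensions=qgraph.universe,
        dataset_types={dstype.name: dstype for dstype in qgraph.registryDatasetTypes()},
    )
    for node in qgraph:
        # Each call constructs a QuantumBackedButler scoped to a single
        # quantum's inputs and outputs.
        limited_butler = factory(node.quantum)
        assert isinstance(limited_butler, LimitedButler)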

514# ------------------------ 

515# Exported definitions -- 

516# ------------------------ 

517 

518 

519class CmdLineFwk: 

520 """PipelineTask framework which executes tasks from command line. 

521 

522 In addition to executing tasks this activator provides additional methods 

523 for task management like dumping configuration or execution chain. 

524 """ 

525 

526 MP_TIMEOUT = 3600 * 24 * 30 # Default timeout (sec) for multiprocessing 

527 

528 def __init__(self) -> None: 

529 pass 

530 

531 def makePipeline(self, args: SimpleNamespace) -> Pipeline: 

532 """Build a pipeline from command line arguments. 

533 

534 Parameters 

535 ---------- 

536 args : `types.SimpleNamespace` 

537 Parsed command line 

538 

539 Returns 

540 ------- 

541 pipeline : `~lsst.pipe.base.Pipeline` 

542 """ 

543 if args.pipeline: 

544 pipeline = Pipeline.from_uri(args.pipeline) 

545 else: 

546 pipeline = Pipeline("anonymous") 

547 

548 # loop over all pipeline actions and apply them in order 

549 for action in args.pipeline_actions: 

550 if action.action == "add_instrument": 

551 pipeline.addInstrument(action.value) 

552 

553 elif action.action == "new_task": 

554 pipeline.addTask(action.value, action.label) 

555 

556 elif action.action == "delete_task": 

557 pipeline.removeTask(action.label) 

558 

559 elif action.action == "config": 

560 # action value string is "field=value", split it at '=' 

561 field, _, value = action.value.partition("=") 

562 pipeline.addConfigOverride(action.label, field, value) 

563 

564 elif action.action == "configfile": 

565 pipeline.addConfigFile(action.label, action.value) 

566 

567 else: 

568 raise ValueError(f"Unexpected pipeline action: {action.action}") 

569 

570 if args.save_pipeline: 

571 pipeline.write_to_uri(args.save_pipeline) 

572 

573 if args.pipeline_dot: 

574 pipeline2dot(pipeline, args.pipeline_dot) 

575 

576 return pipeline 

577 
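    # Illustrative mapping from command-line options to the actions handled
    # above (hypothetical task and option values; the exact flag spellings
    # are defined by the pipetask CLI, not here):
    #
    #     -t lsst.ip.isr.IsrTask:isr  ->  action="new_task",
    #                                     label="isr", value="lsst.ip.isr.IsrTask"
    #     -c isr:doDark=False         ->  action="config",
    #                                     label="isr", value="doDark=False"
    #     -C isr:overrides.py         ->  action="configfile",
    #                                     label="isr", value="overrides.py"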

    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> QuantumGraph | None:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline; can be empty or `None` if the graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
        """
        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes an empty tuple as the default value for
            # qgraph_node_id.
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(args.qgraph, butler.dimensions, nodes=nodes, graphID=args.qgraph_id)

            # A pipeline cannot be provided in this case.
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            task_defs = list(pipeline.toExpandedPipeline())
            if args.mock:
                from lsst.pipe.base.tests.mocks import mock_task_defs

                task_defs = mock_task_defs(
                    task_defs,
                    unmocked_dataset_types=args.unmocked_dataset_types,
                    force_failures=args.mock_failure,
                )
            # Make the execution plan (a.k.a. DAG) for the pipeline.
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler._datastore if args.qgraph_datastore_records else None,
            )
            # Accumulate metadata.
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            assert run is not None, "Butler output run collection must be defined"
            qgraph = graphBuilder.makeGraph(
                task_defs,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
                dataId=pipeline.get_data_id(butler.dimensions),
            )
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        if len(qgraph) == 0:
            # Nothing to do.
            return None
        self._summarize_qgraph(qgraph)

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                newArgs.butler_config = butler._config
                # Calling makeWriteButler is done for the side effects of
                # calling that method, mainly parsing all the args into
                # collection names, creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include the output collection in the input collections if it
            # exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                with contextlib.suppress(MissingCollectionError):
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
                datastoreRoot=args.target_datastore_root,
                transfer=args.transfer,
            )

        return qgraph

    def _make_execution_resources(self, args: SimpleNamespace) -> ExecutionResources:
        """Construct the execution resource class from arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum.
        """
        return ExecutionResources(
            num_cores=args.cores_per_quantum, max_mem=args.memory_per_quantum, default_mem_units=u.MB
        )
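    # Illustrative call (hypothetical values): with args.cores_per_quantum=4
    # and args.memory_per_quantum="2048", the construction above is
    # equivalent to
    #
    #     ExecutionResources(num_cores=4, max_mem="2048", default_mem_units=u.MB)
    #
    # i.e. four cores and 2048 megabytes available to each quantum.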

    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Butler | None = None,
    ) -> None:
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data butler instance; if not defined then a new instance is made
            using command line options.
        """
        # Check that the output run defined on the command line is consistent
        # with the quantum graph.
        if args.output_run and graph.metadata:
            graph_output_run = graph.metadata.get("output_run", args.output_run)
            if graph_output_run != args.output_run:
                raise ValueError(
                    f"Output run defined on command line ({args.output_run}) has to be "
                    f"identical to graph metadata ({graph_output_run}). "
                    "To update graph metadata run `pipetask update-graph-run` command."
                )

        # Make sure that --extend-run always enables --skip-existing;
        # clobbering should be disabled if --extend-run is not specified.
        if args.extend_run:
            args.skip_existing = True
        else:
            args.clobber_outputs = False

        if not args.enable_implicit_threading:
            disable_implicit_threading()

        # Make a butler instance.  The QuantumGraph should have an output run
        # defined, but we ignore it here and let the command line decide the
        # actual output run.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging.  Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses the fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            resources = self._make_execution_resources(args)
            quantumExecutor = SingleQuantumExecutor(
                butler,
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                resources=resources,
            )

            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            # Have to reset the connection pool to avoid sharing connections
            # with forked processes.
            butler.registry.resetConnectionPool()
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))

    def _generateTaskTable(self, qgraph: QuantumGraph) -> Table:
        """Generate an astropy table listing the number of quanta per task
        for a given quantum graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.QuantumGraph`
            A QuantumGraph object.

        Returns
        -------
        qg_task_table : `astropy.table.Table`
            An astropy table containing columns: Quanta and Tasks.
        """
        qg_quanta, qg_tasks = [], []
        for task_def in qgraph.iterTaskGraph():
            num_qnodes = qgraph.getNumberOfQuantaForTask(task_def)
            qg_quanta.append(num_qnodes)
            qg_tasks.append(task_def.label)
        qg_task_table = Table(dict(Quanta=qg_quanta, Tasks=qg_tasks))
        return qg_task_table
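    # Illustrative rendering (hypothetical task labels and counts) of the
    # table this method produces, as logged by _summarize_qgraph below:
    #
    #     Quanta     Tasks
    #     ------ ------------
    #        128          isr
    #        128 characterize
    #
    # one row per task in the graph, in task-graph order.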

    def _summarize_qgraph(self, qgraph: QuantumGraph) -> int:
        """Report a summary of the quanta in the graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.QuantumGraph`
            The graph to be summarized.

        Returns
        -------
        n_quanta : `int`
            The number of quanta in the graph.
        """
        n_quanta = len(qgraph)
        if n_quanta == 0:
            _LOG.info("QuantumGraph contains no quanta.")
        else:
            if _LOG.isEnabledFor(logging.INFO):
                qg_task_table = self._generateTaskTable(qgraph)
                qg_task_table_formatted = "\n".join(qg_task_table.pformat_all())
                quanta_str = "quantum" if n_quanta == 1 else "quanta"
                n_tasks = len(qgraph.taskGraph)
                n_tasks_plural = "" if n_tasks == 1 else "s"
                _LOG.info(
                    "QuantumGraph contains %d %s for %d task%s, graph ID: %r\n%s",
                    n_quanta,
                    quanta_str,
                    n_tasks,
                    n_tasks_plural,
                    qgraph.graphID,
                    qg_task_table_formatted,
                )
        return n_quanta

    def _importGraphFixup(self, args: SimpleNamespace) -> ExecutionGraphFixup | None:
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            Instantiated fixup object, or `None` if ``args.graph_fixup`` is
            not set.

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None
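    # Sketch of a user-provided fixup class selectable via --graph-fixup
    # (hypothetical module and method body; consult the ExecutionGraphFixup
    # ABC in .executionGraphFixup for the exact abstract interface):
    #
    #     from lsst.ctrl.mpexec.executionGraphFixup import ExecutionGraphFixup
    #
    #     class MyFixup(ExecutionGraphFixup):
    #         def fixupQuanta(self, graph):
    #             # Add extra ordering dependencies, then return the graph.
    #             return graph
    #
    # Selected on the command line as --graph-fixup=mypkg.MyFixup.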

    def preExecInitQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
        """Perform pre-execution initialization using a quantum-backed
        butler."""
        # Load the quantum graph.  We do not really need individual Quanta
        # here, but we need datastore records for initInputs, and those are
        # only available from Quanta, so load the whole thing.
        qgraph = QuantumGraph.loadUri(args.qgraph, graphID=args.qgraph_id)
        universe = qgraph.universe

        # Collect all init input/output dataset IDs.
        predicted_inputs: set[DatasetId] = set()
        predicted_outputs: set[DatasetId] = set()
        for taskDef in qgraph.iterTaskGraph():
            if (refs := qgraph.initInputRefs(taskDef)) is not None:
                predicted_inputs.update(ref.id for ref in refs)
            if (refs := qgraph.initOutputRefs(taskDef)) is not None:
                predicted_outputs.update(ref.id for ref in refs)
        predicted_outputs.update(ref.id for ref in qgraph.globalInitOutputRefs())
        # Remove intermediates from inputs.
        predicted_inputs -= predicted_outputs

        # Very inefficient way to extract datastore records from the quantum
        # graph: we have to scan all quanta and look at their datastore
        # records.
        datastore_records: dict[str, DatastoreRecordData] = {}
        for quantum_node in qgraph:
            for store_name, records in quantum_node.quantum.datastore_records.items():
                subset = records.subset(predicted_inputs)
                if subset is not None:
                    datastore_records.setdefault(store_name, DatastoreRecordData()).update(subset)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        # Make a butler from everything.
        butler = QuantumBackedButler.from_predicted(
            config=args.butler_config,
            predicted_inputs=predicted_inputs,
            predicted_outputs=predicted_outputs,
            dimensions=universe,
            datastore_records=datastore_records,
            search_paths=args.config_search_path,
            dataset_types=dataset_types,
        )

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInitLimited(butler, task_factory)
        preExecInit.initialize(qgraph)

    def runGraphQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
        """Execute a quantum graph using a quantum-backed butler."""
        # Load the quantum graph.
        nodes = args.qgraph_node_id or None
        qgraph = QuantumGraph.loadUri(args.qgraph, nodes=nodes, graphID=args.qgraph_id)

        if qgraph.metadata is None:
            raise ValueError("QuantumGraph is missing metadata, cannot continue.")

        self._summarize_qgraph(qgraph)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        _butler_factory = _QBBFactory(
            butler_config=args.butler_config,
            dimensions=qgraph.universe,
            dataset_types=dataset_types,
        )

        # Make a special quantum executor.
        resources = self._make_execution_resources(args)
        quantumExecutor = SingleQuantumExecutor(
            butler=None,
            taskFactory=task_factory,
            enableLsstDebug=args.enableLsstDebug,
            exitOnKnownError=args.fail_fast,
            limited_butler_factory=_butler_factory,
            resources=resources,
        )

        timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
        executor = MPGraphExecutor(
            numProc=args.processes,
            timeout=timeout,
            startMethod=args.start_method,
            quantumExecutor=quantumExecutor,
            failFast=args.fail_fast,
            pdb=args.pdb,
        )
        try:
            with util.profile(args.profile, _LOG):
                executor.execute(qgraph)
        finally:
            if args.summary:
                report = executor.getReport()
                if report:
                    with open(args.summary, "w") as out:
                        # Do not save fields that are not set.
                        out.write(report.json(exclude_none=True, indent=2))