Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 15%

390 statements  

coverage.py v7.4.0, created at 2024-01-20 11:03 +0000

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

from __future__ import annotations

__all__ = ["CmdLineFwk"]

import atexit
import contextlib
import copy
import datetime
import getpass
import logging
import shutil
from collections.abc import Iterable, Mapping, Sequence
from types import SimpleNamespace

import astropy.units as u
import lsst.utils.timer
from astropy.table import Table
from lsst.daf.butler import (
    Butler,
    CollectionType,
    Config,
    DatasetId,
    DatasetRef,
    DatasetType,
    DimensionUniverse,
    LimitedButler,
    Quantum,
    QuantumBackedButler,
    Registry,
)
from lsst.daf.butler.datastore.cache_manager import DatastoreCacheManager
from lsst.daf.butler.datastore.record_data import DatastoreRecordData
from lsst.daf.butler.direct_butler import DirectButler
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.daf.butler.registry.wildcards import CollectionWildcard
from lsst.pipe.base import (
    ExecutionResources,
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    TaskDef,
    TaskFactory,
    buildExecutionButler,
)
from lsst.utils import doImportType
from lsst.utils.threads import disable_implicit_threading

from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit, PreExecInitLimited
from .singleQuantumExecutor import SingleQuantumExecutor

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: tuple[str, ...]
    """The definition of the collection, if it already exists (`tuple`[`str`]).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should already
            exist but will be removed from the output chained collection and
            replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``rebase``
            A boolean indicating whether to force the ``output`` collection
            to be consistent with the ``inputs`` and output run, such that
            the ``output`` collection lists output run collections first
            (i.e. those that start with the same prefix), then the new
            inputs, then any original inputs not included in the new inputs.

        ``inputs``
            Input collections of any type; see
            :ref:`daf_butler_ordered_collection_searches` for details.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `~lsst.daf.butler.Butler` is being initialized in a
        context where actual writes should happen, and hence an output run
        is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            if args.rebase and self.output and not args.output_run.startswith(self.output.name):
                raise ValueError("Cannot rebase if output run does not start with output collection name.")
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = f"{self.output}/{Instrument.makeCollectionTimestamp()}"
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections. We do this up
        # front so we can tell if the user passes the same inputs on subsequent
        # calls, even though we also flatten when we define the output CHAINED
        # collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

        # If things are inconsistent and user has asked for a rebase then
        # construct the new output chain.
        if args.rebase and self._checkOutputInputConsistency():
            assert self.output is not None
            newOutputChain = [item for item in self.output.chain if item.startswith(self.output.name)]
            newOutputChain.extend([item for item in self.inputs if item not in newOutputChain])
            newOutputChain.extend([item for item in self.output.chain if item not in newOutputChain])
            self.output.chain = tuple(newOutputChain)

    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if consistencyError := self._checkOutputInputConsistency():
            raise ValueError(consistencyError)

        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    def _checkOutputInputConsistency(self) -> str | None:
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means the inputs must appear as a
            # contiguous subsequence of outputs (normally they're also at the
            # end, but --rebase will in general put them in the middle).
            for n in reversed(range(1 + len(self.output.chain) - len(self.inputs))):
                if self.inputs == self.output.chain[n : n + len(self.inputs)]:
                    return None
            return (
                f"Output CHAINED collection {self.output.name!r} exists and does not include the "
                f"same sequence of (flattened) input collections {self.inputs} as a contiguous "
                "subsequence. "
                "Use --rebase to ignore this problem and reset the output collection, but note that "
                "this may obfuscate what inputs were actually used to produce these outputs."
            )
        return None
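
    # A sketch of the check above, with illustrative collection names: if
    # self.output.chain == ("u/me/out/run2", "u/me/out/run1", "calib", "raw")
    # and self.inputs == ("calib", "raw"), the window starting at n == 2
    # matches and None is returned; inputs of ("raw", "calib") match no
    # contiguous window, so the error message is returned instead.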

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], _ButlerFactory]:
        """Parse arguments to support implementations of `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `~collections.abc.Sequence` [ `str` ]
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler.from_config(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionWildcard.from_expression(inputs).require_ordered()
        return butler, collSearch, self
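
    # A search-order sketch with hypothetical names: an existing --output
    # u/me/out yields the path ["u/me/out"]; --extend-run additionally
    # prepends the output run, e.g. ["u/me/out/20240120T110300Z", "u/me/out"];
    # --replace-run instead searches the old chain minus its first
    # (replaced) run.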

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler.from_config(butler=butler, collections=inputs)

    @classmethod
    def makeButlerAndCollections(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], str | None]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `Sequence` [ `str` ]
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN` collection
            if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: str | None = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
        if self.outputRun is not None:
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @staticmethod
    def defineDatastoreCache() -> None:
        """Define where datastore cache directories should be found.

        Notes
        -----
        All the jobs should share a datastore cache if applicable. This
        method asks for a shared fallback cache to be defined and then
        configures an exit handler to clean it up.
        """
        defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
        if defined:
            atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
            _LOG.debug("Defining shared datastore cache directory to %s", cache_dir)

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Iterable[TaskDef] | None = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline. This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler = Butler.from_config(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # we want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase entire collection and all datasets, need to remove
                    # collection from its chain collection first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.removeRuns([replaced], unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
            butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
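            # A sketch of the resulting order (hypothetical names): --output
            # u/me/out with a fresh output run yields a chain like
            # ["u/me/out/20240120T110300Z", <flattened inputs...>], so the
            # newest run is searched first.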
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = (self.outputRun.name,) + self.inputs
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: _OutputChainedCollectionInfo | None
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: _OutputRunCollectionInfo | None
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


class _QBBFactory:
    """Class which is a callable for making QBB instances."""

    def __init__(
        self, butler_config: Config, dimensions: DimensionUniverse, dataset_types: Mapping[str, DatasetType]
    ):
        self.butler_config = butler_config
        self.dimensions = dimensions
        self.dataset_types = dataset_types

    def __call__(self, quantum: Quantum) -> LimitedButler:
        """Return freshly initialized `~lsst.daf.butler.QuantumBackedButler`.

        Factory method to create QuantumBackedButler instances.
        """
        return QuantumBackedButler.initialize(
            config=self.butler_config,
            quantum=quantum,
            dimensions=self.dimensions,
            dataset_types=self.dataset_types,
        )
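
# A sketch of how this factory is used (names here are illustrative): the
# quantum executor builds one instance up front and calls it once per
# quantum, e.g.
#
#     qbb_factory = _QBBFactory(butler_config, qgraph.universe, dataset_types)
#     limited_butler = qbb_factory(quantum_node.quantum)
#
# so each quantum gets a LimitedButler backed only by that quantum's
# predicted inputs/outputs and datastore records.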


# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing

    def __init__(self) -> None:
        pass

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            Newly-constructed pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)

            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":
                pipeline.removeTask(action.label)

            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)
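                # A sketch with a hypothetical label and field: an option
                # like "-c isr:doFlat=False" arrives here as a "config"
                # action with label "isr" and value "doFlat=False", which
                # partition() splits into field "doFlat" and value "False".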

            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)

            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline

    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> QuantumGraph | None:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or ``None`` if graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If resulting graph is empty then `None` is returned.
        """
        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes empty tuple as default value for qgraph_node_id
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(args.qgraph, butler.dimensions, nodes=nodes, graphID=args.qgraph_id)

            # pipeline cannot be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            task_defs = list(pipeline.toExpandedPipeline())
            if args.mock:
                from lsst.pipe.base.tests.mocks import mock_task_defs

                task_defs = mock_task_defs(
                    task_defs,
                    unmocked_dataset_types=args.unmocked_dataset_types,
                    force_failures=args.mock_failure,
                )
            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler._datastore if args.qgraph_datastore_records else None,
            )
            # accumulate metadata
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            assert run is not None, "Butler output run collection must be defined"
            qgraph = graphBuilder.makeGraph(
                task_defs,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
                dataId=pipeline.get_data_id(butler.dimensions),
            )
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        if len(qgraph) == 0:
            # Nothing to do.
            return None
        self._summarize_qgraph(qgraph)

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
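                # A sketch: a template such as "quanta/{}.qgraph"
                # (hypothetical) is expanded once per node, producing one
                # graph file per quantum.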
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler.from_config(args.butler_config)
            assert isinstance(butler, DirectButler), "Execution butler needs DirectButler"
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                assert isinstance(butler, DirectButler), "Execution butler needs DirectButler"
                newArgs.butler_config = butler._config
689 # calling that method, maining parsing all the args into 

690 # collection names, creating collections, etc. 

691 newButler = _ButlerFactory.makeWriteButler(newArgs) 

692 return newButler 

693 

694 # Include output collection in collections for input 

695 # files if it exists in the repo. 

696 all_inputs = args.input 

697 if args.output is not None: 

698 with contextlib.suppress(MissingCollectionError): 

699 all_inputs += (next(iter(butler.registry.queryCollections(args.output))),) 

700 

701 _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs) 

702 buildExecutionButler( 

703 butler, 

704 qgraph, 

705 args.execution_butler_location, 

706 run, 

707 butlerModifier=builderShim, 

708 collections=all_inputs, 

709 clobber=args.clobber_execution_butler, 

710 datastoreRoot=args.target_datastore_root, 

711 transfer=args.transfer, 

712 ) 

713 

714 return qgraph 


    def _make_execution_resources(self, args: SimpleNamespace) -> ExecutionResources:
        """Construct the execution resource class from arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum.
        """
        return ExecutionResources(
            num_cores=args.cores_per_quantum, max_mem=args.memory_per_quantum, default_mem_units=u.MB
        )
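
    # A sketch with hypothetical values: "--cores-per-quantum 4
    # --memory-per-quantum 2048" becomes ExecutionResources(num_cores=4,
    # max_mem=2048), the bare number being interpreted in u.MB via
    # default_mem_units.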

    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Butler | None = None,
    ) -> None:
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance, if not defined then new instance is made
            using command line options.
        """
        if not args.enable_implicit_threading:
            disable_implicit_threading()

        # Check that output run defined on command line is consistent with
        # quantum graph.
        if args.output_run and graph.metadata:
            graph_output_run = graph.metadata.get("output_run", args.output_run)
            if graph_output_run != args.output_run:
                raise ValueError(
                    f"Output run defined on command line ({args.output_run}) has to be "
                    f"identical to graph metadata ({graph_output_run}). "
                    "To update graph metadata run `pipetask update-graph-run` command."
                )

        # Make sure that --extend-run always enables --skip-existing,
        # clobbering should be disabled if --extend-run is not specified.
        if args.extend_run:
            args.skip_existing = True
        else:
            args.clobber_outputs = False

        # Make butler instance. QuantumGraph should have an output run defined,
        # but we ignore it here and let command line decide actual output run.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            resources = self._make_execution_resources(args)
            quantumExecutor = SingleQuantumExecutor(
                butler,
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                resources=resources,
            )

            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            # Have to reset connection pool to avoid sharing connections with
            # forked processes.
            butler.registry.resetConnectionPool()
            try:
                with lsst.utils.timer.profile(args.profile, _LOG):
                    executor.execute(graph)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.model_dump_json(exclude_none=True, indent=2))

    def _generateTaskTable(self, qgraph: QuantumGraph) -> Table:
        """Generate astropy table listing the number of quanta per task for a
        given quantum graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.graph.graph.QuantumGraph`
            A QuantumGraph object.

        Returns
        -------
        qg_task_table : `astropy.table.table.Table`
            An astropy table containing columns: Quanta and Tasks.
        """
        qg_quanta, qg_tasks = [], []
        for task_def in qgraph.iterTaskGraph():
            num_qnodes = qgraph.getNumberOfQuantaForTask(task_def)
            qg_quanta.append(num_qnodes)
            qg_tasks.append(task_def.label)
        qg_task_table = Table(dict(Quanta=qg_quanta, Tasks=qg_tasks))
        return qg_task_table
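
    # A sketch of the rendered table (values are illustrative):
    #
    #     Quanta   Tasks
    #     ------ -------
    #         10     isr
    #          2 measure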

    def _summarize_qgraph(self, qgraph: QuantumGraph) -> int:
        """Report a summary of the quanta in the graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.QuantumGraph`
            The graph to be summarized.

        Returns
        -------
        n_quanta : `int`
            The number of quanta in the graph.
        """
        n_quanta = len(qgraph)
        if n_quanta == 0:
            _LOG.info("QuantumGraph contains no quanta.")
        else:
            if _LOG.isEnabledFor(logging.INFO):
                qg_task_table = self._generateTaskTable(qgraph)
                qg_task_table_formatted = "\n".join(qg_task_table.pformat_all())
                quanta_str = "quantum" if n_quanta == 1 else "quanta"
                n_tasks = len(qgraph.taskGraph)
                n_tasks_plural = "" if n_tasks == 1 else "s"
                _LOG.info(
                    "QuantumGraph contains %d %s for %d task%s, graph ID: %r\n%s",
                    n_quanta,
                    quanta_str,
                    n_tasks,
                    n_tasks_plural,
                    qgraph.graphID,
                    qg_task_table_formatted,
                )
        return n_quanta

    def _importGraphFixup(self, args: SimpleNamespace) -> ExecutionGraphFixup | None:
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            Instantiated fixup object, or `None` if ``args.graph_fixup`` is
            not set.

        Raises
        ------
        ValueError
            Raised if import fails, method call raises exception, or returned
            instance has unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None
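
    # A sketch of the expected option value (hypothetical name):
    # "--graph-fixup mypackage.fixups.MyFixup", where the imported symbol is
    # an ExecutionGraphFixup subclass or any no-argument callable returning
    # one.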

    def preExecInitQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
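        """Run pre-execution initialization using a quantum-backed butler,
        saving all InitOutputs, configs, etc.

        Parameters
        ----------
        task_factory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        """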
        # Load quantum graph. We do not really need individual Quanta here,
        # but we need datastore records for initInputs, and those are only
        # available from Quanta, so load the whole thing.
        qgraph = QuantumGraph.loadUri(args.qgraph, graphID=args.qgraph_id)
        universe = qgraph.universe

        # Collect all init input/output dataset IDs.
        predicted_inputs: set[DatasetId] = set()
        predicted_outputs: set[DatasetId] = set()
        for taskDef in qgraph.iterTaskGraph():
            if (refs := qgraph.initInputRefs(taskDef)) is not None:
                predicted_inputs.update(ref.id for ref in refs)
            if (refs := qgraph.initOutputRefs(taskDef)) is not None:
                predicted_outputs.update(ref.id for ref in refs)
        predicted_outputs.update(ref.id for ref in qgraph.globalInitOutputRefs())
        # remove intermediates from inputs
        predicted_inputs -= predicted_outputs

        # Very inefficient way to extract datastore records from quantum graph,
        # we have to scan all quanta and look at their datastore records.
        datastore_records: dict[str, DatastoreRecordData] = {}
        for quantum_node in qgraph:
            for store_name, records in quantum_node.quantum.datastore_records.items():
                subset = records.subset(predicted_inputs)
                if subset is not None:
                    datastore_records.setdefault(store_name, DatastoreRecordData()).update(subset)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        # Make butler from everything.
        butler = QuantumBackedButler.from_predicted(
            config=args.butler_config,
            predicted_inputs=predicted_inputs,
            predicted_outputs=predicted_outputs,
            dimensions=universe,
            datastore_records=datastore_records,
            search_paths=args.config_search_path,
            dataset_types=dataset_types,
        )

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInitLimited(butler, task_factory)
        preExecInit.initialize(qgraph)

    def runGraphQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
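        """Execute a quantum graph using a quantum-backed butler.

        Parameters
        ----------
        task_factory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        """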
        if not args.enable_implicit_threading:
            disable_implicit_threading()

        # Load quantum graph.
        nodes = args.qgraph_node_id or None
        qgraph = QuantumGraph.loadUri(args.qgraph, nodes=nodes, graphID=args.qgraph_id)

        if qgraph.metadata is None:
            raise ValueError("QuantumGraph is missing metadata, cannot continue.")

        self._summarize_qgraph(qgraph)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        _butler_factory = _QBBFactory(
            butler_config=args.butler_config,
            dimensions=qgraph.universe,
            dataset_types=dataset_types,
        )

        # make special quantum executor
        resources = self._make_execution_resources(args)
        quantumExecutor = SingleQuantumExecutor(
            butler=None,
            taskFactory=task_factory,
            enableLsstDebug=args.enableLsstDebug,
            exitOnKnownError=args.fail_fast,
            limited_butler_factory=_butler_factory,
            resources=resources,
        )

        timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
        executor = MPGraphExecutor(
            numProc=args.processes,
            timeout=timeout,
            startMethod=args.start_method,
            quantumExecutor=quantumExecutor,
            failFast=args.fail_fast,
            pdb=args.pdb,
        )
        try:
            with lsst.utils.timer.profile(args.profile, _LOG):
                executor.execute(qgraph)
        finally:
            if args.summary:
                report = executor.getReport()
                if report:
                    with open(args.summary, "w") as out:
                        # Do not save fields that are not set.
                        out.write(report.model_dump_json(exclude_none=True, indent=2))