Coverage for python/lsst/ctrl/bps/generic_workflow.py: 37%

317 statements  

coverage.py v7.5.0, created at 2024-04-25 00:15 -0700

# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

22"""Class definitions for a Generic Workflow Graph. 

23""" 

24 

25__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob", "GenericWorkflowExec"] 

26 

27 

28import dataclasses 

29import itertools 

30import logging 

31import pickle 

32from collections import Counter 

33from typing import Optional 

34 

35from lsst.utils.iteration import ensure_iterable 

36from networkx import DiGraph, topological_sort 

37from networkx.algorithms.dag import is_directed_acyclic_graph 

38 

39from .bps_draw import draw_networkx_dot 

40 

41_LOG = logging.getLogger(__name__) 

42 

43 

@dataclasses.dataclass
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.
    """

    name: str
    """Lookup key (logical file name) of file/directory. Must be unique
    within run.
    """

    src_uri: Optional[str]  # don't know that need ResourcePath
    """Original location of file/directory.
    """

    wms_transfer: bool
    """Whether the WMS is responsible for transferring the file.
    Default is False.
    """

    job_access_remote: bool
    """Whether the job can remotely access file (using separately specified
    file access protocols). Default is False.
    """

    job_shared: bool
    """Whether the file can be shared between jobs (i.e., the job does not
    require its own copy). Default is False.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if give default
    # values, so writing own __init__.
    def __init__(
        self,
        name: str,
        src_uri: Optional[str] = None,
        wms_transfer: bool = False,
        job_access_remote: bool = False,
        job_shared: bool = False,
    ):
        self.name = name
        self.src_uri = src_uri
        self.wms_transfer = wms_transfer
        self.job_access_remote = job_access_remote
        self.job_shared = job_shared

    __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")

    def __hash__(self):
        return hash(self.name)


@dataclasses.dataclass
class GenericWorkflowExec:
    """Information about an executable that may be needed by various workflow
    management services.
    """

    name: str
    """Lookup key (logical file name) of executable. Must be unique
    within run.
    """

    src_uri: Optional[str]  # don't know that need ResourcePath
    """Original location of executable.
    """

    transfer_executable: bool
    """Whether the WMS/plugin is responsible for staging executable to
    location usable by job.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if give default
    # values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, transfer_executable: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.transfer_executable = transfer_executable

    __slots__ = ("name", "src_uri", "transfer_executable")

    def __hash__(self):
        return hash(self.name)

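
# A minimal usage sketch (illustrative only; the names and paths below are
# invented, not part of ctrl_bps):
def _example_file_and_exec():
    # An executable the WMS/plugin should stage to where the job runs.
    run_script = GenericWorkflowExec("runScript", src_uri="/path/to/run.sh", transfer_executable=True)

    # A file the WMS is responsible for transferring for the job.
    butler_yaml = GenericWorkflowFile("butler.yaml", src_uri="/repo/butler.yaml", wms_transfer=True)

    # Both types hash by name only, so lookups and deduplication key on name.
    assert hash(butler_yaml) == hash(GenericWorkflowFile("butler.yaml"))
    return run_script, butler_yaml
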

@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.
    """

    name: str
    """Name of job. Must be unique within workflow.
    """

    label: Optional[str]
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    quanta_counts: Optional[Counter]
    """Counts of quanta per task label in job.
    """

    tags: Optional[dict]
    """Other key/value pairs for job that user may want to use as a filter.
    """

    executable: Optional[GenericWorkflowExec]
    """Executable for job.
    """

    arguments: Optional[str]
    """Command line arguments for job.
    """

    cmdvals: Optional[dict]
    """Values for variables in cmdline when using lazy command line creation.
    """

    memory_multiplier: Optional[float]
    """Memory growth rate between retries.
    """

    request_memory: Optional[int]  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_memory_max: Optional[int]  # MB
    """Max memory (in MB) that the job should ever use.
    """

    request_cpus: Optional[int]  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: Optional[int]  # MB
    """Max amount of job scratch disk (in MB) that the job is expected to
    need.
    """

    request_walltime: Optional[str]  # minutes
    """Max amount of time (in minutes) that the job is expected to need.
    """

    compute_site: Optional[str]
    """Key to look up site-specific information for running the job.
    """

    accounting_group: Optional[str]
    """Name of the accounting group to use.
    """

    accounting_user: Optional[str]
    """Name of the user to use for accounting purposes.
    """

    mail_to: Optional[str]
    """Comma separated list of email addresses for emailing job status.
    """

    when_to_mail: Optional[str]
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: Optional[int]
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: Optional[int]
    """Exit code for job that means to not automatically retry.
    """

    abort_on_value: Optional[int]
    """Job exit value that signals the entire workflow should be aborted.
    """

    abort_return_value: Optional[int]
    """Exit value to use when aborting the entire workflow.
    """

    priority: Optional[str]
    """Initial priority of job in WMS format.
    """

    category: Optional[str]
    """WMS-facing label of job within single workflow (e.g., can be used for
    throttling jobs within a single workflow).
    """

    concurrency_limit: Optional[str]
    """Names of concurrency limits that the WMS plugin can appropriately
    translate to limit the number of this job across all running workflows.
    """

    queue: Optional[str]
    """Name of queue to use. Different WMS can translate this concept
    differently.
    """

    pre_cmdline: Optional[str]
    """Command line to be executed prior to executing job.
    """

    post_cmdline: Optional[str]
    """Command line to be executed after job executes.

    Should be executed regardless of exit status.
    """

    preemptible: Optional[bool]
    """The flag indicating whether the job can be preempted.
    """

    profile: Optional[dict]
    """Nested dictionary of WMS-specific key/value pairs with primary key
    being WMS key (e.g., pegasus, condor, panda).
    """

    attrs: Optional[dict]
    """Key/value pairs of job attributes (for WMS that have attributes in
    addition to commands).
    """

    environment: Optional[dict]
    """Environment variable names and values to be explicitly set inside job.
    """

    compute_cloud: Optional[str]
    """Key to look up cloud-specific information for running the job.
    """

    # As of python 3.7.8, can't use __slots__ if give default values, so
    # writing own __init__.
    def __init__(self, name: str):
        self.name = name
        self.label = None
        self.quanta_counts = Counter()
        self.tags = {}
        self.executable = None
        self.arguments = None
        self.cmdvals = {}
        self.memory_multiplier = None
        self.request_memory = None
        self.request_memory_max = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.accounting_group = None
        self.accounting_user = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = None
        self.queue = None
        self.pre_cmdline = None
        self.post_cmdline = None
        self.preemptible = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}
        self.compute_cloud = None

    __slots__ = (
        "name",
        "label",
        "quanta_counts",
        "tags",
        "mail_to",
        "when_to_mail",
        "executable",
        "arguments",
        "cmdvals",
        "memory_multiplier",
        "request_memory",
        "request_memory_max",
        "request_cpus",
        "request_disk",
        "request_walltime",
        "number_of_retries",
        "retry_unless_exit",
        "abort_on_value",
        "abort_return_value",
        "compute_site",
        "accounting_group",
        "accounting_user",
        "environment",
        "priority",
        "category",
        "concurrency_limit",
        "queue",
        "pre_cmdline",
        "post_cmdline",
        "preemptible",
        "profile",
        "attrs",
        "compute_cloud",
    )

    def __hash__(self):
        return hash(self.name)

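
# A minimal usage sketch (illustrative only; the label and resource values
# below are invented):
def _example_job():
    job = GenericWorkflowJob("calibrate_903342_10")
    job.label = "calibrate"
    job.quanta_counts = Counter({"calibrate": 1})
    job.request_memory = 2048  # MB
    job.request_cpus = 1
    job.number_of_retries = 3
    job.memory_multiplier = 2.0  # double the memory request on each retry
    return job
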

class GenericWorkflow(DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialize graph that is passed through to DiGraph
        constructor. Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to DiGraph constructor.
    """

    def __init__(self, name, incoming_graph_data=None, **attr):
        super().__init__(incoming_graph_data, **attr)
        self._name = name
        self.run_attrs = {}
        self._files = {}
        self._executables = {}
        self._inputs = {}  # mapping job.names to list of GenericWorkflowFile
        self._outputs = {}  # mapping job.names to list of GenericWorkflowFile
        self.run_id = None
        self._final = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    @property
    def quanta_counts(self):
        """Count of quanta per task label (`collections.Counter`)."""
        qcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            if gwjob.quanta_counts is not None:
                qcounts += gwjob.quanta_counts
        return qcounts

    @property
    def job_counts(self):
        """Count of jobs per job label (`collections.Counter`)."""
        jcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            jcounts[gwjob.label] += 1

        # Final is separate
        final = self.get_final()
        if final:
            if isinstance(final, GenericWorkflow):
                for job_name in final:
                    gwjob = final.get_job(job_name)
                    jcounts[gwjob.label] += 1
            else:
                jcounts[final.label] += 1

        return jcounts

    def __iter__(self):
        """Return iterator of job names in topologically sorted order."""
        return topological_sort(self)
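
    # Usage sketch (job labels are hypothetical): iteration yields job names
    # parents-first, and the Counter properties summarize the graph, e.g.,
    #
    #     for job_name in gwf:          # topological order
    #         job = gwf.get_job(job_name)
    #     gwf.job_counts                # e.g., Counter({"calibrate": 10})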

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        Need API in case change way files are stored (e.g., make
        workflow a bipartite graph with jobs and files nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
            (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            File names or objects from generic workflow meeting specifications.
        """
        files = []
        for filename, file in self._files.items():
            if not transfer_only or file.wms_transfer:
                if not data:
                    files.append(filename)
                else:
                    files.append(file)
        return files

    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job.
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job.
        """
        if not isinstance(job, GenericWorkflowJob):
            raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).")
        if self.has_node(job.name):
            raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.")
        super().add_node(job.name, job=job)
        self.add_job_relationships(parent_names, job.name)
        self.add_job_relationships(job.name, child_names)
        self.add_executable(job.executable)

    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs. All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Children job names.
        """
        if parents is not None and children is not None:
            self.add_edges_from(itertools.product(ensure_iterable(parents), ensure_iterable(children)))
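
    # Usage sketch (hypothetical job names): all parents are connected to all
    # children, so
    #
    #     gwf.add_job_relationships(["jobA", "jobB"], ["jobC", "jobD"])
    #
    # adds the four edges jobA->jobC, jobA->jobD, jobB->jobC, jobB->jobD.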

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should be
            saved.
        attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently used).
        """
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow leaving connected graph.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
        """
        # Connect all parent jobs to all children jobs.
        parents = self.predecessors(job_name)
        children = self.successors(job_name)
        self.add_job_relationships(parents, children)

        # Delete job node (which deletes its edges).
        self.remove_node(job_name)
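
    # Usage sketch: with edges jobA -> jobB -> jobC, del_job("jobB") first
    # adds the edge jobA -> jobC, then removes jobB, so jobC still waits
    # for jobA.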

    def add_job_inputs(self, job_name, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added.
        files : `lsst.ctrl.bps.GenericWorkflowFile` or \
                `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File object(s) to be added as inputs to the specified job.
        """
        self._inputs.setdefault(job_name, [])
        for file in ensure_iterable(files):
            # Save the central copy.
            if file.name not in self._files:
                self._files[file.name] = file

            # Save the job reference to the file.
            self._inputs[job_name].append(file)

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object.

        Returns
        -------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File object to add to workflow.
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile
        else:
            _LOG.debug("Skipped add_file for existing file %s", gwfile.name)

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input files for the given job. If no input files for the job,
            returns an empty list.
        """
        inputs = []
        if job_name in self._inputs:
            for gwfile in self._inputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        inputs.append(gwfile.name)
                    else:
                        inputs.append(gwfile)
        return inputs

    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File objects to be added as outputs for specified job.
        """
        self._outputs.setdefault(job_name, [])

        for file_ in ensure_iterable(files):
            # Save the central copy.
            if file_.name not in self._files:
                self._files[file_.name] = file_

            # Save the job reference to the file.
            self._outputs[job_name].append(file_)

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management
            system would be responsible for transferring. It defaults to
            `False` thus returning all output files.

        Returns
        -------
        outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Output files for the given job. If no output files for the job,
            returns an empty list.
        """
        outputs = []

        if job_name in self._outputs:
            # _outputs stores the file objects themselves (see
            # add_job_outputs), so filter on them directly.
            for gwfile in self._outputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        outputs.append(gwfile.name)
                    else:
                        outputs.append(gwfile)
        return outputs
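
    # Usage sketch (hypothetical names): outputs are attached per job and can
    # be filtered when queried, e.g.,
    #
    #     gwf.add_job_outputs("job1", [GenericWorkflowFile("job1.log", wms_transfer=True)])
    #     gwf.get_job_outputs("job1", data=False, transfer_only=True)  # ["job1.log"]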

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use. It defaults to the format for
            the dot program.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            raise RuntimeError(f"Unknown draw format ({format_})")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer. Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data. It defaults to pickle format.
        """
        if format_ == "pickle":
            pickle.dump(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader. Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream. It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow loaded from the given stream.
        """
        if format_ == "pickle":
            return pickle.load(stream)

        raise RuntimeError(f"Unknown format ({format_})")
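
    # Round-trip sketch (hypothetical path): pickle requires binary streams,
    # e.g.,
    #
    #     with open("wf.pickle", "wb") as fh:
    #         gwf.save(fh, format_="pickle")
    #     with open("wf.pickle", "rb") as fh:
    #         gwf2 = GenericWorkflow.load(fh, format_="pickle")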

    def validate(self):
        """Run checks to ensure that the generic workflow graph is valid."""
        # Make sure a directed acyclic graph.
        assert is_directed_acyclic_graph(self)

    def add_workflow_source(self, workflow):
        """Add given workflow as new source to this workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.GenericWorkflow`
            Workflow whose jobs should run before the jobs already in this
            workflow.
        """
        # Find source nodes in self.
        self_sources = [n for n in self if self.in_degree(n) == 0]
        _LOG.debug("self_sources = %s", self_sources)

        # Find sink nodes of workflow.
        new_sinks = [n for n in workflow if workflow.out_degree(n) == 0]
        _LOG.debug("new sinks = %s", new_sinks)

        # Add new workflow nodes to self graph and make new edges.
        self.add_nodes_from(workflow.nodes(data=True))
        self.add_edges_from(workflow.edges())
        for source in self_sources:
            for sink in new_sinks:
                self.add_edge(sink, source)

        # Files are stored separately so copy them.
        for job_name in workflow:
            self.add_job_inputs(job_name, workflow.get_job_inputs(job_name, data=True))
            self.add_job_outputs(job_name, workflow.get_job_outputs(job_name, data=True))
            self.add_executable(workflow.get_job(job_name).executable)

    def add_final(self, final):
        """Add special final job/workflow to the generic workflow.

        Parameters
        ----------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute the special final job(s), the
            job(s) to be executed after all jobs that can be executed
            have been executed regardless of exit status of any of the
            jobs.
        """
        if not isinstance(final, (GenericWorkflowJob, GenericWorkflow)):
            raise TypeError(f"Invalid type for GenericWorkflow final ({type(final)})")

        self._final = final
        if isinstance(final, GenericWorkflowJob):
            self.add_executable(final.executable)
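
    # Usage sketch (hypothetical names): register a cleanup job that runs
    # after everything else regardless of exit status, e.g.,
    #
    #     final_job = GenericWorkflowJob("finalJob")
    #     final_job.executable = GenericWorkflowExec("final_script", src_uri="/path/to/final.sh")
    #     gwf.add_final(final_job)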

    def get_final(self):
        """Return job/workflow to be executed after all jobs that can be
        executed have been executed regardless of exit status of any of
        the jobs.

        Returns
        -------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute final job(s).
        """
        return self._final

    def add_executable(self, executable):
        """Add executable to workflow's list of executables.

        Parameters
        ----------
        executable : `lsst.ctrl.bps.GenericWorkflowExec`
            Executable object to be added to workflow.
        """
        if executable is not None:
            self._executables[executable.name] = executable
        else:
            _LOG.warning("executable not specified (None); cannot add to the workflow's list of executables")

    def get_executables(self, data=False, transfer_only=True):
        """Retrieve executables from generic workflow.

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the executable data as well as the exec object
            name. (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return executables for which transfer_executable
            is True.

        Returns
        -------
        execs : `list` [`lsst.ctrl.bps.GenericWorkflowExec`] or `list` [`str`]
            Filtered executable names or objects from generic workflow.
        """
        execs = []
        for name, executable in self._executables.items():
            if not transfer_only or executable.transfer_executable:
                if not data:
                    execs.append(name)
                else:
                    execs.append(executable)
        return execs
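

# A minimal end-to-end sketch (illustrative only; all job, file, and path
# names below are invented). It assembles a three-job chain, attaches a
# transferable input file, and checks the resulting graph.
def _example_build_workflow():
    gwf = GenericWorkflow("example_run")

    runner = GenericWorkflowExec("runScript", src_uri="/path/to/run.sh", transfer_executable=True)
    for name, label in [("init", "init"), ("quantum_1", "science"), ("finish", "finish")]:
        job = GenericWorkflowJob(name)
        job.label = label
        job.executable = runner
        gwf.add_job(job)

    # init -> quantum_1 -> finish
    gwf.add_job_relationships("init", "quantum_1")
    gwf.add_job_relationships("quantum_1", "finish")

    gwf.add_job_inputs("quantum_1", GenericWorkflowFile("butler.yaml", src_uri="/repo/butler.yaml", wms_transfer=True))

    gwf.validate()  # asserts the graph is a directed acyclic graph
    assert list(gwf) == ["init", "quantum_1", "finish"]  # topological order
    assert gwf.get_files(transfer_only=True) == ["butler.yaml"]
    return gwf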