Coverage for python/lsst/ctrl/bps/generic_workflow.py: 37%

316 statements  

coverage.py v6.4.4, created at 2022-08-31 10:45 +0000

# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Class definitions for a Generic Workflow Graph.
"""

__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob", "GenericWorkflowExec"]


import dataclasses
import itertools
import logging
from collections import Counter
from typing import Optional

from lsst.utils.iteration import ensure_iterable
from networkx import DiGraph, read_gpickle, topological_sort, write_gpickle
from networkx.algorithms.dag import is_directed_acyclic_graph

from .bps_draw import draw_networkx_dot

_LOG = logging.getLogger(__name__)

@dataclasses.dataclass
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.
    """

    name: str
    """Lookup key (logical file name) of file/directory. Must be unique
    within run.
    """

    src_uri: Optional[str]  # don't know yet whether this needs to be ResourcePath
    """Original location of file/directory.
    """

    wms_transfer: bool
    """Whether the WMS should transfer the file (rather than ignore it).
    Default is False.
    """

    job_access_remote: bool
    """Whether the job can remotely access file (using separately specified
    file access protocols). Default is False.
    """

    job_shared: bool
    """Whether the job can share this file with other jobs rather than
    requiring its own copy. Default is False.
    """

    # As of Python 3.7.8, can't use __slots__ + dataclass if given default
    # values, so writing our own __init__.
    def __init__(
        self,
        name: str,
        src_uri: Optional[str] = None,
        wms_transfer: bool = False,
        job_access_remote: bool = False,
        job_shared: bool = False,
    ):
        self.name = name
        self.src_uri = src_uri
        self.wms_transfer = wms_transfer
        self.job_access_remote = job_access_remote
        self.job_shared = job_shared

    __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")

    def __hash__(self):
        return hash(self.name)
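
# A minimal usage sketch (illustrative only; the file name and URI below are
# hypothetical). Files are hashed and looked up by logical name:
#
#     >>> gwfile = GenericWorkflowFile("job1.log", src_uri="/tmp/job1.log", wms_transfer=True)
#     >>> hash(gwfile) == hash("job1.log")
#     True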


@dataclasses.dataclass
class GenericWorkflowExec:
    """Information about an executable that may be needed by various workflow
    management services.
    """

    name: str
    """Lookup key (logical file name) of executable. Must be unique
    within run.
    """

    src_uri: Optional[str]  # don't know yet whether this needs to be ResourcePath
    """Original location of executable.
    """

    transfer_executable: bool
    """Whether the WMS/plugin is responsible for staging executable to
    location usable by job.
    """

    # As of Python 3.7.8, can't use __slots__ + dataclass if given default
    # values, so writing our own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, transfer_executable: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.transfer_executable = transfer_executable

    __slots__ = ("name", "src_uri", "transfer_executable")

    def __hash__(self):
        return hash(self.name)


@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.
    """

    name: str
    """Name of job. Must be unique within workflow.
    """

    label: Optional[str]
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    quanta_counts: Optional[Counter]
    """Counts of quanta per task label in job.
    """

    tags: Optional[dict]
    """Other key/value pairs for job that user may want to use as a filter.
    """

    executable: Optional[GenericWorkflowExec]
    """Executable for job.
    """

    arguments: Optional[str]
    """Command line arguments for job.
    """

    cmdvals: Optional[dict]
    """Values for variables in cmdline when using lazy command line creation.
    """

    memory_multiplier: Optional[float]
    """Memory growth rate between retries.
    """

    request_memory: Optional[int]  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_memory_max: Optional[int]  # MB
    """Max memory (in MB) that the job should ever use.
    """

    request_cpus: Optional[int]  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: Optional[int]  # MB
    """Max amount of job scratch disk (in MB) that the job is expected to
    need.
    """

    request_walltime: Optional[str]  # seconds
    """Max amount of time (in seconds) that the job is expected to need.
    """

    compute_site: Optional[str]
    """Key to look up site-specific information for running the job.
    """

    accounting_group: Optional[str]
    """Name of the accounting group to use.
    """

    accounting_user: Optional[str]
    """Name of the user to use for accounting purposes.
    """

    mail_to: Optional[str]
    """Comma separated list of email addresses for emailing job status.
    """

    when_to_mail: Optional[str]
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: Optional[int]
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: Optional[int]
    """Exit code for job that means to not automatically retry.
    """

    abort_on_value: Optional[int]
    """Job exit value that signals the entire workflow should be aborted.
    """

    abort_return_value: Optional[int]
    """Exit value to use when aborting the entire workflow.
    """

    priority: Optional[str]
    """Initial priority of job in WMS format.
    """

    category: Optional[str]
    """WMS-facing label of job within single workflow (e.g., can be used for
    throttling jobs within a single workflow).
    """

    concurrency_limit: Optional[str]
    """Names of concurrency limits that the WMS plugin can appropriately
    translate to limit the number of this job across all running workflows.
    """

    queue: Optional[str]
    """Name of queue to use. Different WMS can translate this concept
    differently.
    """

    pre_cmdline: Optional[str]
    """Command line to be executed prior to executing job.
    """

    post_cmdline: Optional[str]
    """Command line to be executed after job executes.

    Should be executed regardless of exit status.
    """

    preemptible: Optional[bool]
    """Flag indicating whether the job can be preempted.
    """

    profile: Optional[dict]
    """Nested dictionary of WMS-specific key/value pairs with primary key
    being WMS key (e.g., pegasus, condor, panda).
    """

    attrs: Optional[dict]
    """Key/value pairs of job attributes (for WMS that have attributes in
    addition to commands).
    """

    environment: Optional[dict]
    """Environment variable names and values to be explicitly set inside job.
    """

    compute_cloud: Optional[str]
    """Key to look up cloud-specific information for running the job.
    """

    # As of Python 3.7.8, can't use __slots__ if given default values, so
    # writing our own __init__.
    def __init__(self, name: str):
        self.name = name
        self.label = None
        self.quanta_counts = Counter()
        self.tags = {}
        self.executable = None
        self.arguments = None
        self.cmdvals = {}
        self.memory_multiplier = None
        self.request_memory = None
        self.request_memory_max = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.accounting_group = None
        self.accounting_user = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = None
        self.queue = None
        self.pre_cmdline = None
        self.post_cmdline = None
        self.preemptible = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}
        self.compute_cloud = None

    __slots__ = (
        "name",
        "label",
        "quanta_counts",
        "tags",
        "mail_to",
        "when_to_mail",
        "executable",
        "arguments",
        "cmdvals",
        "memory_multiplier",
        "request_memory",
        "request_memory_max",
        "request_cpus",
        "request_disk",
        "request_walltime",
        "number_of_retries",
        "retry_unless_exit",
        "abort_on_value",
        "abort_return_value",
        "compute_site",
        "accounting_group",
        "accounting_user",
        "environment",
        "priority",
        "category",
        "concurrency_limit",
        "queue",
        "pre_cmdline",
        "post_cmdline",
        "preemptible",
        "profile",
        "attrs",
        "compute_cloud",
    )

    def __hash__(self):
        return hash(self.name)
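
# A minimal sketch of populating a job (illustrative; the executable path,
# label, and resource values below are made up):
#
#     >>> gwexec = GenericWorkflowExec("pipetask", src_uri="/sw/bin/pipetask")
#     >>> gwjob = GenericWorkflowJob("job1")
#     >>> gwjob.label = "calibrate"
#     >>> gwjob.executable = gwexec
#     >>> gwjob.request_memory = 2048  # MB
#     >>> gwjob.quanta_counts = Counter({"calibrate": 1})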


class GenericWorkflow(DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialize graph that is passed through to DiGraph
        constructor. Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to DiGraph constructor.
    """

    def __init__(self, name, incoming_graph_data=None, **attr):
        super().__init__(incoming_graph_data, **attr)
        self._name = name
        self.run_attrs = {}
        self._files = {}
        self._executables = {}
        self._inputs = {}  # mapping job.names to list of GenericWorkflowFile
        self._outputs = {}  # mapping job.names to list of GenericWorkflowFile
        self.run_id = None
        self._final = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    @property
    def quanta_counts(self):
        """Count of quanta per task label (`collections.Counter`)."""
        qcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            if gwjob.quanta_counts is not None:
                qcounts += gwjob.quanta_counts
        return qcounts

    @property
    def job_counts(self):
        """Count of jobs per job label (`collections.Counter`)."""
        jcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            jcounts[gwjob.label] += 1

        # Final is separate
        final = self.get_final()
        if final:
            if isinstance(final, GenericWorkflow):
                for job_name in final:
                    gwjob = final.get_job(job_name)
                    jcounts[gwjob.label] += 1
            else:
                jcounts[final.label] += 1

        return jcounts

    def __iter__(self):
        """Return iterator of job names in topologically sorted order."""
        return topological_sort(self)

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        An API is needed in case the way files are stored changes (e.g.,
        making the workflow a bipartite graph with job and file nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
            (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            File names or objects from generic workflow meeting
            specifications.
        """
        files = []
        for filename, file in self._files.items():
            if not transfer_only or file.wms_transfer:
                if not data:
                    files.append(filename)
                else:
                    files.append(file)
        return files

    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job.
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job.
        """
        if not isinstance(job, GenericWorkflowJob):
            raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).")
        if self.has_node(job.name):
            raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.")
        super().add_node(job.name, job=job)
        self.add_job_relationships(parent_names, job.name)
        self.add_job_relationships(job.name, child_names)
        self.add_executable(job.executable)
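
    # A sketch of assembling a small diamond-shaped workflow (job names are
    # hypothetical):
    #
    #     >>> gw = GenericWorkflow("demo")
    #     >>> for name in ["a", "b1", "b2", "c"]:
    #     ...     gw.add_job(GenericWorkflowJob(name))
    #     >>> gw.add_job_relationships("a", ["b1", "b2"])
    #     >>> gw.add_job_relationships(["b1", "b2"], "c")
    #     >>> list(gw)  # job names in topological order
    #     ['a', 'b1', 'b2', 'c']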

    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs. All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Child job names.
        """
        if parents is not None and children is not None:
            self.add_edges_from(itertools.product(ensure_iterable(parents), ensure_iterable(children)))

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should be
            saved.
        attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently used).
        """
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow, leaving a connected graph.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
        """
        # Connect all parent jobs to all children jobs.
        parents = self.predecessors(job_name)
        children = self.successors(job_name)
        self.add_job_relationships(parents, children)

        # Delete job node (which deletes its edges).
        self.remove_node(job_name)
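
    # Continuing the hypothetical diamond above: deleting "b1" re-links its
    # parent directly to its child, keeping the graph connected.
    #
    #     >>> gw.del_job("b1")
    #     >>> gw.has_edge("a", "c")
    #     True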

    def add_job_inputs(self, job_name, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added.
        files : `lsst.ctrl.bps.GenericWorkflowFile` or \
                `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File object(s) to be added as inputs to the specified job.
        """
        self._inputs.setdefault(job_name, [])
        for file in ensure_iterable(files):
            # Save the central copy.
            if file.name not in self._files:
                self._files[file.name] = file

            # Save the job reference to the file.
            self._inputs[job_name].append(file)

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object.

        Returns
        -------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File object to add to workflow.
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile
        else:
            _LOG.debug("Skipped add_file for existing file %s", gwfile.name)

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input files for the given job. If no input files for the job,
            returns an empty list.
        """
        inputs = []
        if job_name in self._inputs:
            for gwfile in self._inputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        inputs.append(gwfile.name)
                    else:
                        inputs.append(gwfile)
        return inputs
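
    # A sketch of attaching and querying job inputs (file name and URI are
    # hypothetical):
    #
    #     >>> gwfile = GenericWorkflowFile("butler.yaml", src_uri="/repo/butler.yaml", wms_transfer=True)
    #     >>> gw.add_job_inputs("a", gwfile)
    #     >>> gw.get_job_inputs("a", data=False)
    #     ['butler.yaml']
    #     >>> gw.get_job_inputs("a", transfer_only=True)[0].wms_transfer
    #     True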

    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File objects to be added as outputs for specified job.
        """
        self._outputs.setdefault(job_name, [])

        for file_ in ensure_iterable(files):
            # Save the central copy.
            if file_.name not in self._files:
                self._files[file_.name] = file_

            # Save the job reference to the file.
            self._outputs[job_name].append(file_)

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management
            system would be responsible for transferring. It defaults to
            `False` thus returning all output files.

        Returns
        -------
        outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Output files for the given job. If no output files for the job,
            returns an empty list.
        """
        outputs = []
        if job_name in self._outputs:
            # _outputs stores the file objects themselves (see
            # add_job_outputs), so iterate over them directly rather than
            # treating the entries as file names.
            for gwfile in self._outputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        outputs.append(gwfile.name)
                    else:
                        outputs.append(gwfile)
        return outputs

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use. It defaults to the format for
            the dot program.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            raise RuntimeError(f"Unknown draw format ({format_})")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer. Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data. It defaults to pickle format.
        """
        if format_ == "pickle":
            write_gpickle(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader. Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream. It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow loaded from the given stream.
        """
        if format_ == "pickle":
            return read_gpickle(stream)

        raise RuntimeError(f"Unknown format ({format_})")
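
    # A sketch of a pickle round trip (path is hypothetical; networkx's
    # write_gpickle/read_gpickle also accept open binary file objects):
    #
    #     >>> gw.save("/tmp/demo.pickle")
    #     >>> gw2 = GenericWorkflow.load("/tmp/demo.pickle")
    #     >>> gw2.name
    #     'demo'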

    def validate(self):
        """Run checks to ensure that the generic workflow graph is valid."""
        # Make sure it is a directed acyclic graph.
        assert is_directed_acyclic_graph(self)

    def add_workflow_source(self, workflow):
        """Add given workflow as new source to this workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.GenericWorkflow`
            Workflow to add as a source of this workflow.
        """
        # Find source nodes in self.
        self_sources = [n for n in self if self.in_degree(n) == 0]
        _LOG.debug("self_sources = %s", self_sources)

        # Find sink nodes of workflow.
        new_sinks = [n for n in workflow if workflow.out_degree(n) == 0]
        _LOG.debug("new sinks = %s", new_sinks)

        # Add new workflow nodes to self graph and make new edges.
        self.add_nodes_from(workflow.nodes(data=True))
        self.add_edges_from(workflow.edges())
        for source in self_sources:
            for sink in new_sinks:
                self.add_edge(sink, source)

        # Files are stored separately so copy them.
        for job_name in workflow:
            self.add_job_inputs(job_name, workflow.get_job_inputs(job_name, data=True))
            self.add_job_outputs(job_name, workflow.get_job_outputs(job_name, data=True))
            self.add_executable(workflow.get_job(job_name).executable)
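
    # A sketch of prepending an init workflow (hypothetical names): every sink
    # of the added workflow becomes a parent of every existing source, so the
    # added jobs run before the rest of the workflow.
    #
    #     >>> init = GenericWorkflow("init")
    #     >>> init.add_job(GenericWorkflowJob("pipetaskInit"))
    #     >>> gw.add_workflow_source(init)
    #     >>> [n for n in gw if gw.in_degree(n) == 0]
    #     ['pipetaskInit']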

    def add_final(self, final):
        """Add special final job/workflow to the generic workflow.

        Parameters
        ----------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute the special final job(s), the
            job(s) to be executed after all jobs that can be executed
            have been executed regardless of exit status of any of the
            jobs.
        """
        if not isinstance(final, (GenericWorkflowJob, GenericWorkflow)):
            raise TypeError(f"Invalid type for GenericWorkflow final ({type(final)})")

        self._final = final
        if isinstance(final, GenericWorkflowJob):
            self.add_executable(final.executable)

    def get_final(self):
        """Return job/workflow to be executed after all jobs that can be
        executed have been executed regardless of exit status of any of
        the jobs.

        Returns
        -------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute final job(s).
        """
        return self._final

    def add_executable(self, executable):
        """Add executable to workflow's list of executables.

        Parameters
        ----------
        executable : `lsst.ctrl.bps.GenericWorkflowExec`
            Executable object to be added to workflow.
        """
        if executable is not None:
            self._executables[executable.name] = executable
        else:
            _LOG.warning("executable not specified (None); cannot add to the workflow's list of executables")

    def get_executables(self, data=False, transfer_only=True):
        """Retrieve executables from generic workflow.

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the executable data as well as the exec object
            name. (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return executables for which transfer_executable
            is True.

        Returns
        -------
        execs : `list` [`lsst.ctrl.bps.GenericWorkflowExec`] or `list` [`str`]
            Filtered executable names or objects from generic workflow.
        """
        execs = []
        for name, executable in self._executables.items():
            if not transfer_only or executable.transfer_executable:
                if not data:
                    execs.append(name)
                else:
                    execs.append(executable)
        return execs