Coverage for python/lsst/ctrl/bps/generic_workflow.py: 37%

313 statements  

« prev     ^ index     » next       coverage.py v6.4, created at 2022-05-26 09:58 +0000

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Class definitions for a Generic Workflow Graph. 

23""" 

24 

25__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob", "GenericWorkflowExec"] 

26 

27 

28import dataclasses 

29import itertools 

30import logging 

31from collections import Counter 

32from typing import Optional 

33 

34from lsst.utils.iteration import ensure_iterable 

35from networkx import DiGraph, read_gpickle, topological_sort, write_gpickle 

36from networkx.algorithms.dag import is_directed_acyclic_graph 

37 

38from .bps_draw import draw_networkx_dot 

39 

40_LOG = logging.getLogger(__name__) 

41 

42 

43@dataclasses.dataclass 

44class GenericWorkflowFile: 

45 """Information about a file that may be needed by various workflow 

46 management services. 

47 """ 

48 

49 name: str 

50 """Lookup key (logical file name) of file/directory. Must be unique 

51 within run. 

52 """ 

53 

54 src_uri: str or None # don't know that need ResourcePath 

55 """Original location of file/directory. 

56 """ 

57 

58 wms_transfer: bool 

59 """Whether the WMS should ignore file or not. Default is False. 

60 """ 

61 

62 job_access_remote: bool 

63 """Whether the job can remotely access file (using separately specified 

64 file access protocols). Default is False. 

65 """ 

66 

67 job_shared: bool 

68 """Whether job requires its own copy of this file. Default is False. 

69 """ 

70 

71 # As of python 3.7.8, can't use __slots__ + dataclass if give default 

72 # values, so writing own __init__. 

73 def __init__( 

74 self, 

75 name: str, 

76 src_uri: str = None, 

77 wms_transfer: bool = False, 

78 job_access_remote: bool = False, 

79 job_shared: bool = False, 

80 ): 

81 self.name = name 

82 self.src_uri = src_uri 

83 self.wms_transfer = wms_transfer 

84 self.job_access_remote = job_access_remote 

85 self.job_shared = job_shared 

86 

87 __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared") 

88 

89 def __hash__(self): 

90 return hash(self.name) 

91 

92 

93@dataclasses.dataclass 

94class GenericWorkflowExec: 

95 """Information about an executable that may be needed by various workflow 

96 management services. 

97 """ 

98 

99 name: str 

100 """Lookup key (logical file name) of executable. Must be unique 

101 within run. 

102 """ 

103 

104 src_uri: str or None # don't know that need ResourcePath 

105 """Original location of executable. 

106 """ 

107 

108 transfer_executable: bool 

109 """Whether the WMS/plugin is responsible for staging executable to 

110 location usable by job. 

111 """ 

112 

113 # As of python 3.7.8, can't use __slots__ + dataclass if give default 

114 # values, so writing own __init__. 

115 def __init__(self, name: str, src_uri: str = None, transfer_executable: bool = False): 

116 self.name = name 

117 self.src_uri = src_uri 

118 self.transfer_executable = transfer_executable 

119 

120 __slots__ = ("name", "src_uri", "transfer_executable") 

121 

122 def __hash__(self): 

123 return hash(self.name) 

124 

125 

126@dataclasses.dataclass 

127class GenericWorkflowJob: 

128 """Information about a job that may be needed by various workflow 

129 management services. 

130 """ 

131 

132 name: str 

133 """Name of job. Must be unique within workflow. 

134 """ 

135 

136 label: Optional[str] 

137 """Primary user-facing label for job. Does not need to be unique 

138 and may be used for summary reports. 

139 """ 

140 

141 quanta_counts: Optional[Counter] 

142 """Counts of quanta per task label in job. 

143 """ 

144 

145 tags: Optional[dict] 

146 """Other key/value pairs for job that user may want to use as a filter. 

147 """ 

148 

149 executable: Optional[GenericWorkflowExec] 

150 """Executable for job. 

151 """ 

152 

153 arguments: Optional[str] 

154 """Command line arguments for job. 

155 """ 

156 

157 cmdvals: Optional[dict] 

158 """Values for variables in cmdline when using lazy command line creation. 

159 """ 

160 

161 memory_multiplier: Optional[float] 

162 """Memory growth rate between retries. 

163 """ 

164 

165 request_memory: Optional[int] # MB 

166 """Max memory (in MB) that the job is expected to need. 

167 """ 

168 

169 request_memory_max: Optional[int] # MB 

170 """Max memory (in MB) that the job should ever use. 

171 """ 

172 

173 request_cpus: Optional[int] # cores 

174 """Max number of cpus that the job is expected to need. 

175 """ 

176 

177 request_disk: Optional[int] # MB 

178 """Max amount of job scratch disk (in MB) that the job is expected to need. 

179 """ 

180 

181 request_walltime: Optional[str] # minutes 

182 """Max amount of time (in seconds) that the job is expected to need. 

183 """ 

184 

185 compute_site: Optional[str] 

186 """Key to look up site-specific information for running the job. 

187 """ 

188 

189 accounting_group: Optional[str] 

190 """Name of the accounting group to use. 

191 """ 

192 

193 accounting_user: Optional[str] 

194 """Name of the user to use for accounting purposes. 

195 """ 

196 

197 mail_to: Optional[str] 

198 """Comma separated list of email addresses for emailing job status. 

199 """ 

200 

201 when_to_mail: Optional[str] 

202 """WMS-specific terminology for when to email job status. 

203 """ 

204 

205 number_of_retries: Optional[int] 

206 """Number of times to automatically retry a failed job. 

207 """ 

208 

209 retry_unless_exit: Optional[int] 

210 """Exit code for job that means to not automatically retry. 

211 """ 

212 

213 abort_on_value: Optional[int] 

214 """Job exit value for signals to abort the entire workflow. 

215 """ 

216 

217 abort_return_value: Optional[int] 

218 """Exit value to use when aborting the entire workflow. 

219 """ 

220 

221 priority: Optional[str] 

222 """Initial priority of job in WMS-format. 

223 """ 

224 

225 category: Optional[str] 

226 """WMS-facing label of job within single workflow (e.g., can be used for 

227 throttling jobs within a single workflow). 

228 """ 

229 

230 concurrency_limit: Optional[str] 

231 """Names of concurrency limits that the WMS plugin can appropriately 

232 translate to limit the number of this job across all running workflows. 

233 """ 

234 

235 queue: Optional[str] 

236 """Name of queue to use. Different WMS can translate this concept 

237 differently. 

238 """ 

239 

240 pre_cmdline: Optional[str] 

241 """Command line to be executed prior to executing job. 

242 """ 

243 

244 post_cmdline: Optional[str] 

245 """Command line to be executed after job executes. 

246 

247 Should be executed regardless of exit status. 

248 """ 

249 

250 preemptible: Optional[bool] 

251 """The flag indicating whether the job can be preempted. 

252 """ 

253 

254 profile: Optional[dict] 

255 """Nested dictionary of WMS-specific key/value pairs with primary key being 

256 WMS key (e.g., pegasus, condor, panda). 

257 """ 

258 

259 attrs: Optional[dict] 

260 """Key/value pairs of job attributes (for WMS that have attributes in 

261 addition to commands). 

262 """ 

263 

264 environment: Optional[dict] 

265 """Environment variable names and values to be explicitly set inside job. 

266 """ 

267 

268 # As of python 3.7.8, can't use __slots__ if give default values, so 

269 # writing own __init__. 

270 def __init__(self, name: str): 

271 self.name = name 

272 self.label = None 

273 self.quanta_counts = Counter() 

274 self.tags = {} 

275 self.executable = None 

276 self.arguments = None 

277 self.cmdvals = {} 

278 self.memory_multiplier = None 

279 self.request_memory = None 

280 self.request_memory_max = None 

281 self.request_cpus = None 

282 self.request_disk = None 

283 self.request_walltime = None 

284 self.compute_site = None 

285 self.accounting_group = None 

286 self.accounting_user = None 

287 self.mail_to = None 

288 self.when_to_mail = None 

289 self.number_of_retries = None 

290 self.retry_unless_exit = None 

291 self.abort_on_value = None 

292 self.abort_return_value = None 

293 self.priority = None 

294 self.category = None 

295 self.concurrency_limit = None 

296 self.queue = None 

297 self.pre_cmdline = None 

298 self.post_cmdline = None 

299 self.preemptible = None 

300 self.profile = {} 

301 self.attrs = {} 

302 self.environment = {} 

303 

304 __slots__ = ( 

305 "name", 

306 "label", 

307 "quanta_counts", 

308 "tags", 

309 "mail_to", 

310 "when_to_mail", 

311 "executable", 

312 "arguments", 

313 "cmdvals", 

314 "memory_multiplier", 

315 "request_memory", 

316 "request_memory_max", 

317 "request_cpus", 

318 "request_disk", 

319 "request_walltime", 

320 "number_of_retries", 

321 "retry_unless_exit", 

322 "abort_on_value", 

323 "abort_return_value", 

324 "compute_site", 

325 "accounting_group", 

326 "accounting_user", 

327 "environment", 

328 "priority", 

329 "category", 

330 "concurrency_limit", 

331 "queue", 

332 "pre_cmdline", 

333 "post_cmdline", 

334 "preemptible", 

335 "profile", 

336 "attrs", 

337 ) 

338 

339 def __hash__(self): 

340 return hash(self.name) 

341 

342 

343class GenericWorkflow(DiGraph): 

344 """A generic representation of a workflow used to submit to specific 

345 workflow management systems. 

346 

347 Parameters 

348 ---------- 

349 name : `str` 

350 Name of generic workflow. 

351 incoming_graph_data : `Any`, optional 

352 Data used to initialized graph that is passed through to DiGraph 

353 constructor. Can be any type supported by networkx.DiGraph. 

354 attr : `dict` 

355 Keyword arguments passed through to DiGraph constructor. 

356 """ 

357 

358 def __init__(self, name, incoming_graph_data=None, **attr): 

359 super().__init__(incoming_graph_data, **attr) 

360 self._name = name 

361 self.run_attrs = {} 

362 self._files = {} 

363 self._executables = {} 

364 self._inputs = {} # mapping job.names to list of GenericWorkflowFile 

365 self._outputs = {} # mapping job.names to list of GenericWorkflowFile 

366 self.run_id = None 

367 self._final = None 

368 

369 @property 

370 def name(self): 

371 """Retrieve name of generic workflow. 

372 

373 Returns 

374 ------- 

375 name : `str` 

376 Name of generic workflow. 

377 """ 

378 return self._name 

379 

380 @property 

381 def quanta_counts(self): 

382 """Count of quanta per task label (`collections.Counter`).""" 

383 qcounts = Counter() 

384 for job_name in self: 

385 gwjob = self.get_job(job_name) 

386 if gwjob.quanta_counts is not None: 

387 qcounts += gwjob.quanta_counts 

388 return qcounts 

389 

390 @property 

391 def job_counts(self): 

392 """Count of jobs per job label (`collections.Counter`).""" 

393 jcounts = Counter() 

394 for job_name in self: 

395 gwjob = self.get_job(job_name) 

396 jcounts[gwjob.label] += 1 

397 

398 # Final is separate 

399 final = self.get_final() 

400 if final: 

401 if isinstance(final, GenericWorkflow): 

402 for job_name in final: 

403 gwjob = final.get_job(job_name) 

404 jcounts[gwjob.label] += 1 

405 else: 

406 jcounts[final.label] += 1 

407 

408 return jcounts 

409 

410 def __iter__(self): 

411 """Return iterator of job names in topologically sorted order.""" 

412 return topological_sort(self) 

413 

414 def get_files(self, data=False, transfer_only=True): 

415 """Retrieve files from generic workflow. 

416 

417 Need API in case change way files are stored (e.g., make 

418 workflow a bipartite graph with jobs and files nodes). 

419 

420 Parameters 

421 ---------- 

422 data : `bool`, optional 

423 Whether to return the file data as well as the file object name. 

424 (The defaults is False.) 

425 transfer_only : `bool`, optional 

426 Whether to only return files for which a workflow management system 

427 would be responsible for transferring. 

428 

429 Returns 

430 ------- 

431 files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`] 

432 File names or objects from generic workflow meeting specifications. 

433 """ 

434 files = [] 

435 for filename, file in self._files.items(): 

436 if not transfer_only or file.wms_transfer: 

437 if not data: 

438 files.append(filename) 

439 else: 

440 files.append(file) 

441 return files 

442 

443 def add_job(self, job, parent_names=None, child_names=None): 

444 """Add job to generic workflow. 

445 

446 Parameters 

447 ---------- 

448 job : `lsst.ctrl.bps.GenericWorkflowJob` 

449 Job to add to the generic workflow. 

450 parent_names : `list` [`str`], optional 

451 Names of jobs that are parents of given job 

452 child_names : `list` [`str`], optional 

453 Names of jobs that are children of given job 

454 """ 

455 if not isinstance(job, GenericWorkflowJob): 

456 raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).") 

457 if self.has_node(job.name): 

458 raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.") 

459 super().add_node(job.name, job=job) 

460 self.add_job_relationships(parent_names, job.name) 

461 self.add_job_relationships(job.name, child_names) 

462 self.add_executable(job.executable) 

463 

464 def add_node(self, node_for_adding, **attr): 

465 """Override networkx function to call more specific add_job function. 

466 

467 Parameters 

468 ---------- 

469 node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob` 

470 Job to be added to generic workflow. 

471 attr : 

472 Needed to match original networkx function, but not used. 

473 """ 

474 self.add_job(node_for_adding) 

475 

476 def add_job_relationships(self, parents, children): 

477 """Add dependencies between parent and child jobs. All parents will 

478 be connected to all children. 

479 

480 Parameters 

481 ---------- 

482 parents : `list` [`str`] 

483 Parent job names. 

484 children : `list` [`str`] 

485 Children job names. 

486 """ 

487 if parents is not None and children is not None: 

488 self.add_edges_from(itertools.product(ensure_iterable(parents), ensure_iterable(children))) 

489 

490 def add_edges_from(self, ebunch_to_add, **attr): 

491 """Add several edges between jobs in the generic workflow. 

492 

493 Parameters 

494 ---------- 

495 ebunch_to_add : Iterable [`tuple`] 

496 Iterable of job name pairs between which a dependency should be 

497 saved. 

498 attr : keyword arguments, optional 

499 Data can be assigned using keyword arguments (not currently used). 

500 """ 

501 for edge_to_add in ebunch_to_add: 

502 self.add_edge(edge_to_add[0], edge_to_add[1], **attr) 

503 

504 def add_edge(self, u_of_edge: str, v_of_edge: str, **attr): 

505 """Add edge connecting jobs in workflow. 

506 

507 Parameters 

508 ---------- 

509 u_of_edge : `str` 

510 Name of parent job. 

511 v_of_edge : `str` 

512 Name of child job. 

513 attr : keyword arguments, optional 

514 Attributes to save with edge. 

515 """ 

516 if u_of_edge not in self: 

517 raise RuntimeError(f"{u_of_edge} not in GenericWorkflow") 

518 if v_of_edge not in self: 

519 raise RuntimeError(f"{v_of_edge} not in GenericWorkflow") 

520 super().add_edge(u_of_edge, v_of_edge, **attr) 

521 

522 def get_job(self, job_name: str): 

523 """Retrieve job by name from workflow. 

524 

525 Parameters 

526 ---------- 

527 job_name : `str` 

528 Name of job to retrieve. 

529 

530 Returns 

531 ------- 

532 job : `lsst.ctrl.bps.GenericWorkflowJob` 

533 Job matching given job_name. 

534 """ 

535 return self.nodes[job_name]["job"] 

536 

537 def del_job(self, job_name: str): 

538 """Delete job from generic workflow leaving connected graph. 

539 

540 Parameters 

541 ---------- 

542 job_name : `str` 

543 Name of job to delete from workflow. 

544 """ 

545 # Connect all parent jobs to all children jobs. 

546 parents = self.predecessors(job_name) 

547 children = self.successors(job_name) 

548 self.add_job_relationships(parents, children) 

549 

550 # Delete job node (which deleted edges). 

551 self.remove_node(job_name) 

552 

553 def add_job_inputs(self, job_name, files): 

554 """Add files as inputs to specified job. 

555 

556 Parameters 

557 ---------- 

558 job_name : `str` 

559 Name of job to which inputs should be added 

560 files : `lsst.ctrl.bps.GenericWorkflowFile` or \ 

561 `list` [`lsst.ctrl.bps.GenericWorkflowFile`] 

562 File object(s) to be added as inputs to the specified job. 

563 """ 

564 self._inputs.setdefault(job_name, []) 

565 for file in ensure_iterable(files): 

566 # Save the central copy 

567 if file.name not in self._files: 

568 self._files[file.name] = file 

569 

570 # Save the job reference to the file 

571 self._inputs[job_name].append(file) 

572 

573 def get_file(self, name): 

574 """Retrieve a file object by name. 

575 

576 Parameters 

577 ---------- 

578 name : `str` 

579 Name of file object 

580 

581 Returns 

582 ------- 

583 gwfile : `lsst.ctrl.bps.GenericWorkflowFile` 

584 File matching given name. 

585 """ 

586 return self._files[name] 

587 

588 def add_file(self, gwfile): 

589 """Add file object. 

590 

591 Parameters 

592 ---------- 

593 gwfile : `lsst.ctrl.bps.GenericWorkflowFile` 

594 File object to add to workflow 

595 """ 

596 if gwfile.name not in self._files: 

597 self._files[gwfile.name] = gwfile 

598 else: 

599 _LOG.debug("Skipped add_file for existing file %s", gwfile.name) 

600 

601 def get_job_inputs(self, job_name, data=True, transfer_only=False): 

602 """Return the input files for the given job. 

603 

604 Parameters 

605 ---------- 

606 job_name : `str` 

607 Name of the job. 

608 data : `bool`, optional 

609 Whether to return the file data as well as the file object name. 

610 transfer_only : `bool`, optional 

611 Whether to only return files for which a workflow management system 

612 would be responsible for transferring. 

613 

614 Returns 

615 ------- 

616 inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] 

617 Input files for the given job. If no input files for the job, 

618 returns an empty list. 

619 """ 

620 inputs = [] 

621 if job_name in self._inputs: 

622 for gwfile in self._inputs[job_name]: 

623 if not transfer_only or gwfile.wms_transfer: 

624 if not data: 

625 inputs.append(gwfile.name) 

626 else: 

627 inputs.append(gwfile) 

628 return inputs 

629 

630 def add_job_outputs(self, job_name, files): 

631 """Add output files to a job. 

632 

633 Parameters 

634 ---------- 

635 job_name : `str` 

636 Name of job to which the files should be added as outputs. 

637 files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] 

638 File objects to be added as outputs for specified job. 

639 """ 

640 self._outputs.setdefault(job_name, []) 

641 

642 for file_ in ensure_iterable(files): 

643 # Save the central copy 

644 if file_.name not in self._files: 

645 self._files[file_.name] = file_ 

646 

647 # Save the job reference to the file 

648 self._outputs[job_name].append(file_) 

649 

650 def get_job_outputs(self, job_name, data=True, transfer_only=False): 

651 """Return the output files for the given job. 

652 

653 Parameters 

654 ---------- 

655 job_name : `str` 

656 Name of the job. 

657 data : `bool` 

658 Whether to return the file data as well as the file object name. 

659 It defaults to `True` thus returning file data as well. 

660 transfer_only : `bool` 

661 Whether to only return files for which a workflow management system 

662 would be responsible for transferring. It defaults to `False` thus 

663 returning all output files. 

664 

665 Returns 

666 ------- 

667 outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] 

668 Output files for the given job. If no output files for the job, 

669 returns an empty list. 

670 """ 

671 outputs = [] 

672 

673 if job_name in self._outputs: 

674 for file_name in self._outputs[job_name]: 

675 file = self._files[file_name] 

676 if not transfer_only or file.wms_transfer: 

677 if not data: 

678 outputs.append(file_name) 

679 else: 

680 outputs.append(self._files[file_name]) 

681 return outputs 

682 

683 def draw(self, stream, format_="dot"): 

684 """Output generic workflow in a visualization format. 

685 

686 Parameters 

687 ---------- 

688 stream : `str` or `io.BufferedIOBase` 

689 Stream to which the visualization should be written. 

690 format_ : `str`, optional 

691 Which visualization format to use. It defaults to the format for 

692 the dot program. 

693 """ 

694 draw_funcs = {"dot": draw_networkx_dot} 

695 if format_ in draw_funcs: 

696 draw_funcs[format_](self, stream) 

697 else: 

698 raise RuntimeError(f"Unknown draw format ({format_}") 

699 

700 def save(self, stream, format_="pickle"): 

701 """Save the generic workflow in a format that is loadable. 

702 

703 Parameters 

704 ---------- 

705 stream : `str` or `io.BufferedIOBase` 

706 Stream to pass to the format-specific writer. Accepts anything 

707 that the writer accepts. 

708 

709 format_ : `str`, optional 

710 Format in which to write the data. It defaults to pickle format. 

711 """ 

712 if format_ == "pickle": 

713 write_gpickle(self, stream) 

714 else: 

715 raise RuntimeError(f"Unknown format ({format_})") 

716 

717 @classmethod 

718 def load(cls, stream, format_="pickle"): 

719 """Load a GenericWorkflow from the given stream 

720 

721 Parameters 

722 ---------- 

723 stream : `str` or `io.BufferedIOBase` 

724 Stream to pass to the format-specific loader. Accepts anything that 

725 the loader accepts. 

726 format_ : `str`, optional 

727 Format of data to expect when loading from stream. It defaults 

728 to pickle format. 

729 

730 Returns 

731 ------- 

732 generic_workflow : `lsst.ctrl.bps.GenericWorkflow` 

733 Generic workflow loaded from the given stream 

734 """ 

735 if format_ == "pickle": 

736 return read_gpickle(stream) 

737 

738 raise RuntimeError(f"Unknown format ({format_})") 

739 

740 def validate(self): 

741 """Run checks to ensure that the generic workflow graph is valid.""" 

742 # Make sure a directed acyclic graph 

743 assert is_directed_acyclic_graph(self) 

744 

745 def add_workflow_source(self, workflow): 

746 """Add given workflow as new source to this workflow. 

747 

748 Parameters 

749 ---------- 

750 workflow : `lsst.ctrl.bps.GenericWorkflow` 

751 """ 

752 # Find source nodes in self. 

753 self_sources = [n for n in self if self.in_degree(n) == 0] 

754 _LOG.debug("self_sources = %s", self_sources) 

755 

756 # Find sink nodes of workflow. 

757 new_sinks = [n for n in workflow if workflow.out_degree(n) == 0] 

758 _LOG.debug("new sinks = %s", new_sinks) 

759 

760 # Add new workflow nodes to self graph and make new edges. 

761 self.add_nodes_from(workflow.nodes(data=True)) 

762 self.add_edges_from(workflow.edges()) 

763 for source in self_sources: 

764 for sink in new_sinks: 

765 self.add_edge(sink, source) 

766 

767 # Files are stored separately so copy them. 

768 for job_name in workflow: 

769 self.add_job_inputs(job_name, workflow.get_job_inputs(job_name, data=True)) 

770 self.add_job_outputs(job_name, workflow.get_job_outputs(job_name, data=True)) 

771 self.add_executable(workflow.get_job(job_name).executable) 

772 

773 def add_final(self, final): 

774 """Add special final job/workflow to the generic workflow. 

775 

776 Parameters 

777 ---------- 

778 final : `lsst.ctrl.bps.GenericWorkflowJob` or \ 

779 `lsst.ctrl.bps.GenericWorkflow` 

780 Information needed to execute the special final job(s), the 

781 job(s) to be executed after all jobs that can be executed 

782 have been executed regardless of exit status of any of the 

783 jobs. 

784 """ 

785 if not isinstance(final, GenericWorkflowJob) and not isinstance(final, GenericWorkflow): 

786 raise TypeError("Invalid type for GenericWorkflow final ({type(final)})") 

787 

788 self._final = final 

789 if isinstance(final, GenericWorkflowJob): 

790 self.add_executable(final.executable) 

791 

792 def get_final(self): 

793 """Return job/workflow to be executed after all jobs that can be 

794 executed have been executed regardless of exit status of any of 

795 the jobs. 

796 

797 Returns 

798 ------- 

799 final : `lsst.ctrl.bps.GenericWorkflowJob` or \ 

800 `lsst.ctrl.bps.GenericWorkflow` 

801 Information needed to execute final job(s). 

802 """ 

803 return self._final 

804 

805 def add_executable(self, executable): 

806 """Add executable to workflow's list of executables. 

807 

808 Parameters 

809 ---------- 

810 executable : `lsst.ctrl.bps.GenericWorkflowExec` 

811 Executable object to be added to workflow. 

812 """ 

813 if executable is not None: 

814 self._executables[executable.name] = executable 

815 else: 

816 _LOG.warning("executable not specified (None); cannot add to the workflow's list of executables") 

817 

818 def get_executables(self, data=False, transfer_only=True): 

819 """Retrieve executables from generic workflow. 

820 

821 Parameters 

822 ---------- 

823 data : `bool`, optional 

824 Whether to return the executable data as well as the exec object 

825 name. (The defaults is False.) 

826 transfer_only : `bool`, optional 

827 Whether to only return executables for which transfer_executable 

828 is True. 

829 

830 Returns 

831 ------- 

832 execs : `list` [`lsst.ctrl.bps.GenericWorkflowExec`] or `list` [`str`] 

833 Filtered executable names or objects from generic workflow. 

834 """ 

835 execs = [] 

836 for name, executable in self._executables.items(): 

837 if not transfer_only or executable.transfer_executable: 

838 if not data: 

839 execs.append(name) 

840 else: 

841 execs.append(executable) 

842 return execs