Coverage for python/lsst/ctrl/bps/generic_workflow.py: 29% (269 statements)

# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Class definitions for a Generic Workflow Graph."""

__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob", "GenericWorkflowExec"]


import dataclasses
import itertools
import logging
from collections import Counter
from typing import Optional

from lsst.utils.iteration import ensure_iterable
from networkx import DiGraph, read_gpickle, topological_sort, write_gpickle
from networkx.algorithms.dag import is_directed_acyclic_graph

from .bps_draw import draw_networkx_dot

_LOG = logging.getLogger(__name__)


@dataclasses.dataclass
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.
    """

    name: str
    """Lookup key (logical file name) of file/directory. Must be unique
    within run.
    """

    src_uri: Optional[str]  # don't know that need ResourcePath
    """Original location of file/directory.
    """

    wms_transfer: bool
    """Whether the WMS should be responsible for transferring the file.
    Default is False.
    """

    job_access_remote: bool
    """Whether the job can remotely access file (using separately specified
    file access protocols). Default is False.
    """

    job_shared: bool
    """Whether the file can be shared between jobs instead of each job
    requiring its own copy. Default is False.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if give default
    # values, so writing own __init__.
    def __init__(
        self,
        name: str,
        src_uri: Optional[str] = None,
        wms_transfer: bool = False,
        job_access_remote: bool = False,
        job_shared: bool = False,
    ):
        self.name = name
        self.src_uri = src_uri
        self.wms_transfer = wms_transfer
        self.job_access_remote = job_access_remote
        self.job_shared = job_shared

    __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")

    def __hash__(self):
        return hash(self.name)
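# Example (a sketch with hypothetical name and path): constructing a file
# object that the WMS must stage to the job.
#
#     butler_file = GenericWorkflowFile(
#         "butlerConfig",
#         src_uri="/repo/butler.yaml",
#         wms_transfer=True,
#         job_shared=True,
#     )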

@dataclasses.dataclass
class GenericWorkflowExec:
    """Information about an executable that may be needed by various workflow
    management services.
    """

    name: str
    """Lookup key (logical file name) of executable. Must be unique
    within run.
    """

    src_uri: Optional[str]  # don't know that need ResourcePath
    """Original location of executable.
    """

    transfer_executable: bool
    """Whether the WMS/plugin is responsible for staging executable to
    location usable by job.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if give default
    # values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, transfer_executable: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.transfer_executable = transfer_executable

    __slots__ = ("name", "src_uri", "transfer_executable")

    def __hash__(self):
        return hash(self.name)
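# Example (a sketch with a hypothetical executable): an executable the
# WMS/plugin should stage to the compute site rather than assume is
# already installed there.
#
#     pipetask_exec = GenericWorkflowExec(
#         "pipetask", src_uri="/software/bin/pipetask", transfer_executable=True
#     )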

@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.
    """

    name: str
    """Name of job. Must be unique within workflow.
    """

    label: Optional[str]
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    quanta_counts: Optional[Counter]
    """Counts of quanta per task label in job.
    """

    tags: Optional[dict]
    """Other key/value pairs for job that user may want to use as a filter.
    """

    executable: Optional[GenericWorkflowExec]
    """Executable for job.
    """

    arguments: Optional[str]
    """Command line arguments for job.
    """

    cmdvals: Optional[dict]
    """Values for variables in cmdline when using lazy command line creation.
    """

    memory_multiplier: Optional[float]
    """Memory growth rate between retries.
    """

    request_memory: Optional[int]  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_memory_max: Optional[int]  # MB
    """Max memory (in MB) that the job should ever use.
    """

    request_cpus: Optional[int]  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: Optional[int]  # MB
    """Max amount of job scratch disk (in MB) that the job is expected
    to need.
    """

    request_walltime: Optional[str]  # seconds
    """Max amount of time (in seconds) that the job is expected to need.
    """

    compute_site: Optional[str]
    """Key to look up site-specific information for running the job.
    """

    mail_to: Optional[str]
    """Comma separated list of email addresses for emailing job status.
    """

    when_to_mail: Optional[str]
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: Optional[int]
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: Optional[int]
    """Exit code for job that means to not automatically retry.
    """

    abort_on_value: Optional[int]
    """Job exit value that signals the entire workflow should be aborted.
    """

    abort_return_value: Optional[int]
    """Exit value to use when aborting the entire workflow.
    """

    priority: Optional[str]
    """Initial priority of job in WMS format.
    """

    category: Optional[str]
    """WMS-facing label of job within single workflow (e.g., can be used for
    throttling jobs within a single workflow).
    """

    concurrency_limit: Optional[str]
    """Names of concurrency limits that the WMS plugin can appropriately
    translate to limit the number of instances of this job across all
    running workflows.
    """

    queue: Optional[str]
    """Name of queue to use. Different WMS can translate this concept
    differently.
    """

    pre_cmdline: Optional[str]
    """Command line to be executed prior to executing job.
    """

    post_cmdline: Optional[str]
    """Command line to be executed after job executes.

    Should be executed regardless of exit status.
    """

    preemptible: Optional[bool]
    """Flag indicating whether the job can be preempted.
    """

    profile: Optional[dict]
    """Nested dictionary of WMS-specific key/value pairs with primary key
    being WMS key (e.g., pegasus, condor, panda).
    """

    attrs: Optional[dict]
    """Key/value pairs of job attributes (for WMS that have attributes in
    addition to commands).
    """

    environment: Optional[dict]
    """Environment variable names and values to be explicitly set inside job.
    """

    # As of python 3.7.8, can't use __slots__ if give default values, so
    # writing own __init__.
    def __init__(self, name: str):
        self.name = name
        self.label = None
        self.quanta_counts = Counter()
        self.tags = {}
        self.executable = None
        self.arguments = None
        self.cmdvals = {}
        self.memory_multiplier = None
        self.request_memory = None
        self.request_memory_max = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = None
        self.queue = None
        self.pre_cmdline = None
        self.post_cmdline = None
        self.preemptible = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}

    __slots__ = (
        "name",
        "label",
        "quanta_counts",
        "tags",
        "mail_to",
        "when_to_mail",
        "executable",
        "arguments",
        "cmdvals",
        "memory_multiplier",
        "request_memory",
        "request_memory_max",
        "request_cpus",
        "request_disk",
        "request_walltime",
        "number_of_retries",
        "retry_unless_exit",
        "abort_on_value",
        "abort_return_value",
        "compute_site",
        "environment",
        "priority",
        "category",
        "concurrency_limit",
        "queue",
        "pre_cmdline",
        "post_cmdline",
        "preemptible",
        "profile",
        "attrs",
    )

    def __hash__(self):
        return hash(self.name)
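# Example (a sketch with hypothetical values): a job running the executable
# sketched above, with resource requests a WMS plugin can translate into
# its own syntax.
#
#     job = GenericWorkflowJob("calibrate_903334")
#     job.label = "calibrate"
#     job.executable = pipetask_exec
#     job.arguments = "run -b /repo ..."
#     job.request_memory = 2048  # MB
#     job.request_cpus = 1
#     job.quanta_counts = Counter({"calibrate": 1})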

class GenericWorkflow(DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialize graph that is passed through to DiGraph
        constructor. Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to DiGraph constructor.
    """

    def __init__(self, name, incoming_graph_data=None, **attr):
        super().__init__(incoming_graph_data, **attr)
        self._name = name
        self.run_attrs = {}
        self._files = {}
        self._executables = {}
        self._inputs = {}  # mapping job.names to list of GenericWorkflowFile
        self._outputs = {}  # mapping job.names to list of GenericWorkflowFile
        self.run_id = None
        self._final = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    @property
    def quanta_counts(self):
        """Count of quanta per task label (`collections.Counter`)."""
        qcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            if gwjob.quanta_counts is not None:
                qcounts += gwjob.quanta_counts
        return qcounts

    @property
    def job_counts(self):
        """Count of jobs per job label (`collections.Counter`)."""
        jcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            jcounts[gwjob.label] += 1

        # The final job/workflow is stored separately from the graph,
        # so count it separately.
        final = self.get_final()
        if final:
            if isinstance(final, GenericWorkflow):
                for job_name in final:
                    gwjob = final.get_job(job_name)
                    jcounts[gwjob.label] += 1
            else:
                jcounts[final.label] += 1

        return jcounts
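    # Example (hypothetical output): with jobs added as in the sketches
    # above, the two properties summarize the graph.
    #
    #     gwf.job_counts     ->  Counter({"isr": 189, "calibrate": 189})
    #     gwf.quanta_counts  ->  Counter({"isr": 189, "calibrate": 189})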

    def __iter__(self):
        """Return iterator of job names in topologically sorted order."""
        return topological_sort(self)

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        This method is the API in case the way files are stored changes
        (e.g., making the workflow a bipartite graph with job and file
        nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
            (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            File names or objects from generic workflow meeting specifications.
        """
        files = []
        for filename, file in self._files.items():
            if not transfer_only or file.wms_transfer:
                if not data:
                    files.append(filename)
                else:
                    files.append(file)
        return files

    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job.
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job.
        """
        if not isinstance(job, GenericWorkflowJob):
            raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).")
        if self.has_node(job.name):
            raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.")
        super().add_node(job.name, job=job)
        self.add_job_relationships(parent_names, job.name)
        self.add_job_relationships(job.name, child_names)
        self.add_executable(job.executable)

    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs. All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Children job names.
        """
        if parents is not None and children is not None:
            self.add_edges_from(itertools.product(ensure_iterable(parents), ensure_iterable(children)))
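    # Example (a sketch with hypothetical job names): building a small
    # diamond-shaped graph; add_job connects parents/children at insertion
    # time and add_job_relationships can add more dependencies later.
    #
    #     gwf = GenericWorkflow("example_run")
    #     gwf.add_job(GenericWorkflowJob("pipetaskInit"))
    #     gwf.add_job(GenericWorkflowJob("isr_1"), parent_names=["pipetaskInit"])
    #     gwf.add_job(GenericWorkflowJob("isr_2"), parent_names=["pipetaskInit"])
    #     gwf.add_job(GenericWorkflowJob("gather"))
    #     gwf.add_job_relationships(["isr_1", "isr_2"], "gather")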

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should be
            saved.
        attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently used).
        """
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow leaving connected graph.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
        """
        # Connect all parent jobs to all children jobs.
        parents = self.predecessors(job_name)
        children = self.successors(job_name)
        self.add_job_relationships(parents, children)

        # Delete job node (which also deletes its edges).
        self.remove_node(job_name)

    def add_job_inputs(self, job_name, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added.
        files : `lsst.ctrl.bps.GenericWorkflowFile` or \
                `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File object(s) to be added as inputs to the specified job.
        """
        self._inputs.setdefault(job_name, [])
        for file in ensure_iterable(files):
            # Save the central copy.
            if file.name not in self._files:
                self._files[file.name] = file

            # Save the job reference to the file.
            self._inputs[job_name].append(file)

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object.

        Returns
        -------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File object to add to workflow.
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile
        else:
            _LOG.debug("Skipped add_file for existing file %s", gwfile.name)

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input files for the given job. If the job has no input files,
            returns an empty list.
        """
        inputs = []
        if job_name in self._inputs:
            for gwfile in self._inputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        inputs.append(gwfile.name)
                    else:
                        inputs.append(gwfile)
        return inputs

    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File objects to be added as outputs for specified job.
        """
        self._outputs.setdefault(job_name, [])

        for file_ in ensure_iterable(files):
            # Save the central copy.
            if file_.name not in self._files:
                self._files[file_.name] = file_

            # Save the job reference to the file.
            self._outputs[job_name].append(file_)

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management
            system would be responsible for transferring. It defaults to
            `False` thus returning all output files.

        Returns
        -------
        outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Output files for the given job. If the job has no output files,
            returns an empty list.
        """
        outputs = []

        if job_name in self._outputs:
            # _outputs stores GenericWorkflowFile objects (see
            # add_job_outputs), so iterate over the objects directly.
            for gwfile in self._outputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        outputs.append(gwfile.name)
                    else:
                        outputs.append(gwfile)
        return outputs
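    # Example (continuing the hypothetical sketches above): attaching the
    # transferred file to a job and reading it back.
    #
    #     gwf.add_job_inputs("isr_1", butler_file)
    #     gwf.get_job_inputs("isr_1", data=False)        ->  ["butlerConfig"]
    #     gwf.get_files(data=False, transfer_only=True)  ->  ["butlerConfig"]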

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use. It defaults to the format for
            the dot program.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            raise RuntimeError(f"Unknown draw format ({format_})")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer. Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data. It defaults to pickle format.
        """
        if format_ == "pickle":
            write_gpickle(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader. Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream. It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow loaded from the given stream.
        """
        if format_ == "pickle":
            return read_gpickle(stream)

        raise RuntimeError(f"Unknown format ({format_})")
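    # Example (hypothetical path): persisting a workflow and reading it
    # back.  networkx's gpickle helpers accept a filename or an open
    # binary stream.
    #
    #     gwf.save("/tmp/example_run.pickle")
    #     gwf2 = GenericWorkflow.load("/tmp/example_run.pickle")
    #     assert gwf2.name == gwf.name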

    def validate(self):
        """Run checks to ensure that the generic workflow graph is valid."""
        # Make sure it is a directed acyclic graph.
        assert is_directed_acyclic_graph(self)

    def add_workflow_source(self, workflow):
        """Add given workflow as new source to this workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.GenericWorkflow`
            Workflow whose jobs should run before the jobs currently in
            this workflow.
        """
        # Find source nodes in self.
        self_sources = [n for n in self if self.in_degree(n) == 0]
        _LOG.debug("self_sources = %s", self_sources)

        # Find sink nodes of workflow.
        new_sinks = [n for n in workflow if workflow.out_degree(n) == 0]
        _LOG.debug("new sinks = %s", new_sinks)

        # Add new workflow nodes to self graph and make new edges.
        self.add_nodes_from(workflow.nodes(data=True))
        self.add_edges_from(workflow.edges())
        for source in self_sources:
            for sink in new_sinks:
                self.add_edge(sink, source)

        # Files are stored separately so copy them.
        for job_name in workflow:
            self.add_job_inputs(job_name, workflow.get_job_inputs(job_name, data=True))
            self.add_job_outputs(job_name, workflow.get_job_outputs(job_name, data=True))
            self.add_executable(workflow.get_job(job_name).executable)

    def add_final(self, final):
        """Add special final job/workflow to the generic workflow.

        Parameters
        ----------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute the special final job(s), the
            job(s) to be executed after all jobs that can be executed
            have been executed regardless of exit status of any of the
            jobs.
        """
        if not isinstance(final, GenericWorkflowJob) and not isinstance(final, GenericWorkflow):
            raise TypeError(f"Invalid type for GenericWorkflow final ({type(final)})")

        self._final = final
        if isinstance(final, GenericWorkflowJob):
            self.add_executable(final.executable)

    def get_final(self):
        """Return job/workflow to be executed after all jobs that can be
        executed have been executed regardless of exit status of any of
        the jobs.

        Returns
        -------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute final job(s).
        """
        return self._final

    def add_executable(self, executable):
        """Add executable to workflow's list of executables.

        Parameters
        ----------
        executable : `lsst.ctrl.bps.GenericWorkflowExec`
            Executable object to be added to workflow.
        """
        if executable is not None:
            self._executables[executable.name] = executable
        else:
            _LOG.warning("executable not specified (None); cannot add to the workflow's list of executables")

    def get_executables(self, data=False, transfer_only=True):
        """Retrieve executables from generic workflow.

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the executable data as well as the exec object
            name. (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return executables for which transfer_executable
            is True.

        Returns
        -------
        execs : `list` [`lsst.ctrl.bps.GenericWorkflowExec`] or `list` [`str`]
            Filtered executable names or objects from generic workflow.
        """
        execs = []
        for name, executable in self._executables.items():
            if not transfer_only or executable.transfer_executable:
                if not data:
                    execs.append(name)
                else:
                    execs.append(executable)
        return execs
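# A minimal end-to-end sketch (hypothetical pipeline names and paths, not
# part of the ctrl_bps API): build a two-job workflow, attach a transferred
# input, and validate the graph.  Kept as an unused private helper so that
# importing the module has no side effects.
def _example_workflow():
    init = GenericWorkflowJob("pipetaskInit")
    init.label = "init"
    science = GenericWorkflowJob("isr_903334_22")
    science.label = "isr"

    gwf = GenericWorkflow("u_example_run")
    gwf.add_job(init)
    gwf.add_job(science, parent_names=["pipetaskInit"])
    gwf.add_job_inputs(
        "isr_903334_22",
        GenericWorkflowFile("butlerConfig", src_uri="/repo/butler.yaml", wms_transfer=True),
    )
    gwf.validate()
    return gwf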