Coverage for python/lsst/ctrl/bps/generic_workflow.py: 29%

269 statements  

# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Class definitions for a Generic Workflow Graph.
"""

__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob", "GenericWorkflowExec"]


import dataclasses
import itertools
import logging
from collections import Counter
from typing import Optional

from networkx import DiGraph, read_gpickle, write_gpickle, topological_sort
from networkx.algorithms.dag import is_directed_acyclic_graph

from lsst.utils.iteration import ensure_iterable
from .bps_draw import draw_networkx_dot

_LOG = logging.getLogger(__name__)


@dataclasses.dataclass
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.
    """
    name: str
    """Lookup key (logical file name) of file/directory. Must be unique
    within run.
    """

    src_uri: Optional[str]  # don't know that need ButlerURI
    """Original location of file/directory.
    """

    wms_transfer: bool
    """Whether the WMS should handle transferring the file. Default is
    False.
    """

    job_access_remote: bool
    """Whether the job can remotely access file (using separately specified
    file access protocols). Default is False.
    """

    job_shared: bool
    """Whether the file can be shared across jobs (i.e., the job does not
    require its own copy). Default is False.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if given default
    # values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, wms_transfer: bool = False,
                 job_access_remote: bool = False, job_shared: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.wms_transfer = wms_transfer
        self.job_access_remote = job_access_remote
        self.job_shared = job_shared

    __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")

    def __hash__(self):
        return hash(self.name)
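A minimal usage sketch (the logical name and URI below are hypothetical). Hashing keys on `name`, so files are identified by their logical file name:

    >>> from lsst.ctrl.bps.generic_workflow import GenericWorkflowFile
    >>> gwfile = GenericWorkflowFile("butlerConfig", src_uri="/repo/butler.yaml",
    ...                              wms_transfer=True)
    >>> gwfile.job_shared
    False
    >>> hash(gwfile) == hash("butlerConfig")
    True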

@dataclasses.dataclass
class GenericWorkflowExec:
    """Information about an executable that may be needed by various workflow
    management services.
    """
    name: str
    """Lookup key (logical file name) of executable. Must be unique
    within run.
    """

    src_uri: Optional[str]  # don't know that need ButlerURI
    """Original location of executable.
    """

    transfer_executable: bool
    """Whether the WMS/plugin is responsible for staging executable to
    location usable by job.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if given default
    # values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, transfer_executable: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.transfer_executable = transfer_executable

    __slots__ = ("name", "src_uri", "transfer_executable")

    def __hash__(self):
        return hash(self.name)

@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.
    """
    name: str
    """Name of job. Must be unique within workflow.
    """

    label: Optional[str]
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    quanta_counts: Optional[Counter]
    """Counts of quanta per task label in job.
    """

    tags: Optional[dict]
    """Other key/value pairs for job that user may want to use as a filter.
    """

    executable: Optional[GenericWorkflowExec]
    """Executable for job.
    """

    arguments: Optional[str]
    """Command line arguments for job.
    """

    cmdvals: Optional[dict]
    """Values for variables in cmdline when using lazy command line creation.
    """

    memory_multiplier: Optional[float]
    """Memory growth rate between retries.
    """

    request_memory: Optional[int]  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_memory_max: Optional[int]  # MB
    """Max memory (in MB) that the job should ever use.
    """

    request_cpus: Optional[int]  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: Optional[int]  # MB
    """Max amount of job scratch disk (in MB) that the job is expected to
    need.
    """

    request_walltime: Optional[str]  # minutes
    """Max amount of time (in minutes) that the job is expected to need.
    """

    compute_site: Optional[str]
    """Key to look up site-specific information for running the job.
    """

    mail_to: Optional[str]
    """Comma separated list of email addresses for emailing job status.
    """

    when_to_mail: Optional[str]
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: Optional[int]
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: Optional[int]
    """Exit code for job that means to not automatically retry.
    """

    abort_on_value: Optional[int]
    """Job exit value that signals the entire workflow should be aborted.
    """

    abort_return_value: Optional[int]
    """Exit value to use when aborting the entire workflow.
    """

    priority: Optional[str]
    """Initial priority of job in WMS format.
    """

    category: Optional[str]
    """WMS-facing label of job within single workflow (e.g., can be used for
    throttling jobs within a single workflow).
    """

    concurrency_limit: Optional[str]
    """Name of concurrency limit that the WMS plugin can appropriately
    translate to limit the number of this job across all running workflows.
    """

    queue: Optional[str]
    """Name of queue to use. Different WMS can translate this concept
    differently.
    """

    pre_cmdline: Optional[str]
    """Command line to be executed prior to executing job.
    """

    post_cmdline: Optional[str]
    """Command line to be executed after job executes.

    Should be executed regardless of exit status.
    """

    preemptible: Optional[bool]
    """Flag indicating whether the job can be preempted.
    """

    profile: Optional[dict]
    """Nested dictionary of WMS-specific key/value pairs with primary key
    being WMS key (e.g., pegasus, condor, panda).
    """

    attrs: Optional[dict]
    """Key/value pairs of job attributes (for WMS that have attributes in
    addition to commands).
    """

    environment: Optional[dict]
    """Environment variable names and values to be explicitly set inside job.
    """

    # As of python 3.7.8, can't use __slots__ if given default values, so
    # writing own __init__.
    def __init__(self, name: str):
        self.name = name
        self.label = None
        self.quanta_counts = Counter()
        self.tags = {}
        self.executable = None
        self.arguments = None
        self.cmdvals = {}
        self.memory_multiplier = None
        self.request_memory = None
        self.request_memory_max = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = None
        self.queue = None
        self.pre_cmdline = None
        self.post_cmdline = None
        self.preemptible = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}

    __slots__ = ("name", "label", "quanta_counts", "tags", "mail_to", "when_to_mail",
                 "executable", "arguments", "cmdvals",
                 "memory_multiplier", "request_memory", "request_memory_max", "request_cpus",
                 "request_disk", "request_walltime", "number_of_retries", "retry_unless_exit",
                 "abort_on_value", "abort_return_value", "compute_site", "environment", "priority",
                 "category", "concurrency_limit", "queue", "pre_cmdline", "post_cmdline",
                 "preemptible", "profile", "attrs")

    def __hash__(self):
        return hash(self.name)
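A sketch of building a job (the name, label, executable path, and memory value are hypothetical); every field other than `name` starts at the default set in `__init__`:

    >>> from lsst.ctrl.bps.generic_workflow import GenericWorkflowExec, GenericWorkflowJob
    >>> gwjob = GenericWorkflowJob("calibrate_visit_903342")
    >>> gwjob.label = "calibrate"
    >>> gwjob.executable = GenericWorkflowExec("pipetask", src_uri="/software/bin/pipetask")
    >>> gwjob.request_memory = 2048  # MB
    >>> gwjob.label
    'calibrate'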

class GenericWorkflow(DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialize the graph that is passed through to the
        DiGraph constructor. Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to the DiGraph constructor.
    """
    def __init__(self, name, incoming_graph_data=None, **attr):
        super().__init__(incoming_graph_data, **attr)
        self._name = name
        self.run_attrs = {}
        self._files = {}
        self._executables = {}
        self._inputs = {}  # mapping job.names to list of GenericWorkflowFile
        self._outputs = {}  # mapping job.names to list of GenericWorkflowFile
        self.run_id = None
        self._final = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    @property
    def quanta_counts(self):
        """Counts of quanta per task label in workflow (`collections.Counter`).
        """
        qcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            if gwjob.quanta_counts is not None:
                qcounts += gwjob.quanta_counts
        return qcounts

    @property
    def job_counts(self):
        """Counts of jobs per job label in workflow (`collections.Counter`).
        """
        jcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            jcounts[gwjob.label] += 1

        # The final job/workflow is stored separately, so count it here.
        final = self.get_final()
        if final:
            if isinstance(final, GenericWorkflow):
                for job_name in final:
                    gwjob = final.get_job(job_name)
                    jcounts[gwjob.label] += 1
            else:
                jcounts[final.label] += 1

        return jcounts

    def __iter__(self):
        """Return iterator of job names in topologically sorted order.
        """
        return topological_sort(self)

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        Need API in case change way files are stored (e.g., make
        workflow a bipartite graph with jobs and files nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
            (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            File names or objects from generic workflow meeting specifications.
        """
        files = []
        for filename, file in self._files.items():
            if not transfer_only or file.wms_transfer:
                if not data:
                    files.append(filename)
                else:
                    files.append(file)
        return files

    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job.
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job.
        """
        if not isinstance(job, GenericWorkflowJob):
            raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).")
        if self.has_node(job.name):
            raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.")
        super().add_node(job.name, job=job)
        self.add_job_relationships(parent_names, job.name)
        self.add_job_relationships(job.name, child_names)
        self.add_executable(job.executable)
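A sketch of assembling a small workflow (job names hypothetical); because `__iter__` is overridden, iterating the workflow yields names in topological order. Note jobs added without an executable trigger a warning log from `add_executable`:

    >>> from lsst.ctrl.bps.generic_workflow import GenericWorkflow, GenericWorkflowJob
    >>> gw = GenericWorkflow("demo_run")
    >>> gw.add_job(GenericWorkflowJob("jobA"))
    >>> gw.add_job(GenericWorkflowJob("jobB"), parent_names=["jobA"])
    >>> list(gw)
    ['jobA', 'jobB']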

    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs. All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `str` or `list` [`str`]
            Parent job name(s).
        children : `str` or `list` [`str`]
            Child job name(s).
        """
        if parents is not None and children is not None:
            self.add_edges_from(itertools.product(ensure_iterable(parents), ensure_iterable(children)))
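Continuing the sketch above: since the edges come from `itertools.product`, every parent is connected to every child, so two parents and two children yield four edges (names hypothetical):

    >>> fan = GenericWorkflow("fan")
    >>> for name in ("a1", "a2", "b1", "b2"):
    ...     fan.add_job(GenericWorkflowJob(name))
    >>> fan.add_job_relationships(["a1", "a2"], ["b1", "b2"])
    >>> fan.number_of_edges()
    4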

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should be
            saved.
        attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently used).
        """
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow leaving connected graph.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
        """
        # Connect all parent jobs to all children jobs.
        parents = self.predecessors(job_name)
        children = self.successors(job_name)
        self.add_job_relationships(parents, children)

        # Delete job node (which also deletes its edges).
        self.remove_node(job_name)
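A sketch of the graph-healing behavior (names hypothetical): removing the middle job of a chain leaves its parent connected directly to its child.

    >>> chain = GenericWorkflow("chain")
    >>> for name in ("a", "b", "c"):
    ...     chain.add_job(GenericWorkflowJob(name))
    >>> chain.add_job_relationships("a", "b")
    >>> chain.add_job_relationships("b", "c")
    >>> chain.del_job("b")
    >>> chain.has_edge("a", "c")
    True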

    def add_job_inputs(self, job_name, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added.
        files : `lsst.ctrl.bps.GenericWorkflowFile` or \
                `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File object(s) to be added as inputs to the specified job.
        """
        self._inputs.setdefault(job_name, [])
        for file in ensure_iterable(files):
            # Save the central copy.
            if file.name not in self._files:
                self._files[file.name] = file

            # Save the job reference to the file.
            self._inputs[job_name].append(file)

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object.

        Returns
        -------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File object to add to workflow.
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile
        else:
            _LOG.debug("Skipped add_file for existing file %s", gwfile.name)

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input files for the given job. If no input files for the job,
            returns an empty list.
        """
        inputs = []
        if job_name in self._inputs:
            for gwfile in self._inputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        inputs.append(gwfile.name)
                    else:
                        inputs.append(gwfile)
        return inputs
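Continuing the chain sketch (file name and URI hypothetical): inputs are registered per job, a central copy is kept in the workflow's file table, and they can be returned as names or objects.

    >>> from lsst.ctrl.bps.generic_workflow import GenericWorkflowFile
    >>> catalog = GenericWorkflowFile("refCatalog", src_uri="/data/refcat.fits",
    ...                               wms_transfer=True)
    >>> chain.add_job_inputs("a", catalog)
    >>> chain.get_job_inputs("a", data=False)
    ['refCatalog']
    >>> chain.get_file("refCatalog") is catalog
    True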

    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File objects to be added as outputs for specified job.
        """
        self._outputs.setdefault(job_name, [])

        for file_ in ensure_iterable(files):
            # Save the central copy.
            if file_.name not in self._files:
                self._files[file_.name] = file_

            # Save the job reference to the file.
            self._outputs[job_name].append(file_)

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management
            system would be responsible for transferring. It defaults to
            `False` thus returning all output files.

        Returns
        -------
        outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Output files for the given job. If no output files for the job,
            returns an empty list.
        """
        outputs = []

        if job_name in self._outputs:
            # add_job_outputs stores file objects (not names), so iterate
            # over the objects directly, mirroring get_job_inputs.
            for gwfile in self._outputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        outputs.append(gwfile.name)
                    else:
                        outputs.append(gwfile)
        return outputs

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use. It defaults to the format for
            the dot program.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            raise RuntimeError(f"Unknown draw format ({format_})")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer. Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data. It defaults to pickle format.
        """
        if format_ == "pickle":
            write_gpickle(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader. Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream. It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow loaded from the given stream.
        """
        if format_ == "pickle":
            return read_gpickle(stream)

        raise RuntimeError(f"Unknown format ({format_})")
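A round-trip sketch (path hypothetical). Note that `write_gpickle`/`read_gpickle` were removed in networkx 3.0, so this module assumes an older networkx:

    >>> chain.save("/tmp/chain.pickle")
    >>> restored = GenericWorkflow.load("/tmp/chain.pickle")
    >>> restored.name
    'chain'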

    def validate(self):
        """Run checks to ensure this is still a valid generic workflow graph.
        """
        # Make sure it is a directed acyclic graph.
        assert is_directed_acyclic_graph(self)

    def add_workflow_source(self, workflow):
        """Add given workflow as new source to this workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.GenericWorkflow`
            Workflow whose jobs will run before the current source jobs of
            this workflow.
        """
        # Find source nodes in self.
        self_sources = [n for n in self if self.in_degree(n) == 0]
        _LOG.debug("self_sources = %s", self_sources)

        # Find sink nodes of workflow.
        new_sinks = [n for n in workflow if workflow.out_degree(n) == 0]
        _LOG.debug("new sinks = %s", new_sinks)

        # Add new workflow nodes to self graph and make new edges.
        self.add_nodes_from(workflow.nodes(data=True))
        self.add_edges_from(workflow.edges())
        for source in self_sources:
            for sink in new_sinks:
                self.add_edge(sink, source)

        # Files are stored separately so copy them.
        for job_name in workflow:
            self.add_job_inputs(job_name, workflow.get_job_inputs(job_name, data=True))
            self.add_job_outputs(job_name, workflow.get_job_outputs(job_name, data=True))
            self.add_executable(workflow.get_job(job_name).executable)
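Note the edge direction: the sinks of the incoming workflow gain edges into this workflow's current sources, so the added workflow runs first. A sketch (names hypothetical), continuing the session above:

    >>> pre = GenericWorkflow("pre")
    >>> pre.add_job(GenericWorkflowJob("setup"))
    >>> chain.add_workflow_source(pre)
    >>> list(chain)[0]
    'setup'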

    def add_final(self, final):
        """Add special final job/workflow to the generic workflow.

        Parameters
        ----------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute the special final job(s), the
            job(s) to be executed after all jobs that can be executed
            have been executed regardless of exit status of any of the
            jobs.
        """
        if not isinstance(final, GenericWorkflowJob) and not isinstance(final, GenericWorkflow):
            raise TypeError(f"Invalid type for GenericWorkflow final ({type(final)})")

        self._final = final
        if isinstance(final, GenericWorkflowJob):
            self.add_executable(final.executable)

    def get_final(self):
        """Return job/workflow to be executed after all jobs that can be
        executed have been executed regardless of exit status of any of
        the jobs.

        Returns
        -------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute final job(s).
        """
        return self._final
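A sketch of attaching a hypothetical cleanup job; the final job is stored separately from the graph but still shows up in `job_counts`:

    >>> cleanup = GenericWorkflowJob("finalJob")
    >>> cleanup.label = "finalJob"
    >>> chain.add_final(cleanup)
    >>> chain.get_final().name
    'finalJob'
    >>> chain.job_counts["finalJob"]
    1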

    def add_executable(self, executable):
        """Add executable to workflow's list of executables.

        Parameters
        ----------
        executable : `lsst.ctrl.bps.GenericWorkflowExec`
            Executable object to be added to workflow.
        """
        if executable is not None:
            self._executables[executable.name] = executable
        else:
            _LOG.warning("executable not specified (None); cannot add to the workflow's list of executables")

    def get_executables(self, data=False, transfer_only=True):
        """Retrieve executables from generic workflow.

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the executable data as well as the exec object
            name. (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return executables for which transfer_executable
            is True.

        Returns
        -------
        execs : `list` [`lsst.ctrl.bps.GenericWorkflowExec`] or `list` [`str`]
            Filtered executable names or objects from generic workflow.
        """
        execs = []
        for name, executable in self._executables.items():
            if not transfer_only or executable.transfer_executable:
                if not data:
                    execs.append(name)
                else:
                    execs.append(executable)
        return execs
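Closing the sketch (executable name and path hypothetical): with the default `transfer_only=True`, only executables flagged `transfer_executable` are listed.

    >>> from lsst.ctrl.bps.generic_workflow import GenericWorkflowExec
    >>> runner = GenericWorkflowExec("runner.sh", src_uri="/software/runner.sh",
    ...                              transfer_executable=True)
    >>> chain.add_executable(runner)
    >>> chain.get_executables()
    ['runner.sh']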