Coverage for python/lsst/ctrl/bps/generic_workflow.py: 36%

334 statements  

coverage.py v6.5.0, created at 2023-02-15 10:57 +0000

# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Class definitions for a Generic Workflow Graph."""

__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob", "GenericWorkflowExec"]


import dataclasses
import itertools
import logging
import pickle
from collections import Counter, defaultdict
from typing import Optional

from lsst.utils.iteration import ensure_iterable
from networkx import DiGraph, topological_sort
from networkx.algorithms.dag import is_directed_acyclic_graph

from .bps_draw import draw_networkx_dot

_LOG = logging.getLogger(__name__)


@dataclasses.dataclass
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.
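
    Examples
    --------
    A minimal construction sketch; the name, path, and flag values here
    are illustrative only:

    >>> gwfile = GenericWorkflowFile("job1.log", src_uri="/tmp/job1.log",
    ...                              wms_transfer=True)
    >>> gwfile.job_shared
    False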

    """

    name: str
    """Lookup key (logical file name) of file/directory. Must be unique
    within run.
    """

    src_uri: Optional[str]  # unclear whether this needs to be a ResourcePath
    """Original location of file/directory.
    """


    wms_transfer: bool
    """Whether the WMS is responsible for transferring the file.
    Default is False.
    """

    job_access_remote: bool
    """Whether the job can remotely access file (using separately specified
    file access protocols). Default is False.
    """


    job_shared: bool
    """Whether the file can be shared among jobs rather than each job
    requiring its own copy. Default is False.
    """


    # As of python 3.7.8, can't use __slots__ + dataclass if given default
    # values, so writing own __init__.
    def __init__(
        self,
        name: str,
        src_uri: Optional[str] = None,
        wms_transfer: bool = False,
        job_access_remote: bool = False,
        job_shared: bool = False,
    ):
        self.name = name
        self.src_uri = src_uri
        self.wms_transfer = wms_transfer
        self.job_access_remote = job_access_remote
        self.job_shared = job_shared

    __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")

    def __hash__(self):
        return hash(self.name)


@dataclasses.dataclass
class GenericWorkflowExec:
    """Information about an executable that may be needed by various workflow
    management services.
    """

    name: str
    """Lookup key (logical file name) of executable. Must be unique
    within run.
    """

    src_uri: Optional[str]  # unclear whether this needs to be a ResourcePath
    """Original location of executable.
    """

    transfer_executable: bool
    """Whether the WMS/plugin is responsible for staging executable to
    location usable by job.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if given default
    # values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, transfer_executable: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.transfer_executable = transfer_executable

    __slots__ = ("name", "src_uri", "transfer_executable")

    def __hash__(self):
        return hash(self.name)


@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.
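
    Examples
    --------
    A minimal sketch; only the name is set at construction, and the
    values shown afterwards are illustrative:

    >>> gwjob = GenericWorkflowJob("pipetaskInit")
    >>> gwjob.label = "pipetaskInit"
    >>> gwjob.request_memory = 2048  # MB
    >>> gwjob.request_cpus = 1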

    """

    name: str
    """Name of job. Must be unique within workflow.
    """

    label: Optional[str]
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    quanta_counts: Optional[Counter]
    """Counts of quanta per task label in job.
    """

    tags: Optional[dict]
    """Other key/value pairs for job that user may want to use as a filter.
    """

    executable: Optional[GenericWorkflowExec]
    """Executable for job.
    """

    arguments: Optional[str]
    """Command line arguments for job.
    """

    cmdvals: Optional[dict]
    """Values for variables in cmdline when using lazy command line creation.
    """

    memory_multiplier: Optional[float]
    """Memory growth rate between retries.
    """

    request_memory: Optional[int]  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_memory_max: Optional[int]  # MB
    """Max memory (in MB) that the job should ever use.
    """

    request_cpus: Optional[int]  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: Optional[int]  # MB
    """Max amount of job scratch disk (in MB) that the job is expected
    to need.
    """

    request_walltime: Optional[str]  # minutes
    """Max amount of time (in minutes) that the job is expected to need.
    """

    compute_site: Optional[str]
    """Key to look up site-specific information for running the job.
    """

    accounting_group: Optional[str]
    """Name of the accounting group to use.
    """

    accounting_user: Optional[str]
    """Name of the user to use for accounting purposes.
    """

    mail_to: Optional[str]
    """Comma separated list of email addresses for emailing job status.
    """

    when_to_mail: Optional[str]
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: Optional[int]
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: Optional[int]
    """Exit code for job that means to not automatically retry.
    """

    abort_on_value: Optional[int]
    """Job exit value that signals the entire workflow should be aborted.
    """

    abort_return_value: Optional[int]
    """Exit value to use when aborting the entire workflow.
    """

    priority: Optional[str]
    """Initial priority of job in WMS format.
    """

    category: Optional[str]
    """WMS-facing label of job within single workflow (e.g., can be used for
    throttling jobs within a single workflow).
    """

    concurrency_limit: Optional[str]
    """Names of concurrency limits that the WMS plugin can appropriately
    translate to limit the number of this job across all running workflows.
    """

    queue: Optional[str]
    """Name of queue to use. Different WMS can translate this concept
    differently.
    """

    pre_cmdline: Optional[str]
    """Command line to be executed prior to executing job.
    """

    post_cmdline: Optional[str]
    """Command line to be executed after job executes.

    Should be executed regardless of exit status.
    """

    preemptible: Optional[bool]
    """Flag indicating whether the job can be preempted.
    """

    profile: Optional[dict]
    """Nested dictionary of WMS-specific key/value pairs with primary key
    being WMS key (e.g., pegasus, condor, panda).
    """

    attrs: Optional[dict]
    """Key/value pairs of job attributes (for WMS that have attributes in
    addition to commands).
    """

    environment: Optional[dict]
    """Environment variable names and values to be explicitly set inside job.
    """

    compute_cloud: Optional[str]
    """Key to look up cloud-specific information for running the job.
    """

    # As of python 3.7.8, can't use __slots__ if given default values, so
    # writing own __init__.

    def __init__(self, name: str):
        self.name = name
        self.label = None
        self.quanta_counts = Counter()
        self.tags = {}
        self.executable = None
        self.arguments = None
        self.cmdvals = {}
        self.memory_multiplier = None
        self.request_memory = None
        self.request_memory_max = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.accounting_group = None
        self.accounting_user = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = None
        self.queue = None
        self.pre_cmdline = None
        self.post_cmdline = None
        self.preemptible = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}
        self.compute_cloud = None

    __slots__ = (
        "name",
        "label",
        "quanta_counts",
        "tags",
        "mail_to",
        "when_to_mail",
        "executable",
        "arguments",
        "cmdvals",
        "memory_multiplier",
        "request_memory",
        "request_memory_max",
        "request_cpus",
        "request_disk",
        "request_walltime",
        "number_of_retries",
        "retry_unless_exit",
        "abort_on_value",
        "abort_return_value",
        "compute_site",
        "accounting_group",
        "accounting_user",
        "environment",
        "priority",
        "category",
        "concurrency_limit",
        "queue",
        "pre_cmdline",
        "post_cmdline",
        "preemptible",
        "profile",
        "attrs",
        "compute_cloud",
    )

    def __hash__(self):
        return hash(self.name)


class GenericWorkflow(DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialize graph that is passed through to DiGraph
        constructor. Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to DiGraph constructor.
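
    Examples
    --------
    A minimal sketch of building a two-job workflow; the names are
    illustrative:

    >>> gwf = GenericWorkflow("test_run")
    >>> gwf.add_job(GenericWorkflowJob("job1"))
    >>> gwf.add_job(GenericWorkflowJob("job2"), parent_names=["job1"])
    >>> list(gwf)
    ['job1', 'job2']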

    """

    def __init__(self, name, incoming_graph_data=None, **attr):
        super().__init__(incoming_graph_data, **attr)
        self._name = name
        self.run_attrs = {}
        self._files = {}
        self._executables = {}
        self._inputs = {}  # mapping job.names to list of GenericWorkflowFile
        self._outputs = {}  # mapping job.names to list of GenericWorkflowFile
        self._labels = defaultdict(list)  # mapping job label to list of GenericWorkflowJob
        self.run_id = None
        self._final = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    @property
    def quanta_counts(self):
        """Count of quanta per task label (`collections.Counter`)."""
        qcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            if gwjob.quanta_counts is not None:
                qcounts += gwjob.quanta_counts
        return qcounts

    @property
    def labels(self):
        """List of job labels (`list` [`str`], read-only)."""
        return list(self._labels.keys())

    def regenerate_labels(self):
        """Regenerate the list of job labels."""
        self._labels = defaultdict(list)
        for job_name in self:
            job = self.get_job(job_name)
            self._labels[job.label].append(job)

    @property
    def job_counts(self):
        """Count of jobs per job label (`collections.Counter`)."""
        jcounts = Counter({label: len(jobs) for label, jobs in self._labels.items()})

        # Final is separate.
        final = self.get_final()
        if final:
            if isinstance(final, GenericWorkflow):
                # job_counts is a property, so it must not be called.
                jcounts.update(final.job_counts)
            else:
                jcounts[final.label] += 1

        return jcounts

    def __iter__(self):
        """Return iterator of job names in topologically sorted order."""
        return topological_sort(self)

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        Need API in case change way files are stored (e.g., make
        workflow a bipartite graph with jobs and files nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
            (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            File names or objects from generic workflow meeting
            specifications.
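
        Examples
        --------
        An illustrative sketch; with the default ``transfer_only=True``
        only files marked for WMS transfer are returned:

        >>> gwf = GenericWorkflow("example")
        >>> gwf.add_file(GenericWorkflowFile("f1", wms_transfer=True))
        >>> gwf.add_file(GenericWorkflowFile("f2"))
        >>> gwf.get_files()
        ['f1']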

        """
        files = []
        for filename, file in self._files.items():
            if not transfer_only or file.wms_transfer:
                if not data:
                    files.append(filename)
                else:
                    files.append(file)
        return files

    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job.
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job.
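
        Examples
        --------
        A sketch of adding a job with a dependency; the names are
        illustrative:

        >>> gwf = GenericWorkflow("example")
        >>> gwf.add_job(GenericWorkflowJob("jobA"))
        >>> gwf.add_job(GenericWorkflowJob("jobB"), parent_names=["jobA"])
        >>> list(gwf.successors("jobA"))
        ['jobB']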

        """
        if not isinstance(job, GenericWorkflowJob):
            raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).")
        if self.has_node(job.name):
            raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.")
        super().add_node(job.name, job=job)
        self.add_job_relationships(parent_names, job.name)
        self.add_job_relationships(job.name, child_names)
        self.add_executable(job.executable)
        self._labels[job.label].append(job)

    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs. All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Children job names.
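
        Examples
        --------
        An illustrative sketch connecting one parent to two children:

        >>> gwf = GenericWorkflow("example")
        >>> for name in ["jobA", "jobB", "jobC"]:
        ...     gwf.add_job(GenericWorkflowJob(name))
        >>> gwf.add_job_relationships("jobA", ["jobB", "jobC"])
        >>> sorted(gwf.successors("jobA"))
        ['jobB', 'jobC']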

        """
        if parents is not None and children is not None:
            self.add_edges_from(itertools.product(ensure_iterable(parents), ensure_iterable(children)))

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should be
            saved.
        attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently used).
        """
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow leaving connected graph.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
        """
        job = self.get_job(job_name)
        self._labels[job.label].remove(job)
        # Don't leave keys around if removed last job.
        if not self._labels[job.label]:
            del self._labels[job.label]

        # Connect all parent jobs to all children jobs.
        parents = self.predecessors(job_name)
        children = self.successors(job_name)
        self.add_job_relationships(parents, children)

        # Delete job node (which deletes edges).
        self.remove_node(job_name)

    def add_job_inputs(self, job_name, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added.
        files : `lsst.ctrl.bps.GenericWorkflowFile` or \
                `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File object(s) to be added as inputs to the specified job.
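
        Examples
        --------
        An illustrative sketch; the file and job names are examples only:

        >>> gwf = GenericWorkflow("example")
        >>> gwf.add_job(GenericWorkflowJob("jobA"))
        >>> gwf.add_job_inputs("jobA", GenericWorkflowFile("input1.txt"))
        >>> gwf.get_job_inputs("jobA", data=False)
        ['input1.txt']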

        """
        self._inputs.setdefault(job_name, [])
        for file in ensure_iterable(files):
            # Save the central copy.
            if file.name not in self._files:
                self._files[file.name] = file

            # Save the job reference to the file.
            self._inputs[job_name].append(file)

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object.

        Returns
        -------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File object to add to workflow.
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile
        else:
            _LOG.debug("Skipped add_file for existing file %s", gwfile.name)

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input files for the given job. If no input files for the job,
            returns an empty list.
        """
        inputs = []
        if job_name in self._inputs:
            for gwfile in self._inputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        inputs.append(gwfile.name)
                    else:
                        inputs.append(gwfile)
        return inputs

    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File objects to be added as outputs for specified job.
        """
        self._outputs.setdefault(job_name, [])

        for file_ in ensure_iterable(files):
            # Save the central copy.
            if file_.name not in self._files:
                self._files[file_.name] = file_

            # Save the job reference to the file.
            self._outputs[job_name].append(file_)

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management
            system would be responsible for transferring. It defaults to
            `False` thus returning all output files.

        Returns
        -------
        outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Output files for the given job. If no output files for the job,
            returns an empty list.
        """
        outputs = []

        if job_name in self._outputs:
            # Entries in self._outputs are file objects, so look up the
            # central copy by name.
            for gwfile in self._outputs[job_name]:
                file = self._files[gwfile.name]
                if not transfer_only or file.wms_transfer:
                    if not data:
                        outputs.append(file.name)
                    else:
                        outputs.append(file)
        return outputs

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use. It defaults to the format for
            the dot program.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            raise RuntimeError(f"Unknown draw format ({format_})")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer. Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data. It defaults to pickle format.
        """
        if format_ == "pickle":
            pickle.dump(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader. Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream. It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow loaded from the given stream.
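
        Examples
        --------
        A sketch of a pickle round trip through an in-memory buffer:

        >>> import io
        >>> stream = io.BytesIO()
        >>> GenericWorkflow("example").save(stream)
        >>> _ = stream.seek(0)
        >>> GenericWorkflow.load(stream).name
        'example'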

        """
        if format_ == "pickle":
            return pickle.load(stream)

        raise RuntimeError(f"Unknown format ({format_})")

    def validate(self):
        """Run checks to ensure that the generic workflow graph is valid."""
        # Make sure the graph is a directed acyclic graph.
        assert is_directed_acyclic_graph(self)

    def add_workflow_source(self, workflow):
        """Add given workflow as new source to this workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.GenericWorkflow`
            Workflow to add as a new source of this workflow.
        """
        # Find source nodes in self.
        self_sources = [n for n in self if self.in_degree(n) == 0]
        _LOG.debug("self_sources = %s", self_sources)

        # Find sink nodes of workflow.
        new_sinks = [n for n in workflow if workflow.out_degree(n) == 0]
        _LOG.debug("new sinks = %s", new_sinks)

        # Add new workflow nodes to self graph and make new edges.
        self.add_nodes_from(workflow.nodes(data=True))
        self.add_edges_from(workflow.edges())
        for source in self_sources:
            for sink in new_sinks:
                self.add_edge(sink, source)

        # Files are stored separately so copy them.
        for job_name in workflow:
            self.add_job_inputs(job_name, workflow.get_job_inputs(job_name, data=True))
            self.add_job_outputs(job_name, workflow.get_job_outputs(job_name, data=True))
            self.add_executable(workflow.get_job(job_name).executable)

        # Note: label ordering is inferred from dict insertion order,
        # so add the given source workflow's labels first.
        labels = defaultdict(list)
        for label in workflow._labels:
            labels[label] = workflow._labels[label]
        for label in self._labels:
            labels[label] = self._labels[label]
        self._labels = labels

    def add_final(self, final):
        """Add special final job/workflow to the generic workflow.

        Parameters
        ----------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute the special final job(s), the
            job(s) to be executed after all jobs that can be executed
            have been executed regardless of exit status of any of the
            jobs.
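
        Examples
        --------
        An illustrative sketch using a single job as the final step:

        >>> gwf = GenericWorkflow("example")
        >>> gwf.add_final(GenericWorkflowJob("finalJob"))
        >>> gwf.get_final().name
        'finalJob'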

        """
        if not isinstance(final, GenericWorkflowJob) and not isinstance(final, GenericWorkflow):
            raise TypeError(f"Invalid type for GenericWorkflow final ({type(final)})")

        self._final = final
        if isinstance(final, GenericWorkflowJob):
            self.add_executable(final.executable)

    def get_final(self):
        """Return job/workflow to be executed after all jobs that can be
        executed have been executed regardless of exit status of any of
        the jobs.

        Returns
        -------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute final job(s).
        """
        return self._final

    def add_executable(self, executable):
        """Add executable to workflow's list of executables.

        Parameters
        ----------
        executable : `lsst.ctrl.bps.GenericWorkflowExec`
            Executable object to be added to workflow.
        """
        if executable is not None:
            self._executables[executable.name] = executable
        else:
            _LOG.warning("executable not specified (None); cannot add to the workflow's list of executables")

    def get_executables(self, data=False, transfer_only=True):
        """Retrieve executables from generic workflow.

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the executable data as well as the exec object
            name. (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return executables for which transfer_executable
            is True.

        Returns
        -------
        execs : `list` [`lsst.ctrl.bps.GenericWorkflowExec`] or `list` [`str`]
            Filtered executable names or objects from generic workflow.
        """
        execs = []
        for name, executable in self._executables.items():
            if not transfer_only or executable.transfer_executable:
                if not data:
                    execs.append(name)
                else:
                    execs.append(executable)
        return execs

    def get_jobs_by_label(self, label: str):
        """Retrieve jobs by label from workflow.

        Parameters
        ----------
        label : `str`
            Label of jobs to retrieve.

        Returns
        -------
        jobs : `list` [`lsst.ctrl.bps.GenericWorkflowJob`]
            Jobs having given label.
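
        Examples
        --------
        An illustrative sketch; the label and job name are examples only:

        >>> gwf = GenericWorkflow("example")
        >>> gwjob = GenericWorkflowJob("jobA")
        >>> gwjob.label = "calibrate"
        >>> gwf.add_job(gwjob)
        >>> [job.name for job in gwf.get_jobs_by_label("calibrate")]
        ['jobA']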

        """
        return self._labels[label]