Coverage for python/lsst/ctrl/bps/generic_workflow.py: 39%

318 statements  

« prev     ^ index     » next       coverage.py v7.4.2, created at 2024-02-22 11:13 +0000

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <https://www.gnu.org/licenses/>. 

27 

28"""Class definitions for a Generic Workflow Graph. 

29""" 

30 

31__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob", "GenericWorkflowExec"] 

32 

33 

34import dataclasses 

35import itertools 

36import logging 

37import pickle 

38from collections import Counter, defaultdict 

39 

40from lsst.utils.iteration import ensure_iterable 

41from networkx import DiGraph, topological_sort 

42from networkx.algorithms.dag import is_directed_acyclic_graph 

43 

44from .bps_draw import draw_networkx_dot 

45 

46_LOG = logging.getLogger(__name__) 

47 

48 

@dataclasses.dataclass(slots=True)
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.

    Instances are stored in name-keyed mappings by `GenericWorkflow`
    (``_files``, ``_inputs``, ``_outputs``); hashing is therefore by
    ``name`` only, while the generated ``__eq__`` still compares all
    fields.
    """

    name: str
    """Lookup key (logical file name) of file/directory. Must be unique
    within run.
    """

    src_uri: str | None = None  # don't know that need ResourcePath
    """Original location of file/directory.
    """

    wms_transfer: bool = False
    """Whether the workflow management system is responsible for
    transferring this file (files with this flag set are the ones returned
    by ``GenericWorkflow.get_files(transfer_only=True)``). Default is False.
    """

    job_access_remote: bool = False
    """Whether the job can remotely access file (using separately specified
    file access protocols). Default is False.
    """

    job_shared: bool = False
    """Whether job requires its own copy of this file. Default is False.

    NOTE(review): the attribute name suggests True means the file *can be
    shared* between jobs (no private copy needed), which contradicts this
    sentence -- confirm intended polarity against the WMS plugins.
    """

    def __hash__(self):
        # Hash by logical name only so a file object and its name key
        # land in the same hash bucket in name-keyed mappings.
        return hash(self.name)

79 

80 

81@dataclasses.dataclass(slots=True) 

82class GenericWorkflowExec: 

83 """Information about an executable that may be needed by various workflow 

84 management services. 

85 """ 

86 

87 name: str 

88 """Lookup key (logical file name) of executable. Must be unique 

89 within run. 

90 """ 

91 

92 src_uri: str or None = None # don't know that need ResourcePath 

93 """Original location of executable. 

94 """ 

95 

96 transfer_executable: bool = False 

97 """Whether the WMS/plugin is responsible for staging executable to 

98 location usable by job. 

99 """ 

100 

101 def __hash__(self): 

102 return hash(self.name) 

103 

104 

@dataclasses.dataclass(slots=True)
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.

    Pure data holder: `GenericWorkflow.add_job` stores one of these per
    graph node.  Hashing is by ``name`` only (job names are unique within
    a workflow); the generated ``__eq__`` still compares all fields.
    """

    name: str
    """Name of job. Must be unique within workflow.
    """

    label: str = "UNK"
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    quanta_counts: Counter = dataclasses.field(default_factory=Counter)
    """Counts of quanta per task label in job.
    """

    tags: dict = dataclasses.field(default_factory=dict)
    """Other key/value pairs for job that user may want to use as a filter.
    """

    executable: GenericWorkflowExec | None = None
    """Executable for job.
    """

    arguments: str | None = None
    """Command line arguments for job.
    """

    cmdvals: dict = dataclasses.field(default_factory=dict)
    """Values for variables in cmdline when using lazy command line creation.
    """

    memory_multiplier: float | None = None
    """Memory growth rate between retries.
    """

    request_memory: int | None = None  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_memory_max: int | None = None  # MB
    """Max memory (in MB) that the job should ever use.
    """

    request_cpus: int | None = None  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: int | None = None  # MB
    """Max amount of job scratch disk (in MB) that the job is expected to need.
    """

    request_walltime: str | None = None  # minutes
    """Max amount of time that the job is expected to need.

    NOTE(review): inline comment says minutes but this text previously
    said seconds -- confirm the expected units with the WMS plugins.
    """

    compute_site: str | None = None
    """Key to look up site-specific information for running the job.
    """

    accounting_group: str | None = None
    """Name of the accounting group to use.
    """

    accounting_user: str | None = None
    """Name of the user to use for accounting purposes.
    """

    mail_to: str | None = None
    """Comma separated list of email addresses for emailing job status.
    """

    when_to_mail: str | None = None
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: int | None = None
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: int | None = None
    """Exit code for job that means to not automatically retry.
    """

    abort_on_value: int | None = None
    """Job exit value for signals to abort the entire workflow.
    """

    abort_return_value: int | None = None
    """Exit value to use when aborting the entire workflow.
    """

    priority: str | None = None
    """Initial priority of job in WMS-format.
    """

    category: str | None = None
    """WMS-facing label of job within single workflow (e.g., can be used for
    throttling jobs within a single workflow).
    """

    concurrency_limit: str | None = None
    """Names of concurrency limits that the WMS plugin can appropriately
    translate to limit the number of this job across all running workflows.
    """

    queue: str | None = None
    """Name of queue to use. Different WMS can translate this concept
    differently.
    """

    pre_cmdline: str | None = None
    """Command line to be executed prior to executing job.
    """

    post_cmdline: str | None = None
    """Command line to be executed after job executes.

    Should be executed regardless of exit status.
    """

    preemptible: bool | None = None
    """The flag indicating whether the job can be preempted.
    """

    profile: dict = dataclasses.field(default_factory=dict)
    """Nested dictionary of WMS-specific key/value pairs with primary key being
    WMS key (e.g., pegasus, condor, panda).
    """

    attrs: dict = dataclasses.field(default_factory=dict)
    """Key/value pairs of job attributes (for WMS that have attributes in
    addition to commands).
    """

    environment: dict = dataclasses.field(default_factory=dict)
    """Environment variable names and values to be explicitly set inside job.
    """

    compute_cloud: str | None = None
    """Key to look up cloud-specific information for running the job.
    """

    def __hash__(self):
        # Hash by unique job name only, matching how jobs are keyed in
        # the workflow graph.
        return hash(self.name)

253 

254 

class GenericWorkflow(DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialized graph that is passed through to DiGraph
        constructor.  Can be any type supported by networkx.DiGraph.
    **attr : `dict`
        Keyword arguments passed through to DiGraph constructor.
    """

    def __init__(self, name, incoming_graph_data=None, **attr):
        super().__init__(incoming_graph_data, **attr)
        self._name = name
        self.run_attrs = {}
        self._job_labels = GenericWorkflowLabels()
        self._files = {}
        self._executables = {}
        self._inputs = {}  # mapping job.names to list of GenericWorkflowFile
        self._outputs = {}  # mapping job.names to list of GenericWorkflowFile
        self.run_id = None
        self._final = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    @property
    def quanta_counts(self):
        """Count of quanta per task label (`collections.Counter`)."""
        qcounts = Counter()
        for job_name in self:
            gwjob = self.get_job(job_name)
            # quanta_counts defaults to an empty Counter, but guard against
            # an explicitly assigned None.
            if gwjob.quanta_counts is not None:
                qcounts += gwjob.quanta_counts
        return qcounts

    @property
    def labels(self):
        """Job labels (`list` [`str`], read-only)."""
        return self._job_labels.labels

    def regenerate_labels(self):
        """Regenerate the list of job labels."""
        self._job_labels = GenericWorkflowLabels()
        for job_name in self:
            job = self.get_job(job_name)
            self._job_labels.add_job(
                job,
                [self.get_job(p).label for p in self.predecessors(job.name)],
                [self.get_job(s).label for s in self.successors(job.name)],
            )

    @property
    def job_counts(self):
        """Count of jobs per job label (`collections.Counter`)."""
        jcounts = self._job_labels.job_counts

        # Final is stored separately from the graph, so count it here.
        final = self.get_final()
        if final:
            if isinstance(final, GenericWorkflow):
                jcounts.update(final.job_counts)
            else:
                jcounts[final.label] += 1

        return jcounts

    def __iter__(self):
        """Return iterator of job names in topologically sorted order."""
        return topological_sort(self)

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        Need API in case change way files are stored (e.g., make
        workflow a bipartite graph with jobs and files nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name
            (The defaults is False).
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management system
            would be responsible for transferring.

        Returns
        -------
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            File names or objects from generic workflow meeting specifications.
        """
        files = []
        for filename, gwfile in self._files.items():
            if not transfer_only or gwfile.wms_transfer:
                if not data:
                    files.append(filename)
                else:
                    files.append(gwfile)
        return files

    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job.
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job.

        Raises
        ------
        RuntimeError
            If job is not a GenericWorkflowJob or a job with the same name
            already exists.
        """
        _LOG.debug("job: %s (%s)", job.name, job.label)
        _LOG.debug("parent_names: %s", parent_names)
        _LOG.debug("child_names: %s", child_names)
        if not isinstance(job, GenericWorkflowJob):
            raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).")
        if self.has_node(job.name):
            raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.")
        super().add_node(job.name, job=job)
        self.add_job_relationships(parent_names, job.name)
        self.add_job_relationships(job.name, child_names)
        self.add_executable(job.executable)
        self._job_labels.add_job(
            job,
            [self.get_job(p).label for p in self.predecessors(job.name)],
            [self.get_job(s).label for s in self.successors(job.name)],
        )

    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to be added to generic workflow.
        **attr
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs.  All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Children job names.
        """
        if parents is not None and children is not None:
            # Materialize once: callers may pass single-use iterators (e.g.,
            # networkx predecessors/successors views) and both sequences are
            # needed twice below -- once for the graph edges and once for the
            # label bookkeeping.
            parents = list(ensure_iterable(parents))
            children = list(ensure_iterable(children))
            self.add_edges_from(itertools.product(parents, children))
            self._job_labels.add_job_relationships(
                [self.get_job(n).label for n in parents],
                [self.get_job(n).label for n in children],
            )

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should be
            saved.
        **attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently used).
        """
        # Route through add_edge so each edge gets the existence checks.
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        **attr : keyword arguments, optional
            Attributes to save with edge.

        Raises
        ------
        RuntimeError
            If either endpoint is not already a job in the workflow.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow leaving connected graph.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
        """
        job = self.get_job(job_name)

        # Remove from job labels
        self._job_labels.del_job(job)

        # Fixed: materialize before mutating the graph; networkx returns
        # single-use iterators and add_job_relationships needs to walk the
        # sequences more than once.
        parents = list(self.predecessors(job_name))
        children = list(self.successors(job_name))

        # Connect all parent jobs to all children jobs.
        self.add_job_relationships(parents, children)

        # Delete job node (which deletes edges).
        self.remove_node(job_name)

    def add_job_inputs(self, job_name, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added.
        files : `lsst.ctrl.bps.GenericWorkflowFile` or \
                `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File object(s) to be added as inputs to the specified job.
        """
        self._inputs.setdefault(job_name, [])
        for gwfile in ensure_iterable(files):
            # Save the central copy
            if gwfile.name not in self._files:
                self._files[gwfile.name] = gwfile

            # Save the job reference to the file
            self._inputs[job_name].append(gwfile)

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object.

        Returns
        -------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File object to add to workflow.
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile
        else:
            _LOG.debug("Skipped add_file for existing file %s", gwfile.name)

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management system
            would be responsible for transferring.

        Returns
        -------
        inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input files for the given job.  If no input files for the job,
            returns an empty list.
        """
        inputs = []
        if job_name in self._inputs:
            for gwfile in self._inputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        inputs.append(gwfile.name)
                    else:
                        inputs.append(gwfile)
        return inputs

    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File objects to be added as outputs for specified job.
        """
        self._outputs.setdefault(job_name, [])

        for file_ in ensure_iterable(files):
            # Save the central copy
            if file_.name not in self._files:
                self._files[file_.name] = file_

            # Save the job reference to the file
            self._outputs[job_name].append(file_)

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management system
            would be responsible for transferring.  It defaults to `False` thus
            returning all output files.

        Returns
        -------
        outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Output files for the given job.  If no output files for the job,
            returns an empty list.
        """
        outputs = []

        # Fixed: _outputs stores GenericWorkflowFile objects (see
        # add_job_outputs), not names.  The previous code treated the
        # entries as names and indexed self._files with file objects,
        # which raised KeyError (a GenericWorkflowFile never compares
        # equal to its str key).  Iterate them directly, mirroring
        # get_job_inputs.
        if job_name in self._outputs:
            for gwfile in self._outputs[job_name]:
                if not transfer_only or gwfile.wms_transfer:
                    if not data:
                        outputs.append(gwfile.name)
                    else:
                        outputs.append(gwfile)
        return outputs

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use.  It defaults to the format for
            the dot program.

        Raises
        ------
        RuntimeError
            If given an unsupported format.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            # Fixed: message was missing the closing parenthesis.
            raise RuntimeError(f"Unknown draw format ({format_})")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer.  Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data.  It defaults to pickle format.

        Raises
        ------
        RuntimeError
            If given an unsupported format.
        """
        if format_ == "pickle":
            pickle.dump(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader.  Accepts anything that
            the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream.  It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow loaded from the given stream.

        Raises
        ------
        RuntimeError
            If given an unsupported format.
        """
        # NOTE: pickle.load must only be used on trusted streams; it can
        # execute arbitrary code on untrusted input.
        if format_ == "pickle":
            return pickle.load(stream)

        raise RuntimeError(f"Unknown format ({format_})")

    def validate(self):
        """Run checks to ensure that the generic workflow graph is valid."""
        # Make sure a directed acyclic graph.
        # NOTE: assert is stripped under python -O; kept as-is because
        # callers may rely on AssertionError being raised here.
        assert is_directed_acyclic_graph(self)

    def add_workflow_source(self, workflow):
        """Add given workflow as new source to this workflow.

        Parameters
        ----------
        workflow : `lsst.ctrl.bps.GenericWorkflow`
            The given workflow.
        """
        # Find source nodes in self.
        self_sources = [n for n in self if self.in_degree(n) == 0]
        _LOG.debug("self_sources = %s", self_sources)

        # Find sink nodes of workflow.
        new_sinks = [n for n in workflow if workflow.out_degree(n) == 0]
        _LOG.debug("new sinks = %s", new_sinks)

        # Add new workflow nodes to self graph and make new edges.
        self.add_nodes_from(workflow.nodes(data=True))
        self.add_edges_from(workflow.edges())
        for source in self_sources:
            for sink in new_sinks:
                # The new workflow feeds into this workflow's old sources.
                self.add_edge(sink, source)

        # Add separately stored info
        for job_name in workflow:
            job = self.get_job(job_name)
            # Add job labels
            self._job_labels.add_job(
                job,
                [self.get_job(p).label for p in self.predecessors(job.name)],
                [self.get_job(s).label for s in self.successors(job.name)],
            )
            # Files are stored separately so copy them.
            self.add_job_inputs(job_name, workflow.get_job_inputs(job_name, data=True))
            self.add_job_outputs(job_name, workflow.get_job_outputs(job_name, data=True))
            # Executables are stored separately so copy them.
            self.add_executable(workflow.get_job(job_name).executable)

    def add_final(self, final):
        """Add special final job/workflow to the generic workflow.

        Parameters
        ----------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute the special final job(s), the
            job(s) to be executed after all jobs that can be executed
            have been executed regardless of exit status of any of the
            jobs.

        Raises
        ------
        TypeError
            If final is neither a GenericWorkflowJob nor a GenericWorkflow.
        """
        if not isinstance(final, (GenericWorkflowJob, GenericWorkflow)):
            # Fixed: message was missing the f prefix, so {type(final)}
            # appeared literally instead of being interpolated.
            raise TypeError(f"Invalid type for GenericWorkflow final ({type(final)})")

        self._final = final
        if isinstance(final, GenericWorkflowJob):
            self.add_executable(final.executable)

    def get_final(self):
        """Return job/workflow to be executed after all jobs that can be
        executed have been executed regardless of exit status of any of
        the jobs.

        Returns
        -------
        final : `lsst.ctrl.bps.GenericWorkflowJob` or \
                `lsst.ctrl.bps.GenericWorkflow`
            Information needed to execute final job(s).
        """
        return self._final

    def add_executable(self, executable):
        """Add executable to workflow's list of executables.

        Parameters
        ----------
        executable : `lsst.ctrl.bps.GenericWorkflowExec`
            Executable object to be added to workflow.
        """
        if executable is not None:
            self._executables[executable.name] = executable
        else:
            _LOG.warning("executable not specified (None); cannot add to the workflow's list of executables")

    def get_executables(self, data=False, transfer_only=True):
        """Retrieve executables from generic workflow.

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the executable data as well as the exec object
            name (The defaults is False).
        transfer_only : `bool`, optional
            Whether to only return executables for which transfer_executable
            is True.

        Returns
        -------
        execs : `list` [`lsst.ctrl.bps.GenericWorkflowExec`] or `list` [`str`]
            Filtered executable names or objects from generic workflow.
        """
        execs = []
        for name, executable in self._executables.items():
            if not transfer_only or executable.transfer_executable:
                if not data:
                    execs.append(name)
                else:
                    execs.append(executable)
        return execs

    def get_jobs_by_label(self, label: str):
        """Retrieve jobs by label from workflow.

        Parameters
        ----------
        label : `str`
            Label of jobs to retrieve.

        Returns
        -------
        jobs : list[`lsst.ctrl.bps.GenericWorkflowJob`]
            Jobs having given label.
        """
        return self._job_labels.get_jobs_by_label(label)

808 

809 

class GenericWorkflowLabels:
    """Label-oriented representation of the GenericWorkflow.

    Keeps a dependency DiGraph of job *labels* plus a mapping from label
    to the jobs carrying it, so label-level queries (ordering, counts)
    don't have to walk the full job graph.
    """

    def __init__(self):
        self._label_graph = DiGraph()  # Dependency graph of job labels
        self._label_to_jobs = defaultdict(list)  # mapping job label to list of GenericWorkflowJob

    @property
    def labels(self):
        """List of job labels (`list` [`str`], read-only)."""
        return list(topological_sort(self._label_graph))

    @property
    def job_counts(self):
        """Count of jobs per job label (`collections.Counter`)."""
        return Counter({label: len(self._label_to_jobs[label]) for label in self.labels})

    def get_jobs_by_label(self, label: str):
        """Retrieve jobs by label from workflow.

        Parameters
        ----------
        label : `str`
            Label of jobs to retrieve.

        Returns
        -------
        jobs : list[`lsst.ctrl.bps.GenericWorkflowJob`]
            Jobs having given label.
        """
        # NOTE: defaultdict lookup inserts an empty entry for an unknown
        # label as a side effect (original behavior preserved).
        return self._label_to_jobs[label]

    def add_job(self, job, parent_labels, child_labels):
        """Add job's label to labels.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            The job to add to the job labels.
        parent_labels : `list` [`str`]
            Parent job labels.
        child_labels : `list` [`str`]
            Children job labels.
        """
        _LOG.debug("job: %s (%s)", job.name, job.label)
        _LOG.debug("parent_labels: %s", parent_labels)
        _LOG.debug("child_labels: %s", child_labels)
        self._label_to_jobs[job.label].append(job)
        self._label_graph.add_node(job.label)
        for parent in parent_labels:
            # Fixed: skip same-label neighbors; a self-loop in the label
            # graph breaks topological sorting (add_job_relationships
            # already filters self-edges for the same reason).
            if parent != job.label:
                self._label_graph.add_edge(parent, job.label)
        for child in child_labels:
            if child != job.label:
                self._label_graph.add_edge(job.label, child)

    def add_job_relationships(self, parent_labels, children_labels):
        """Add dependencies between parent and child job labels.
        All parents will be connected to all children.

        Parameters
        ----------
        parent_labels : `list` [`str`]
            Parent job labels.
        children_labels : `list` [`str`]
            Children job labels.
        """
        if parent_labels is not None and children_labels is not None:
            # Since labels, must ensure not adding edge from label to itself.
            edges = [
                e
                for e in itertools.product(ensure_iterable(parent_labels), ensure_iterable(children_labels))
                if e[0] != e[1]
            ]

            self._label_graph.add_edges_from(edges)

    def del_job(self, job):
        """Delete job and its label from job labels.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            The job to delete from the job labels.
        """
        self._label_to_jobs[job.label].remove(job)
        # Don't leave keys around if removed last job
        if not self._label_to_jobs[job.label]:
            del self._label_to_jobs[job.label]

            # Fixed: only remove the label node when no jobs carry the
            # label anymore; previously the node was removed for every
            # deleted job, corrupting labels/job_counts for the jobs left
            # with that label.  Materialize neighbors before remove_node,
            # then reconnect parents to children so the label dependency
            # graph stays connected.
            parents = list(self._label_graph.predecessors(job.label))
            children = list(self._label_graph.successors(job.label))
            self._label_graph.remove_node(job.label)
            self._label_graph.add_edges_from(itertools.product(parents, children))