Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Class definitions for a Generic Workflow Graph. 

23""" 

24 

25__all__ = ["GenericWorkflow", "GenericWorkflowFile", "GenericWorkflowJob"] 

26 

27 

28import dataclasses 

29import itertools 

30from typing import Optional 

31 

32import networkx as nx 

33 

34from lsst.daf.butler.core.utils import iterable 

35from .bps_draw import draw_networkx_dot 

36 

37 

@dataclasses.dataclass
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.
    """
    name: str
    """Lookup key (logical file name) of file/directory. Must be unique
    within run.
    """

    # Fix: the original annotation was the runtime expression `str or None`,
    # which evaluates to plain `str` and is not a valid "maybe None" hint.
    src_uri: Optional[str]  # don't know that need ButlerURI
    """Original location of file/directory.
    """

    wms_transfer: bool
    """Whether the workflow management system is responsible for transferring
    this file (see ``transfer_only`` in ``GenericWorkflow.get_files``).
    Default is False.
    """

    job_access_remote: bool
    """Whether the job can remotely access file (using separately specified
    file access protocols). Default is False.
    """

    # NOTE(review): the attribute name suggests "file can be shared between
    # jobs" while the docstring below says the opposite — confirm intent.
    job_shared: bool
    """Whether job requires its own copy of this file. Default is False.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if give default
    # values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, wms_transfer: bool = False,
                 job_access_remote: bool = False, job_shared: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.wms_transfer = wms_transfer
        self.job_access_remote = job_access_remote
        self.job_shared = job_shared

    __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")

    def __hash__(self):
        # Logical file names are unique within a run, so the name alone
        # determines identity.
        return hash(self.name)

79 

80 

@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.
    """
    name: str
    """Name of job. Must be unique within workflow.
    """

    label: Optional[str]
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    tags: Optional[dict]
    """Other key/value pairs for job that user may want to use as a filter.
    """

    cmdline: Optional[str]
    """Command line for job.
    """

    cmdvals: Optional[dict]
    """Values for variables in cmdline when using lazy command line creation.
    """

    # Fix: this attribute was listed in __slots__ but never declared or
    # initialized, so reading it raised AttributeError. Initialized to False
    # in __init__ below.
    transfer_executable: Optional[bool]
    """Whether the WMS should transfer the job executable.  Default is False.
    (Presumed from the name — confirm against WMS plugin usage.)
    """

    request_memory: Optional[int]  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_cpus: Optional[int]  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: Optional[int]  # MB
    """Max amount of job scratch disk (in MB) that the job is expected to need.
    """

    # NOTE(review): inline comment said minutes while the docstring said
    # seconds — units need confirmation against the WMS plugins.
    request_walltime: Optional[str]  # minutes
    """Max amount of time that the job is expected to need.
    """

    compute_site: Optional[str]
    """Key to look up site-specific information for running the job.
    """

    mail_to: Optional[str]
    """Comma separated list of email addresses for emailing job status.
    """

    when_to_mail: Optional[str]
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: Optional[int]
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: Optional[int]
    """Exit code for job that means to not automatically retry.
    """

    abort_on_value: Optional[int]
    """Job exit value for signals to abort the entire workflow.
    """

    abort_return_value: Optional[int]
    """Exit value to use when aborting the entire workflow.
    """

    priority: Optional[str]
    """Initial priority of job in WMS-format.
    """

    category: Optional[str]
    """WMS-facing label of job within single workflow (e.g., can be used for
    throttling jobs within a single workflow).
    """

    concurrency_limit: Optional[list]
    """Names of concurrency limits that the WMS plugin can appropriately
    translate to limit the number of this job across all running workflows.
    """

    pre_cmdline: Optional[str]
    """Command line to be executed prior to executing job.
    """

    post_cmdline: Optional[str]
    """Command line to be executed after job executes.

    Should be executed regardless of exit status.
    """

    profile: Optional[dict]
    """Nested dictionary of WMS-specific key/value pairs with primary key being
    WMS key (e.g., pegasus, condor, panda).
    """

    attrs: Optional[dict]
    """Key/value pairs of job attributes (for WMS that have attributes in
    addition to commands).
    """

    environment: Optional[dict]
    """Environment variable names and values to be explicitly set inside job.
    """

    # As of python 3.7.8, can't use __slots__ if give default values, so
    # writing own __init__.
    def __init__(self, name: str):
        self.name = name
        self.label = None
        self.tags = {}
        self.cmdline = None
        self.cmdvals = {}
        self.transfer_executable = False
        self.request_memory = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = []
        self.pre_cmdline = None
        self.post_cmdline = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}

    __slots__ = ("name", "label", "tags", "mail_to", "when_to_mail",
                 "cmdline", "cmdvals", "transfer_executable",
                 "request_memory", "request_cpus", "request_disk", "request_walltime",
                 "number_of_retries", "retry_unless_exit", "abort_on_value", "abort_return_value",
                 "compute_site", "environment", "priority", "category", "concurrency_limit",
                 "pre_cmdline", "post_cmdline", "profile", "attrs")

    def __hash__(self):
        # Job names are unique within a workflow, so the name alone
        # determines identity.
        return hash(self.name)

226 

227 

class GenericWorkflow(nx.DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialized graph that is passed through to nx.DiGraph
        constructor.  Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to nx.DiGraph constructor.
    """
    def __init__(self, name, incoming_graph_data=None, **attr):
        super().__init__(incoming_graph_data, **attr)
        self._name = name
        self.run_attrs = {}
        # Central mapping of logical file name -> GenericWorkflowFile, shared
        # across jobs so each file object has a single canonical copy.
        self._files = {}
        self.run_id = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        Need API in case change way files are stored (e.g., make
        workflow a bipartite graph with jobs and files nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
            (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`] or `list` [`str`]
            File names or objects from generic workflow meeting
            specifications.
        """
        files = []
        for filename, file in self._files.items():
            if not transfer_only or file.wms_transfer:
                if not data:
                    files.append(filename)
                else:
                    files.append(file)
        return files

    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job

        Raises
        ------
        RuntimeError
            If the given object is not a GenericWorkflowJob or a job with
            the same name already exists in the workflow.
        """
        if not isinstance(job, GenericWorkflowJob):
            raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).")
        if self.has_node(job.name):
            raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.")
        super().add_node(job.name, job=job, inputs={}, outputs={})
        self.add_job_relationships(parent_names, job.name)
        self.add_job_relationships(job.name, child_names)

    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `lsst.ctrl.bps.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs.  All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Children job names.
        """
        if parents is not None and children is not None:
            # iterable() lets callers pass a single name or a collection.
            self.add_edges_from(itertools.product(iterable(parents), iterable(children)))

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should be
            saved.
        attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently used).
        """
        # Go through self.add_edge so both endpoints are validated.
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.

        Raises
        ------
        RuntimeError
            If either job name is not already a node in the workflow.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `lsst.ctrl.bps.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow leaving connected graph.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
        """
        # Materialize before mutating the graph; connect all parent jobs to
        # all children jobs so the DAG stays connected.
        parents = list(self.predecessors(job_name))
        children = list(self.successors(job_name))
        self.add_job_relationships(parents, children)

        # Delete job node (which also deletes its edges).
        self.remove_node(job_name)

    def add_job_inputs(self, job_name: str, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added
        files : `lsst.ctrl.bps.GenericWorkflowFile` or \
                `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File object(s) to be added as inputs to the specified job.
        """
        job_inputs = self.nodes[job_name]["inputs"]
        for file in iterable(files):
            # Save the central copy
            if file.name not in self._files:
                self._files[file.name] = file

            # Save the job reference to the file
            job_inputs[file.name] = file

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object

        Returns
        -------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `lsst.ctrl.bps.GenericWorkflowFile`
            File object to add to workflow
        """
        # First copy registered for a name wins; duplicates are ignored.
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        inputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Input files for the given job.
        """
        job_inputs = self.nodes[job_name]["inputs"]
        inputs = []
        for file_name in job_inputs:
            file = self._files[file_name]
            if not transfer_only or file.wms_transfer:
                if not data:
                    inputs.append(file_name)
                else:
                    inputs.append(file)
        return inputs

    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            File objects to be added as outputs for specified job.
        """
        job_outputs = self.nodes[job_name]["outputs"]
        for file in files:
            # Save the central copy.  Bug fix: previously passed file.name
            # (a str) but add_file expects the file object and reads .name
            # from it, which raised AttributeError.
            self.add_file(file)

            # Save the job reference to the file
            job_outputs[file.name] = file

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management
            system would be responsible for transferring. It defaults to
            `False` thus returning all output files.

        Returns
        -------
        outputs : `list` [`lsst.ctrl.bps.GenericWorkflowFile`]
            Output files for the given job.
        """
        job_outputs = self.nodes[job_name]["outputs"]
        outputs = []
        for file_name in job_outputs:
            file = self._files[file_name]
            if not transfer_only or file.wms_transfer:
                if not data:
                    outputs.append(file_name)
                else:
                    outputs.append(file)
        return outputs

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use.  It defaults to the format for
            the dot program.

        Raises
        ------
        RuntimeError
            If the given format is not supported.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            # Bug fix: message was missing the closing parenthesis.
            raise RuntimeError(f"Unknown draw format ({format_})")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer.  Accepts anything
            that the writer accepts.

        format_ : `str`, optional
            Format in which to write the data. It defaults to pickle format.

        Raises
        ------
        RuntimeError
            If the given format is not supported.
        """
        if format_ == "pickle":
            nx.write_gpickle(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader. Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream.  It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `lsst.ctrl.bps.GenericWorkflow`
            Generic workflow loaded from the given stream

        Raises
        ------
        RuntimeError
            If the given format is not supported.
        """
        if format_ == "pickle":
            return nx.read_gpickle(stream)

        raise RuntimeError(f"Unknown format ({format_})")

    def validate(self):
        """Run checks to ensure this is still a valid generic workflow graph.
        """
        # Make sure a directed acyclic graph.
        # NOTE(review): assert is stripped under `python -O`; kept for
        # backward compatibility (callers may expect AssertionError).
        assert nx.algorithms.dag.is_directed_acyclic_graph(self)