# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""Class definitions for a Generic Workflow Graph.
"""

import dataclasses
import itertools
from typing import Optional

import networkx as nx

from lsst.daf.butler.core.utils import iterable
from .bps_draw import draw_networkx_dot


@dataclasses.dataclass
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.
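
    Examples
    --------
    A minimal, illustrative sketch (the file name and URI are made up):

    >>> gwfile = GenericWorkflowFile("calexp", src_uri="/path/to/calexp.fits",
    ...                              wms_transfer=True)
    >>> gwfile.name
    'calexp'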

39 """ 

40 name: str 

41 """Lookup key (logical file name) of file/directory. Must be unique 

42 within run. 

43 """ 

    src_uri: Optional[str]  # unclear whether a ButlerURI is needed here
    """Original location of file/directory.
    """

    wms_transfer: bool
    """Whether the WMS is responsible for transferring this file.
    Defaults to False.
    """

    job_access_remote: bool
    """Whether the job can remotely access the file (using separately
    specified file access protocols). Defaults to False.
    """

    job_shared: bool
    """Whether this file can be shared among jobs, i.e., the job does not
    require its own copy. Defaults to False.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if given default
    # values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, wms_transfer: bool = False,
                 job_access_remote: bool = False, job_shared: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.wms_transfer = wms_transfer
        self.job_access_remote = job_access_remote
        self.job_shared = job_shared

    __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")

    def __hash__(self):
        return hash(self.name)


@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.
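
    Examples
    --------
    A minimal, illustrative sketch (the job name and values are made up):

    >>> job = GenericWorkflowJob("label1_tract9")
    >>> job.request_memory = 2048  # MB
    >>> job.cmdline = "pipetask run ..."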

82 """ 

83 name: str 

84 """Name of job. Must be unique within workflow. 

85 """ 

    label: Optional[str]
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    tags: Optional[dict]
    """Other key/value pairs for job that the user may want to use to
    filter reports.
    """

    cmdline: Optional[str]
    """Command line for job.
    """

    cmdvals: Optional[dict]
    """Values for variables in cmdline when using lazy command line
    creation.
    """

    request_memory: Optional[int]  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_cpus: Optional[int]  # cores
    """Max number of CPUs that the job is expected to need.
    """

    request_disk: Optional[int]  # MB
    """Max amount of job scratch disk (in MB) that the job is expected
    to need.
    """

    request_walltime: Optional[str]  # minutes
    """Max amount of time (in minutes) that the job is expected to need.
    """

    compute_site: Optional[str]
    """Key to look up site-specific information for running the job.
    """

    mail_to: Optional[str]
    """Comma-separated list of email addresses for emailing job status.
    """

    when_to_mail: Optional[str]
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: Optional[int]
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: Optional[int]
    """Exit code for job that means not to automatically retry.
    """

    abort_on_value: Optional[int]
    """Job exit value that signals to abort the entire workflow.
    """

    abort_return_value: Optional[int]
    """Exit value to use when aborting the entire workflow.
    """

    priority: Optional[str]
    """Initial priority of job in WMS format.
    """

    category: Optional[str]
    """WMS-facing label of job within single workflow (e.g., can be
    used for throttling jobs within a single workflow).
    """

    concurrency_limit: Optional[list]
    """Names of concurrency limits that the WMS plugin can translate
    appropriately to limit the number of instances of this job across
    all running workflows.
    """

    pre_cmdline: Optional[str]
    """Command line to be executed prior to executing job.
    """

    post_cmdline: Optional[str]
    """Command line to be executed after job executes.
    Should be executed regardless of exit status.
    """

    profile: Optional[dict]
    """Nested dictionary of WMS-specific key/value pairs with
    primary key being WMS key (e.g., pegasus, condor, panda).
    """

    attrs: Optional[dict]
    """Key/value pairs of job attributes (for WMS that have
    attributes in addition to commands).
    """

    environment: Optional[dict]
    """Environment variable names and values to be explicitly set
    inside job.
    """

    # As of python 3.7.8, can't use __slots__ if given default values,
    # so writing own __init__.
    def __init__(self, name: str):
        self.name = name
        self.label = None
        self.tags = {}
        self.cmdline = None
        self.cmdvals = {}
        self.request_memory = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = []
        self.pre_cmdline = None
        self.post_cmdline = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}

    __slots__ = ("name", "label", "tags", "mail_to", "when_to_mail",
                 "cmdline", "cmdvals", "transfer_executable",
                 "request_memory", "request_cpus", "request_disk", "request_walltime",
                 "number_of_retries", "retry_unless_exit", "abort_on_value", "abort_return_value",
                 "compute_site", "environment", "priority", "category", "concurrency_limit",
                 "pre_cmdline", "post_cmdline", "profile", "attrs")

    def __hash__(self):
        return hash(self.name)


class GenericWorkflow(nx.DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialize graph that is passed through to nx.DiGraph
        constructor. Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to nx.DiGraph constructor.
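
    Examples
    --------
    An illustrative sketch (job names are made up):

    >>> gw = GenericWorkflow("example_run")
    >>> gw.add_job(GenericWorkflowJob("jobA"))
    >>> gw.add_job(GenericWorkflowJob("jobB"), parent_names=["jobA"])
    >>> list(gw.successors("jobA"))
    ['jobB']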

239 """ 

240 def __init__(self, name, incoming_graph_data=None, **attr): 

241 super().__init__(incoming_graph_data, **attr) 

242 self._name = name 

243 self.run_attrs = {} 

244 self._files = {} 

245 self.run_id = None 

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        Need API in case the way files are stored changes (e.g., make
        workflow a bipartite graph with job and file nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
            (Defaults to False.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        files : `list` [`~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile`]
                or `list` [`str`]
            File names or objects from generic workflow meeting specifications.
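
        Examples
        --------
        An illustrative sketch (the file name is made up):

        >>> gw = GenericWorkflow("example_run")
        >>> gw.add_file(GenericWorkflowFile("f1", wms_transfer=True))
        >>> gw.get_files()
        ['f1']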

277 """ 

278 files = [] 

279 for filename, file in self._files.items(): 

280 if not transfer_only or file.wms_transfer: 

281 if not data: 

282 files.append(filename) 

283 else: 

284 files.append(file) 

285 return files 

    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job.
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job.
        """
        if not isinstance(job, GenericWorkflowJob):
            raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).")
        if self.has_node(job.name):
            raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.")
        super().add_node(job.name, job=job, inputs={}, outputs={})
        self.add_job_relationships(parent_names, job.name)
        self.add_job_relationships(job.name, child_names)

    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs. All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Child job names.
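
        Examples
        --------
        An illustrative sketch (job names are made up):

        >>> gw = GenericWorkflow("example_run")
        >>> for name in ("a", "b", "c"):
        ...     gw.add_job(GenericWorkflowJob(name))
        >>> gw.add_job_relationships(["a", "b"], "c")
        >>> sorted(gw.predecessors("c"))
        ['a', 'b']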

329 """ 

330 if parents is not None and children is not None: 

331 self.add_edges_from(itertools.product(iterable(parents), iterable(children))) 

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should
            be saved.
        attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently used).
        """
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow, leaving a connected graph.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
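
        Examples
        --------
        An illustrative sketch (job names are made up); deleting the middle
        job reconnects its parents to its children:

        >>> gw = GenericWorkflow("example_run")
        >>> for name in ("a", "b", "c"):
        ...     gw.add_job(GenericWorkflowJob(name))
        >>> gw.add_job_relationships("a", "b")
        >>> gw.add_job_relationships("b", "c")
        >>> gw.del_job("b")
        >>> list(gw.successors("a"))
        ['c']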

386 """ 

387 # Connect all parent jobs to all children jobs. 

388 parents = self.predecessors(job_name) 

389 children = self.successors(job_name) 

390 self.add_job_relationships(parents, children) 

391 

392 # Delete job node (which deleted edges). 

393 self.remove_node(job_name) 

    def add_job_inputs(self, job_name: str, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added.
        files : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile` or `list`
            File object(s) to be added as inputs to the specified job.
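
        Examples
        --------
        An illustrative sketch (job and file names are made up):

        >>> gw = GenericWorkflow("example_run")
        >>> gw.add_job(GenericWorkflowJob("jobA"))
        >>> gw.add_job_inputs("jobA", GenericWorkflowFile("in1", wms_transfer=True))
        >>> gw.get_job_inputs("jobA", data=False)
        ['in1']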

404 """ 

405 job_inputs = self.nodes[job_name]["inputs"] 

406 for file in iterable(files): 

407 # Save the central copy 

408 if file.name not in self._files: 

409 self._files[file.name] = file 

410 

411 # Save the job reference to the file 

412 job_inputs[file.name] = file 

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object.

        Returns
        -------
        gwfile : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile`
            File object to add to workflow.
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file data as well as the file object name.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        inputs : `list` of `~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile`
            Input files for the given job.
        """
        job_inputs = self.nodes[job_name]["inputs"]
        inputs = []
        for file_name in job_inputs:
            file = self._files[file_name]
            if not transfer_only or file.wms_transfer:
                if not data:
                    inputs.append(file_name)
                else:
                    inputs.append(self._files[file_name])
        return inputs

    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` of `~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile`
            File objects to be added as outputs for specified job.
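
        Examples
        --------
        An illustrative sketch (job and file names are made up):

        >>> gw = GenericWorkflow("example_run")
        >>> gw.add_job(GenericWorkflowJob("jobA"))
        >>> gw.add_job_outputs("jobA", [GenericWorkflowFile("out1")])
        >>> gw.get_job_outputs("jobA", data=False)
        ['out1']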

478 """ 

479 job_outputs = self.nodes[job_name]["outputs"] 

480 for file in files: 

481 # Save the central copy 

482 self.add_file(file.name) 

483 

484 # Save the job reference to the file 

485 job_outputs[file.name] = file 

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file data as well as the file object name.
            It defaults to `True` thus returning file data as well.
        transfer_only : `bool`
            Whether to only return files for which a workflow management
            system would be responsible for transferring. It defaults to
            `False` thus returning all output files.

        Returns
        -------
        outputs : `list` of `~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile`
            Output files for the given job.
        """
        job_outputs = self.nodes[job_name]["outputs"]
        outputs = []
        for file_name in job_outputs:
            file = self._files[file_name]
            if not transfer_only or file.wms_transfer:
                if not data:
                    outputs.append(file_name)
                else:
                    outputs.append(self._files[file_name])
        return outputs

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use. It defaults to the format
            for the dot program.
        """
        draw_funcs = {"dot": draw_networkx_dot}
        if format_ in draw_funcs:
            draw_funcs[format_](self, stream)
        else:
            raise RuntimeError(f"Unknown draw format ({format_})")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer. Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data. It defaults to pickle format.
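
        Examples
        --------
        An illustrative sketch using an in-memory stream:

        >>> import io
        >>> stream = io.BytesIO()
        >>> GenericWorkflow("example_run").save(stream)
        >>> stream.getbuffer().nbytes > 0
        True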

546 """ 

547 if format_ == "pickle": 

548 nx.write_gpickle(self, stream) 

549 else: 

550 raise RuntimeError(f"Unknown format ({format_})") 

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader. Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream. It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
            Generic workflow loaded from the given stream.
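
        Examples
        --------
        An illustrative round-trip sketch using an in-memory stream:

        >>> import io
        >>> stream = io.BytesIO()
        >>> GenericWorkflow("example_run").save(stream)
        >>> _ = stream.seek(0)
        >>> GenericWorkflow.load(stream).name
        'example_run'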

569 """ 

570 if format_ == "pickle": 

571 return nx.read_gpickle(stream) 

572 

573 raise RuntimeError(f"Unknown format ({format_})") 

    def validate(self):
        """Run checks to ensure this is still a valid generic workflow graph.

577 """ 

578 # Make sure a directed acyclic graph 

579 assert nx.algorithms.dag.is_directed_acyclic_graph(self)