Coverage for python/lsst/ctrl/bps/generic_workflow.py : 33%

# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Class definitions for a Generic Workflow Graph
23"""
25import dataclasses
26import itertools
27from typing import Optional
29import networkx as nx
31from lsst.daf.butler.core.utils import iterable
32from .bps_draw import draw_networkx_dot


@dataclasses.dataclass
class GenericWorkflowFile:
    """Information about a file that may be needed by various workflow
    management services.
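
    Examples
    --------
    A minimal illustrative sketch (the file name and path here are
    hypothetical):

    >>> gwfile = GenericWorkflowFile("run_script", src_uri="/path/to/script.sh",
    ...                              wms_transfer=True)
    >>> gwfile.name
    'run_script'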
39 """
40 name: str
41 """Lookup key (logical file name) of file/directory. Must be unique
42 within run.
43 """

    src_uri: Optional[str]  # don't know whether this needs to be a ButlerURI
    """Original location of file/directory.
    """

    wms_transfer: bool
    """Whether the WMS should handle transferring the file. Default is False.
    """

    job_access_remote: bool
    """Whether the job can remotely access file (using separately specified
    file access protocols). Default is False.
    """

    job_shared: bool
    """Whether the file can be shared between jobs rather than each job
    requiring its own copy. Default is False.
    """

    # As of python 3.7.8, can't use __slots__ + dataclass if given default
    # values, so writing own __init__.
    def __init__(self, name: str, src_uri: Optional[str] = None, wms_transfer: bool = False,
                 job_access_remote: bool = False, job_shared: bool = False):
        self.name = name
        self.src_uri = src_uri
        self.wms_transfer = wms_transfer
        self.job_access_remote = job_access_remote
        self.job_shared = job_shared

    __slots__ = ("name", "src_uri", "wms_transfer", "job_access_remote", "job_shared")

    def __hash__(self):
        return hash(self.name)


@dataclasses.dataclass
class GenericWorkflowJob:
    """Information about a job that may be needed by various workflow
    management services.
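
    Examples
    --------
    A minimal illustrative sketch (the job name and field values here are
    hypothetical); a job is constructed with just a name and then configured
    by assigning to its fields:

    >>> job = GenericWorkflowJob("label1_tract9813")
    >>> job.cmdline = "echo hello"
    >>> job.request_memory = 2048  # MB
    >>> job.name
    'label1_tract9813'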
82 """
83 name: str
84 """Name of job. Must be unique within workflow.
85 """

    label: Optional[str]
    """Primary user-facing label for job. Does not need to be unique
    and may be used for summary reports.
    """

    tags: Optional[dict]
    """Other key/value pairs for job that the user may want to use to filter
    reports.
    """

    cmdline: Optional[str]
    """Command line for job.
    """

    cmdvals: Optional[dict]
    """Values for variables in cmdline when using lazy command line creation.
    """

    request_memory: Optional[int]  # MB
    """Max memory (in MB) that the job is expected to need.
    """

    request_cpus: Optional[int]  # cores
    """Max number of cpus that the job is expected to need.
    """

    request_disk: Optional[int]  # MB
    """Max amount of job scratch disk (in MB) that the job is expected to need.
    """

    request_walltime: Optional[str]  # minutes
    """Max amount of time (in minutes) that the job is expected to need.
    """

    compute_site: Optional[str]
    """Key to look up site-specific information for running the job.
    """

    mail_to: Optional[str]
    """Comma separated list of email addresses for emailing job status.
    """

    when_to_mail: Optional[str]
    """WMS-specific terminology for when to email job status.
    """

    number_of_retries: Optional[int]
    """Number of times to automatically retry a failed job.
    """

    retry_unless_exit: Optional[int]
    """Exit code which means the job should not be automatically retried.
    """

    abort_on_value: Optional[int]
    """Job exit value that signals the entire workflow should be aborted.
    """

    abort_return_value: Optional[int]
    """Exit value to use when aborting the entire workflow.
    """

    priority: Optional[str]
    """Initial priority of job in WMS format.
    """

    category: Optional[str]
    """WMS-facing label of job within single workflow (e.g., can be
    used for throttling jobs within a single workflow).
    """

    concurrency_limit: Optional[list]
    """Names of concurrency limits that the WMS plugin can appropriately
    translate to limit the number of these jobs running across all workflows.
    """

    pre_cmdline: Optional[str]
    """Command line to be executed prior to executing job.
    """

    post_cmdline: Optional[str]
    """Command line to be executed after job executes.
    Should be executed regardless of exit status.
    """

    profile: Optional[dict]
    """Nested dictionary of WMS-specific key/value pairs with
    primary key being WMS key (e.g., pegasus, condor, panda).
    """

    attrs: Optional[dict]
    """Key/value pairs of job attributes (for WMS that have
    attributes in addition to commands).
    """

    environment: Optional[dict]
    """Environment variable names and values to be explicitly set
    inside job.
    """

    # As of python 3.7.8, can't use __slots__ if given default values,
    # so writing own __init__.
    def __init__(self, name: str):
        self.name = name
        self.label = None
        self.tags = {}
        self.cmdline = None
        self.cmdvals = {}
        self.request_memory = None
        self.request_cpus = None
        self.request_disk = None
        self.request_walltime = None
        self.compute_site = None
        self.mail_to = None
        self.when_to_mail = None
        self.number_of_retries = None
        self.retry_unless_exit = None
        self.abort_on_value = None
        self.abort_return_value = None
        self.priority = None
        self.category = None
        self.concurrency_limit = []
        self.pre_cmdline = None
        self.post_cmdline = None
        self.profile = {}
        self.attrs = {}
        self.environment = {}

    __slots__ = ("name", "label", "tags", "mail_to", "when_to_mail",
                 "cmdline", "cmdvals",
                 "request_memory", "request_cpus", "request_disk", "request_walltime",
                 "number_of_retries", "retry_unless_exit", "abort_on_value", "abort_return_value",
                 "compute_site", "environment", "priority", "category", "concurrency_limit",
                 "pre_cmdline", "post_cmdline", "profile", "attrs")

    def __hash__(self):
        return hash(self.name)


class GenericWorkflow(nx.DiGraph):
    """A generic representation of a workflow used to submit to specific
    workflow management systems.

    Parameters
    ----------
    name : `str`
        Name of generic workflow.
    incoming_graph_data : `Any`, optional
        Data used to initialize the graph that is passed through to the
        nx.DiGraph constructor. Can be any type supported by networkx.DiGraph.
    attr : `dict`
        Keyword arguments passed through to the nx.DiGraph constructor.
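
    Examples
    --------
    A tiny illustrative workflow (all job names are hypothetical):

    >>> gwf = GenericWorkflow("example_run")
    >>> gwf.add_job(GenericWorkflowJob("jobA"))
    >>> gwf.add_job(GenericWorkflowJob("jobB"), parent_names=["jobA"])
    >>> gwf.name
    'example_run'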
239 """
240 def __init__(self, name, incoming_graph_data=None, **attr):
241 super().__init__(incoming_graph_data, **attr)
242 self._name = name
243 self.run_attrs = {}
244 self._files = {}
245 self.run_id = None

    @property
    def name(self):
        """Retrieve name of generic workflow.

        Returns
        -------
        name : `str`
            Name of generic workflow.
        """
        return self._name

    def get_files(self, data=False, transfer_only=True):
        """Retrieve files from generic workflow.

        This API exists in case the way files are stored changes (e.g.,
        making the workflow a bipartite graph with job and file nodes).

        Parameters
        ----------
        data : `bool`, optional
            Whether to return the file objects rather than just the file
            names. (The default is False.)
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        files : `list` [`~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile`]
            or `list` [`str`]
            File names or objects from generic workflow meeting specifications.
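
        Examples
        --------
        An illustrative sketch (file names are hypothetical):

        >>> gwf = GenericWorkflow("demo")
        >>> gwf.add_file(GenericWorkflowFile("f1", wms_transfer=True))
        >>> gwf.add_file(GenericWorkflowFile("f2"))
        >>> gwf.get_files(transfer_only=True)
        ['f1']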
277 """
278 files = []
279 for filename, file in self._files.items():
280 if not transfer_only or file.wms_transfer:
281 if not data:
282 files.append(filename)
283 else:
284 files.append(file)
285 return files

    def add_job(self, job, parent_names=None, child_names=None):
        """Add job to generic workflow.

        Parameters
        ----------
        job : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
            Job to add to the generic workflow.
        parent_names : `list` [`str`], optional
            Names of jobs that are parents of given job.
        child_names : `list` [`str`], optional
            Names of jobs that are children of given job.
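
        Examples
        --------
        An illustrative sketch (job names are hypothetical):

        >>> gwf = GenericWorkflow("demo")
        >>> gwf.add_job(GenericWorkflowJob("jobA"))
        >>> gwf.add_job(GenericWorkflowJob("jobB"), parent_names=["jobA"])
        >>> list(gwf.successors("jobA"))
        ['jobB']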
298 """
299 if not isinstance(job, GenericWorkflowJob):
300 raise RuntimeError(f"Invalid type for job to be added to GenericWorkflowGraph ({type(job)}).")
301 if self.has_node(job.name):
302 raise RuntimeError(f"Job {job.name} already exists in GenericWorkflowGraph.")
303 super().add_node(job.name, job=job, inputs={}, outputs={})
304 self.add_job_relationships(parent_names, job.name)
305 self.add_job_relationships(job.name, child_names)

    def add_node(self, node_for_adding, **attr):
        """Override networkx function to call more specific add_job function.

        Parameters
        ----------
        node_for_adding : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
            Job to be added to generic workflow.
        attr :
            Needed to match original networkx function, but not used.
        """
        self.add_job(node_for_adding)

    def add_job_relationships(self, parents, children):
        """Add dependencies between parent and child jobs. All parents will
        be connected to all children.

        Parameters
        ----------
        parents : `list` [`str`]
            Parent job names.
        children : `list` [`str`]
            Child job names.
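
        Examples
        --------
        An illustrative sketch (job names are hypothetical); all jobs must
        already exist in the workflow:

        >>> gwf = GenericWorkflow("demo")
        >>> for name in ("init", "jobA", "jobB"):
        ...     gwf.add_job(GenericWorkflowJob(name))
        >>> gwf.add_job_relationships(["init"], ["jobA", "jobB"])
        >>> sorted(gwf.successors("init"))
        ['jobA', 'jobB']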
329 """
330 if parents is not None and children is not None:
331 self.add_edges_from(itertools.product(iterable(parents), iterable(children)))

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add several edges between jobs in the generic workflow.

        Parameters
        ----------
        ebunch_to_add : Iterable [`tuple`]
            Iterable of job name pairs between which a dependency should be
            saved.
        attr : keyword arguments, optional
            Data can be assigned using keyword arguments (not currently
            used).
        """
        for edge_to_add in ebunch_to_add:
            self.add_edge(edge_to_add[0], edge_to_add[1], **attr)

    def add_edge(self, u_of_edge: str, v_of_edge: str, **attr):
        """Add edge connecting jobs in workflow.

        Parameters
        ----------
        u_of_edge : `str`
            Name of parent job.
        v_of_edge : `str`
            Name of child job.
        attr : keyword arguments, optional
            Attributes to save with edge.
        """
        if u_of_edge not in self:
            raise RuntimeError(f"{u_of_edge} not in GenericWorkflow")
        if v_of_edge not in self:
            raise RuntimeError(f"{v_of_edge} not in GenericWorkflow")
        super().add_edge(u_of_edge, v_of_edge, **attr)

    def get_job(self, job_name: str):
        """Retrieve job by name from workflow.

        Parameters
        ----------
        job_name : `str`
            Name of job to retrieve.

        Returns
        -------
        job : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowJob`
            Job matching given job_name.
        """
        return self.nodes[job_name]["job"]

    def del_job(self, job_name: str):
        """Delete job from generic workflow, leaving the graph connected.

        Parameters
        ----------
        job_name : `str`
            Name of job to delete from workflow.
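
        Examples
        --------
        An illustrative sketch (job names are hypothetical); deleting the
        middle job reconnects its parents to its children:

        >>> gwf = GenericWorkflow("demo")
        >>> gwf.add_job(GenericWorkflowJob("a"))
        >>> gwf.add_job(GenericWorkflowJob("b"), parent_names=["a"])
        >>> gwf.add_job(GenericWorkflowJob("c"), parent_names=["b"])
        >>> gwf.del_job("b")
        >>> list(gwf.successors("a"))
        ['c']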
386 """
387 # Connect all parent jobs to all children jobs.
388 parents = self.predecessors(job_name)
389 children = self.successors(job_name)
390 self.add_job_relationships(parents, children)
392 # Delete job node (which deleted edges).
393 self.remove_node(job_name)

    def add_job_inputs(self, job_name: str, files):
        """Add files as inputs to specified job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which inputs should be added.
        files : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile` or `list`
            File object(s) to be added as inputs to the specified job.
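
        Examples
        --------
        An illustrative sketch (job and file names are hypothetical):

        >>> gwf = GenericWorkflow("demo")
        >>> gwf.add_job(GenericWorkflowJob("jobA"))
        >>> gwf.add_job_inputs("jobA", GenericWorkflowFile("cat.fits"))
        >>> gwf.get_job_inputs("jobA", data=False)
        ['cat.fits']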
404 """
405 job_inputs = self.nodes[job_name]["inputs"]
406 for file in iterable(files):
407 # Save the central copy
408 if file.name not in self._files:
409 self._files[file.name] = file
411 # Save the job reference to the file
412 job_inputs[file.name] = file

    def get_file(self, name):
        """Retrieve a file object by name.

        Parameters
        ----------
        name : `str`
            Name of file object.

        Returns
        -------
        gwfile : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile`
            File matching given name.
        """
        return self._files[name]

    def add_file(self, gwfile):
        """Add file object.

        Parameters
        ----------
        gwfile : `~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile`
            File object to add to workflow.
        """
        if gwfile.name not in self._files:
            self._files[gwfile.name] = gwfile

    def get_job_inputs(self, job_name, data=True, transfer_only=False):
        """Return the input files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`, optional
            Whether to return the file objects rather than just the file
            names.
        transfer_only : `bool`, optional
            Whether to only return files for which a workflow management
            system would be responsible for transferring.

        Returns
        -------
        inputs : `list` of `~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile`
            Input files for the given job.
        """
        job_inputs = self.nodes[job_name]["inputs"]
        inputs = []
        for file_name in job_inputs:
            file = self._files[file_name]
            if not transfer_only or file.wms_transfer:
                if not data:
                    inputs.append(file_name)
                else:
                    inputs.append(self._files[file_name])
        return inputs

    def add_job_outputs(self, job_name, files):
        """Add output files to a job.

        Parameters
        ----------
        job_name : `str`
            Name of job to which the files should be added as outputs.
        files : `list` of `~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile`
            File objects to be added as outputs for specified job.
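
        Examples
        --------
        An illustrative sketch (job and file names are hypothetical):

        >>> gwf = GenericWorkflow("demo")
        >>> gwf.add_job(GenericWorkflowJob("jobA"))
        >>> gwf.add_job_outputs("jobA", [GenericWorkflowFile("out.fits")])
        >>> gwf.get_job_outputs("jobA", data=False)
        ['out.fits']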
478 """
479 job_outputs = self.nodes[job_name]["outputs"]
480 for file in files:
481 # Save the central copy
482 self.add_file(file.name)
484 # Save the job reference to the file
485 job_outputs[file.name] = file

    def get_job_outputs(self, job_name, data=True, transfer_only=False):
        """Return the output files for the given job.

        Parameters
        ----------
        job_name : `str`
            Name of the job.
        data : `bool`
            Whether to return the file objects rather than just the file
            names. It defaults to `True`, thus returning the file objects.
        transfer_only : `bool`
            Whether to only return files for which a workflow management
            system would be responsible for transferring. It defaults to
            `False`, thus returning all output files.

        Returns
        -------
        outputs : `list` of `~lsst.ctrl.bps.generic_workflow.GenericWorkflowFile`
            Output files for the given job.
        """
        job_outputs = self.nodes[job_name]["outputs"]
        outputs = []
        for file_name in job_outputs:
            file = self._files[file_name]
            if not transfer_only or file.wms_transfer:
                if not data:
                    outputs.append(file_name)
                else:
                    outputs.append(self._files[file_name])
        return outputs

    def draw(self, stream, format_="dot"):
        """Output generic workflow in a visualization format.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to which the visualization should be written.
        format_ : `str`, optional
            Which visualization format to use. It defaults to the format for
            the dot program.
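
        Examples
        --------
        An illustrative sketch (the output path is hypothetical):

        >>> with open("/tmp/workflow.dot", "w") as fh:  # doctest: +SKIP
        ...     gwf.draw(fh, format_="dot")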
528 """
529 draw_funcs = {"dot": draw_networkx_dot}
530 if format_ in draw_funcs:
531 draw_funcs[format_](self, stream)
532 else:
533 raise RuntimeError(f"Unknown draw format ({format_}")

    def save(self, stream, format_="pickle"):
        """Save the generic workflow in a format that is loadable.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific writer. Accepts anything
            that the writer accepts.
        format_ : `str`, optional
            Format in which to write the data. It defaults to pickle format.
        """
        if format_ == "pickle":
            nx.write_gpickle(self, stream)
        else:
            raise RuntimeError(f"Unknown format ({format_})")

    @classmethod
    def load(cls, stream, format_="pickle"):
        """Load a GenericWorkflow from the given stream.

        Parameters
        ----------
        stream : `str` or `io.BufferedIOBase`
            Stream to pass to the format-specific loader. Accepts anything
            that the loader accepts.
        format_ : `str`, optional
            Format of data to expect when loading from stream. It defaults
            to pickle format.

        Returns
        -------
        generic_workflow : `~lsst.ctrl.bps.generic_workflow.GenericWorkflow`
            Generic workflow loaded from the given stream.
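
        Examples
        --------
        An illustrative round trip through a pickle file (the path is
        hypothetical; skipped because it touches the filesystem):

        >>> gwf = GenericWorkflow("demo")
        >>> gwf.save("/tmp/demo.pickle")  # doctest: +SKIP
        >>> GenericWorkflow.load("/tmp/demo.pickle").name  # doctest: +SKIP
        'demo'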
569 """
570 if format_ == "pickle":
571 return nx.read_gpickle(stream)
573 raise RuntimeError(f"Unknown format ({format_})")

    def validate(self):
        """Run checks to ensure this is still a valid generic workflow graph.
        """
        # Make sure the graph is a directed acyclic graph.
        assert nx.algorithms.dag.is_directed_acyclic_graph(self)