# This file is part of ctrl_bps.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Core functionality of BPS.
"""

__all__ = ("BpsCore",)

import logging
import subprocess
import itertools
import os
import datetime
from os.path import expandvars, basename
import re
import pickle
import shlex
import shutil
import time
import networkx
from networkx import bipartite
import yaml
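
# StringIO moved into the io module in Python 3; fall back for Python 2.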
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO

import lsst.log
from lsst.daf.butler import DimensionUniverse
from lsst.pipe.base.graph import QuantumGraph
from lsst.ctrl.bps.bps_config import BpsConfig
from lsst.daf.butler.core.config import Loader
from lsst.ctrl.bps.bps_draw import draw_networkx_dot

# Config section search order
BPS_SEARCH_ORDER = ["payload", "pipetask", "site", "global"]

# Graph node type constants
FILENODE = 0
TASKNODE = 1

# logging properties
_LOG_PROP = """\
log4j.rootLogger=INFO, A1
log4j.appender.A1=ConsoleAppender
log4j.appender.A1.Target=System.err
log4j.appender.A1.layout=PatternLayout
log4j.appender.A1.layout.ConversionPattern={}
"""

_LOG = logging.getLogger()


def execute(command, filename):
    """Execute a command.

    Parameters
    ----------
    command : `str`
        String representing the command to execute.
    filename : `str`
        File to which both stdout and stderr of the command are written.

    Returns
    -------
    exit_code : `int`
        The exit code with which the executed command finished.
    """
    buffer_size = 5000
    with open(filename, "w") as f:
        f.write(command)
        f.write("\n")
        process = subprocess.Popen(
            shlex.split(command), shell=False, stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT
        )
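        # Drain the combined stdout/stderr pipe in chunks, mirroring it to
        # the log file, until the process has exited and the pipe is empty.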
        buffer = os.read(process.stdout.fileno(), buffer_size).decode()
        while process.poll() is None or len(buffer) != 0:
            f.write(buffer)
            buffer = os.read(process.stdout.fileno(), buffer_size).decode()
        process.stdout.close()
        process.wait()
    return process.returncode


def pretty_dataset_label(orig_name):
    """Tweak a dataset name for use as a graph node label.

    Parameters
    ----------
    orig_name : `str`
        Dataset name as a string.

    Returns
    -------
    new_name : `str`
        Reformatted dataset name for use as a label.
    """
    new_name = re.sub(r": ", "=", orig_name)
    new_name = re.sub(r"\+", "\n", new_name)
    new_name = re.sub(r",", "\n", new_name)
    new_name = re.sub(r"[\{\}]", "", new_name)
    return new_name


def save_qg_subgraph(qnodes, qgraph, out_filename):
    """Save a subgraph of quanta to file.

    Parameters
    ----------
    qnodes : `lsst.pipe.base.graph.quantumNode.QuantumNode` or
             iterable of `lsst.pipe.base.graph.quantumNode.QuantumNode`
        QuantumNodes for quanta inside the given qgraph to save.
    qgraph : `lsst.pipe.base.graph.QuantumGraph`
        QuantumGraph from which to take the subgraph.
    out_filename : `str`
        Name of the output file.
    """
    # create subgraph
    subgraph = qgraph.subset(qnodes)

    # output to file
    os.makedirs(os.path.dirname(out_filename), exist_ok=True)
    with open(out_filename, "wb") as outfh:
        subgraph.save(outfh)


class BpsCore():
    """Contains information needed for submitting a run.
    """
    @staticmethod
    def config_log(longlog):
        """Configure logging system.

        Parameters
        ----------
        longlog : `bool`
            If True then make log messages appear in "long format".
        """
        if longlog:
            message_fmt = "%-5p %d{yyyy-MM-ddThh:mm:ss.sss} %c (%X{LABEL})(%F:%L)- %m%n"
        else:
            message_fmt = "%c %p: %m%n"

        lsst.log.configure_prop(_LOG_PROP.format(message_fmt))

    def __init__(self, configFile, **kwargs):
        self.config_log(False)
        self.config = BpsConfig(configFile, BPS_SEARCH_ORDER)
        _LOG.debug("Core kwargs = '%s'", kwargs)
        self.config[".global.timestamp"] = "{:%Y%m%dT%Hh%Mm%Ss}".format(datetime.datetime.now())
        if "uniqProcName" not in self.config:
            self.config[".global.uniqProcName"] = self.config["outCollection"].replace("/", "_")

        if len(kwargs.get("overrides", {})) > 0:
            overrides_io = StringIO(kwargs["overrides"])
            dct = yaml.load(overrides_io, Loader)
            self.config.update(dct)

        self.submit_path = self.config["submitPath"]
        _LOG.info("submit_path = '%s'", self.submit_path)

        # make directories
        os.makedirs(self.submit_path, exist_ok=True)

        if self.config.get("saveDot", {"default": False}):
            os.makedirs("%s/draw" % self.submit_path, exist_ok=True)

        self.butler = None
        self.pipeline_labels = []
        self.qgraph_filename = None
        self.qgraph = None
        self.sci_graph = None
        self.gen_wf_graph = None
        self.gen_wf_config = None
        self.workflow = None

    def _create_cmdline_building_qgraph(self):
        """Create the command for generating QuantumGraph from scratch.

        Returns
        -------
        cmd : `str`
            String representing the command to generate QuantumGraph.
        """
204 cmd = ["pipetask"]
205 cmd.append("qgraph") # pipetask subcommand
207 found, data_query = self.config.search("dataQuery")
208 if found:
209 cmd.append('-d "%s"' % data_query)
210 found, butler_config = self.config.search("butlerConfig")
211 if found:
212 cmd.append("-b %s" % (expandvars(butler_config)))
214 if "packageSearch" in self.config:
215 for pkg in self.config["packageSearch"].split(","):
216 cmd.append("-p %s" % pkg.strip())
218 cmd.append("-i %s" % (self.config["inCollection"]))
219 cmd.append("-o notused")
220 # cmd.append('--output-run %s' % (self.config["outCollection"]))
221 if "pipelineYaml" in self.config:
222 cmd.append("-p %s" % (self.config["pipelineYaml"]))
223 else:
224 for task_abbrev in [x.strip() for x in self.pipeline_labels]:
225 pipetask = self.config["pipetask"][task_abbrev]
226 cmd.append("-t %s:%s" % (pipetask["module"], task_abbrev))
227 if "configFile" in pipetask:
228 cmd.append("-C %s:%s" % (task_abbrev, expandvars(pipetask["configFile"])))
229 if "configOverride" in pipetask:
230 cmd.append("-c %s:%s" % (task_abbrev, expandvars(pipetask["configOverride"])))
232 cmd.append("-q %s" % (self.qgraph_filename))
234 if self.config.get("saveDot", {"default": False}):
235 cmd.append("--pipeline-dot %s/draw/pipetask_pipeline.dot" % (self.submit_path))
236 cmd.append("--qgraph-dot %s/draw/pipetask_qgraph.dot" % (self.submit_path))
238 return " ".join(cmd)

    def _create_quantum_graph(self):
        """Create QuantumGraph.
        """
        _LOG.debug("submit_path = '%s'", self.submit_path)
        self.qgraph_filename = "%s/%s.pickle" % (self.submit_path, self.config["uniqProcName"])

        args = {"curvals": {"qgraphfile": self.qgraph_filename}}
        found, cmd = self.config.search("createQuantumGraph", opt=args)
        if not found:
            cmd = self._create_cmdline_building_qgraph()
            _LOG.warning("command for generating Quantum Graph not found; "
                         "generated one from scratch")
        _LOG.info(cmd)

        out = f"{self.submit_path}/quantumGraphGeneration.out"
        status = execute(cmd, out)
        if status != 0:
            raise RuntimeError(
                "QuantumGraph generation exited with non-zero exit code (%s)" % (status)
            )

    def _read_quantum_graph(self):
        """Read the QuantumGraph.
        """
        with open(self.qgraph_filename, "rb") as infh:
            self.qgraph = QuantumGraph.load(infh, DimensionUniverse())
        if len(self.qgraph) == 0:
            raise RuntimeError("QuantumGraph is empty")

    def _create_science_graph(self):
        """Create an expanded graph from self.qgraph (the QuantumGraph as
        generated by the QuantumGraph Generator) that has explicit
        dependencies and individual nodes for each input/output dataset.
        """
        _LOG.info("creating explicit science graph")

        self.sci_graph = networkx.DiGraph()
        tcnt = 0  # task node counter
        dcnt = 0  # dataset ref node counter

        dsname_to_node_id = {}

        for node in self.qgraph:
            _LOG.debug("type(node)=%s", type(node))
            _LOG.debug("nodeId=%s", node.nodeId)

            task_def = node.taskDef

            _LOG.debug("config=%s", task_def.config)
            _LOG.debug("taskClass=%s", task_def.taskClass)
            _LOG.debug("taskName=%s", task_def.taskName)
            _LOG.debug("label=%s", task_def.label)

            tcnt += 1

            tnode_name = "%06d" % (node.nodeId.number)
            self.sci_graph.add_node(
                tnode_name,
                node_type=TASKNODE,
                task_abbrev=task_def.label,
                qgnode=node,
                shape="box",
                fillcolor="gray",
                # style='"filled,bold"',
                style="filled",
                label=".".join(task_def.taskName.split(".")[-2:]),
            )
            quantum = node.quantum

            # Make dataset ref nodes for inputs
            for ds_ref in itertools.chain.from_iterable(quantum.inputs.values()):
                ds_name = f"{ds_ref.datasetType.name}+{ds_ref.dataId}"
                if ds_name not in dsname_to_node_id:
                    dcnt += 1
                    fnode_name = f"ds{dcnt:06}"
                    dsname_to_node_id[ds_name] = fnode_name
                    fnode_label = pretty_dataset_label(ds_name)
                    self.sci_graph.add_node(
                        fnode_name, node_type=FILENODE, label=fnode_label, shape="box", style="rounded"
                    )
                fnode_name = dsname_to_node_id[ds_name]
                self.sci_graph.add_edge(fnode_name, tnode_name)

            # Make dataset ref nodes for outputs
            for ds_ref in itertools.chain.from_iterable(quantum.outputs.values()):
                ds_name = f"{ds_ref.datasetType.name}+{ds_ref.dataId}"
                if ds_name not in dsname_to_node_id:
                    dcnt += 1
                    fnode_name = f"ds{dcnt:06}"
                    dsname_to_node_id[ds_name] = fnode_name
                    fnode_label = pretty_dataset_label(ds_name)
                    self.sci_graph.add_node(
                        fnode_name, node_type=FILENODE, label=fnode_label, shape="box", style="rounded"
                    )
                fnode_name = dsname_to_node_id[ds_name]
                self.sci_graph.add_edge(tnode_name, fnode_name)

        if "pipelineLabels" in self.config:
            self.pipeline_labels = self.config["pipelineLabels"].split(",")
        else:
            self.pipeline_labels = [task.label for task in self.qgraph.iterTaskGraph()]
        _LOG.info("pipeline_labels = %s", self.pipeline_labels)

        _LOG.info("Number of sci_graph nodes: tasks=%d files=%d", tcnt, dcnt)

    def _update_task(self, task_abbrev, tnode, qlfn):
        """Update task node with workflow info.

        Parameters
        ----------
        task_abbrev : `str`
            Task abbreviation used for config searches.
        tnode : node
            Task node.
        qlfn : `str`
            Single quantum logical file name.
        """
        task_opt = {"curvals": {"curr_pipetask": task_abbrev, "qlfn": qlfn}, "required": True}
        _, tnode["exec_name"] = self.config.search("runQuantumExec", opt=task_opt)
        _, tnode["exec_args"] = self.config.search("runQuantumArgs", opt=task_opt)
        _, compute_site = self.config.search("computeSite", opt=task_opt)

        task_opt["required"] = False
        job_profile = {}
        job_attribs = {}
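        # Keys starting with "+" follow the HTCondor convention for custom job
        # ClassAd attributes; strip the "+" and store them as job attributes
        # rather than profile (submit description) settings.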
372 if "profile" in self.config["site"][compute_site]:
373 if "condor" in self.config["site"][compute_site]["profile"]:
374 for key, val in self.config["site"][compute_site]["profile"]["condor"].items():
375 if key.startswith("+"):
376 job_attribs[key[1:]] = val
377 else:
378 job_profile[key] = val
380 found, val = self.config.search("requestMemory", opt=task_opt)
381 if found:
382 job_profile["request_memory"] = val
384 found, val = self.config.search("requestCpus", opt=task_opt)
385 if found:
386 job_profile["request_cpus"] = val
388 if len(job_profile) > 0:
389 tnode["jobProfile"] = job_profile
390 if len(job_attribs) > 0:
391 tnode["jobAttribs"] = job_attribs

    def _add_workflow_init_nodes(self):
        """Add nodes to workflow graph that perform any initialization for
        the workflow.

        Assumes that all of the initialization should be executed prior to
        any of the current workflow.
        """
        # Create a workflow graph that will have task and file nodes necessary
        # for initializing the pipeline execution
        init_graph = self._create_workflow_init_graph()
        _LOG.debug("init_graph nodes = %s", init_graph.nodes())
        # Find source nodes in workflow graph.
        task_nodes = [n for n, d in self.gen_wf_graph.nodes(data=True) if d["node_type"] == TASKNODE]
        task_graph = bipartite.projected_graph(self.gen_wf_graph, task_nodes)
        task_sources = [n for n in task_graph if task_graph.in_degree(n) == 0]
        _LOG.debug("workflow sources = %s", task_sources)

        # Find sink nodes of initonly graph.
        init_sinks = [n for n in init_graph if init_graph.out_degree(n) == 0]
        _LOG.debug("init sinks = %s", init_sinks)

        # Add initonly nodes to workflow graph and make new edges.
        self.gen_wf_graph.add_nodes_from(init_graph.nodes(data=True))
        self.gen_wf_graph.add_edges_from(init_graph.edges())
        for source in task_sources:
            for sink in init_sinks:
                self.gen_wf_graph.add_edge(sink, source)

    def _create_workflow_init_graph(self):
        """Create workflow subgraph for running initialization job(s).
        """
        _LOG.info("creating init subgraph")
        initgraph = networkx.DiGraph()

        # create nodes for executing --init-only
        tnode_name = "pipetask_init"
        initgraph.add_node(
            tnode_name,
            node_type=TASKNODE,
            task_abbrev=tnode_name,
            label=tnode_name,
            job_attrib={"bps_jobabbrev": tnode_name},
            shape="box",
            fillcolor="gray",
            style="filled",
        )
        self._update_task(tnode_name, initgraph.nodes[tnode_name], self.qgraph_filename)

        _LOG.info("creating init task input(s)")
        fnode_name = basename(self.qgraph_filename)
        initgraph.add_node(
            fnode_name,
            node_type=FILENODE,
            lfn=fnode_name,
            label=fnode_name,
            pfn=self.qgraph_filename,
            ignore=False,
            data_type="quantum",
            shape="box",
            style="rounded",
        )
        initgraph.add_edge(fnode_name, tnode_name)

        _LOG.info("creating init task output(s)")
        # All outputs go to Butler. So currently need dummy file node.
        fnode_name = "pipetask_init_outputs"
        initgraph.add_node(
            fnode_name,
            node_type=FILENODE,
            lfn=fnode_name,
            label=fnode_name,
            ignore=True,
            data_type="science",
            shape="box",
            style="rounded",
        )
        initgraph.add_edge(tnode_name, fnode_name)

        return initgraph

    def _create_workflow_graph(self, gname):
        """Create workflow graph from the science graph that has information
        needed for the WMS (e.g., filenames, command line arguments, etc.).

        Parameters
        ----------
        gname : `str`
            Name to give the workflow graph.
        """
        _LOG.info("creating workflow graph")
        self.gen_wf_graph = networkx.DiGraph(self.sci_graph, gname=gname, gtype="workflow")

        ncnt = networkx.number_of_nodes(self.gen_wf_graph)
        taskcnts = {}
        qcnt = 0
        nodelist = list(self.gen_wf_graph.nodes())
        for nodename in nodelist:
            node = self.gen_wf_graph.nodes[nodename]
            if node["node_type"] == FILENODE:  # data/file
                node["lfn"] = nodename
                node["ignore"] = True
                node["data_type"] = "science"
            elif node["node_type"] == TASKNODE:  # task
                task_abbrev = node["task_abbrev"]
                node["job_attrib"] = {"bps_jobabbrev": task_abbrev}
                if task_abbrev not in taskcnts:
                    taskcnts[task_abbrev] = 0
                taskcnts[task_abbrev] += 1

                # add quantum pickle input data node
                ncnt += 1
                qcnt += 1
                qnode_name = f"qgraph_{nodename}"
                qlfn = f"quantum_{nodename}_{task_abbrev}.pickle"
                q_filename = os.path.join(self.submit_path, "input", task_abbrev, qlfn)
                lfn = basename(q_filename)
                self.gen_wf_graph.add_node(
                    qnode_name,
                    node_type=FILENODE,
                    lfn=lfn,
                    label=lfn,
                    pfn=q_filename,
                    ignore=False,
                    data_type="quantum",
                    shape="box",
                    style="rounded",
                )
                save_qg_subgraph(node["qgnode"], self.qgraph, q_filename)

                self._update_task(task_abbrev, node, qlfn)
                self.gen_wf_graph.add_edge(qnode_name, nodename)
            else:
                raise ValueError("Invalid node_type (%s)" % node["node_type"])
532 if self.config.get("runInit", "{default: False}"):
533 self._add_workflow_init_nodes()
535 # save pipeline summary description to graph attributes
536 run_summary = []
537 for task_abbrev in [x.strip() for x in self.pipeline_labels]:
538 run_summary.append("%s:%d" % (task_abbrev, taskcnts[task_abbrev]))
539 self.gen_wf_graph.graph["run_attrib"] = {
540 "bps_run_summary": ";".join(run_summary),
541 "bps_isjob": "True",
542 "bps_project": self.config["project"],
543 "bps_campaign": self.config["campaign"],
544 "bps_run": gname,
545 "bps_operator": self.config["operator"],
546 "bps_payload": self.config["payloadName"],
547 "bps_runsite": "TODO",
548 }

    def _create_generic_workflow(self):
        """Create generic workflow graph.
        """
        # first convert LSST-specific graph implementation to networkx graph
        self._create_science_graph()
        if self.config.get("saveDot", {"default": False}):
            draw_networkx_dot(self.sci_graph, os.path.join(self.submit_path, "draw", "bpsgraph_sci.dot"))

        # Create workflow graph
        self._create_workflow_graph(self.config["uniqProcName"])
        if self.config.get("saveWFGraph", {"default": False}):
            with open(os.path.join(self.submit_path, "wfgraph.pickle"), "wb") as pickle_file:
                pickle.dump(self.gen_wf_graph, pickle_file)
        if self.config.get("saveDot", {"default": False}):
            draw_networkx_dot(self.gen_wf_graph, os.path.join(self.submit_path, "draw", "bpsgraph_wf.dot"))

    def _create_generic_workflow_config(self):
        """Create generic workflow configuration.
        """
        self.gen_wf_config = BpsConfig(self.config)
        self.gen_wf_config["workflowName"] = self.config["uniqProcName"]
        self.gen_wf_config["workflowPath"] = self.submit_path

    def _implement_workflow(self):
        """Convert workflow to inputs for a particular WMS.
        """
        # import workflow engine class
        modparts = self.config[".global.workflowEngineClass"].split(".")
        fromname = ".".join(modparts[0:-1])
        importname = modparts[-1]
        _LOG.info("%s %s", fromname, importname)
        mod = __import__(fromname, fromlist=[importname])
        dynclass = getattr(mod, importname)
        workflow_engine = dynclass(self.gen_wf_config)
        self.workflow = workflow_engine.implement_workflow(self.gen_wf_graph)

    def create_submission(self):
        """Create submission files but don't actually submit.
        """
        subtime = time.time()

        found, filename = self.config.search("qgraph_file")
        if found:
            _LOG.info("Copying quantum graph (%s)", filename)
            stime = time.time()
            self.qgraph_filename = "%s/%s" % (self.submit_path, basename(filename))
            shutil.copy2(filename, self.qgraph_filename)
            _LOG.info("Copying quantum graph took %.2f seconds", time.time() - stime)
        else:
            _LOG.info("Creating quantum graph")
            stime = time.time()
            self._create_quantum_graph()
            _LOG.info("Creating quantum graph took %.2f seconds", time.time() - stime)

        _LOG.info("Reading quantum graph (%s)", self.qgraph_filename)
        stime = time.time()
        self._read_quantum_graph()
        _LOG.info("Reading quantum graph with %d nodes took %.2f seconds", len(self.qgraph),
                  time.time() - stime)

        _LOG.info("Creating Generic Workflow")
        stime = time.time()
        self._create_generic_workflow()
        self._create_generic_workflow_config()
        _LOG.info("Creating Generic Workflow took %.2f seconds", time.time() - stime)

        stime = time.time()
        _LOG.info("Creating specific implementation of workflow")
        self._implement_workflow()
        _LOG.info("Creating specific implementation of workflow took %.2f seconds", time.time() - stime)

        _LOG.info("Total submission creation time = %.2f", time.time() - subtime)

    def submit(self):
        """Submit workflow for running.
        """
        self.workflow.submit()

    def get_id(self):
        """Return workflow's run ID.
        """
        return self.workflow.get_id()