Coverage for python/lsst/ctrl/bps/quantum_clustering_funcs.py: 5%

126 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-04-01 02:12 -0700

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Functions that convert QuantumGraph into ClusteredQuantumGraph. 

23""" 

24import logging 

25import re 

26from collections import defaultdict 

27 

28from lsst.pipe.base import NodeId 

29from networkx import DiGraph, is_directed_acyclic_graph 

30 

31from . import ClusteredQuantumGraph, QuantaCluster 

32 

33_LOG = logging.getLogger(__name__) 

34 

35 

def single_quantum_clustering(config, qgraph, name):
    """Build a ClusteredQuantumGraph in which each cluster holds one quantum.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph to split into single-quantum clusters.
    name : `str`
        Name to assign to the resulting ClusteredQuantumGraph.

    Returns
    -------
    clustered_quantum : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph containing one cluster per quantum of the
        given QuantumGraph.
    """
    cqgraph = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
    )

    # Cluster name template per task label; the config is searched only the
    # first time a label is encountered.
    templates_by_label = {}

    # Cluster name per quantum node id, needed to wire up the edges below.
    cluster_name_by_node = {}

    # First pass: one cluster per quantum node.
    for qnode in qgraph:
        label = qnode.taskDef.label
        try:
            template = templates_by_label[label]
        except KeyError:
            found, template_data_id = config.search(
                "templateDataId",
                opt={"curvals": {"curr_pipetask": label}, "replaceVars": False},
            )
            template = "{node_number}_{label}_" + template_data_id if found else "{node_number}"
            templates_by_label[label] = template

        cluster = QuantaCluster.from_quantum_node(qnode, template)
        cluster_name_by_node[qnode.nodeId] = cluster.name
        cqgraph.add_cluster(cluster)

    # Second pass: mirror every parent -> child quantum edge as a dependency
    # between the corresponding clusters.
    for parent in qgraph:
        for child in qgraph.determineOutputsOfQuantumNode(parent):
            cqgraph.add_dependency(
                cluster_name_by_node[parent.nodeId],
                cluster_name_by_node[child.nodeId],
            )

    return cqgraph

95 

96 

def _check_clusters_tasks(cluster_config, taskGraph):
    """Validate the pipetask lists given in the cluster definitions.

    Parameters
    ----------
    cluster_config : `lsst.ctrl.bps.BpsConfig`
        The cluster section from the BPS configuration.
    taskGraph : `lsst.pipe.base.taskGraph`
        Directed graph of tasks.

    Returns
    -------
    task_labels : `set` [`str`]
        Every task label named in a cluster definition.

    Raises
    ------
    RuntimeError
        Raised if a task label is listed in more than one cluster
        definition or if the cluster definitions produce a cycle.
    """
    # Collapse the task graph into a graph whose nodes are clusters; a cycle
    # in that graph means the cluster definitions are not a DAG.
    cluster_graph = DiGraph()
    owning_cluster = {}  # task label -> label of the cluster that holds it
    task_labels = set()

    # Clusters explicitly defined in the configuration.
    for cluster_label in cluster_config:
        _LOG.debug("cluster = %s", cluster_label)
        pipetasks = cluster_config[cluster_label]["pipetasks"]
        for raw_label in pipetasks.split(","):
            task_label = raw_label.strip()
            if task_label in task_labels:
                raise RuntimeError(
                    f"Task label {task_label} appears in more than one cluster definition. "
                    "Aborting submission."
                )
            task_labels.add(task_label)
            owning_cluster[task_label] = cluster_label
        cluster_graph.add_node(cluster_label)

    # Any task not named in a cluster definition becomes its own cluster.
    for task in taskGraph:
        label = task.label
        if label not in task_labels:
            owning_cluster[label] = label
            cluster_graph.add_node(label)

    # Only edges that cross a cluster boundary become cluster dependencies.
    for edge in taskGraph.edges:
        source = owning_cluster[edge[0].label]
        target = owning_cluster[edge[1].label]
        if source != target:
            cluster_graph.add_edge(source, target)

    _LOG.debug("clustered_task_graph.edges = %s", list(cluster_graph.edges))

    if not is_directed_acyclic_graph(cluster_graph):
        raise RuntimeError("Cluster pipetasks do not create a DAG")

    return task_labels

155 

156 

def dimension_clustering(config, qgraph, name):
    """Follow config instructions to make clusters based upon dimensions.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph to break into clusters for ClusteredQuantumGraph.
    name : `str`
        Name to give to ClusteredQuantumGraph.

    Returns
    -------
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph with clustering as defined in config.

    Raises
    ------
    RuntimeError
        Raised if a quantum lacks a dimension required by its cluster
        definition (after applying any ``equalDimensions`` substitutions),
        or if the cluster definitions fail the checks done by
        `_check_clusters_tasks`.
    """
    cqgraph = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
    )

    # Save mapping in order to create dependencies later.
    quantum_to_cluster = {}

    cluster_config = config["cluster"]
    task_labels = _check_clusters_tasks(cluster_config, qgraph.taskGraph)
    for cluster_label in cluster_config:
        _LOG.debug("cluster = %s", cluster_label)
        cluster_dims = []
        if "dimensions" in cluster_config[cluster_label]:
            cluster_dims = [d.strip() for d in cluster_config[cluster_label]["dimensions"].split(",")]
        _LOG.debug("cluster_dims = %s", cluster_dims)

        found, template = cluster_config[cluster_label].search("clusterTemplate", opt={"replaceVars": False})
        if not found:
            if cluster_dims:
                # Default name: cluster label followed by one placeholder
                # per clustering dimension.
                template = f"{cluster_label}_" + "_".join(f"{{{dim}}}" for dim in cluster_dims)
            else:
                template = cluster_label
        _LOG.debug("template = %s", template)

        # The equalDimensions setting is the same for every task and quantum
        # of this cluster, so look it up and split it into pairs only once.
        equal_dims = cluster_config[cluster_label].get("equalDimensions", None)
        equal_pairs = [pair.strip() for pair in equal_dims.split(",")] if equal_dims else []

        cluster_tasks = [pt.strip() for pt in cluster_config[cluster_label]["pipetasks"].split(",")]
        for task_label in cluster_tasks:
            task_labels.add(task_label)

            # getNodesForTask is currently a lookup from taskDef to its
            # quanta, so it is quick enough to call repeatedly.
            task_def = qgraph.findTaskDefByLabel(task_label)
            if task_def is None:
                continue
            quantum_nodes = qgraph.getNodesForTask(task_def)

            # Determine cluster for each node.
            for qnode in quantum_nodes:
                # Gather info for cluster name template into a dictionary.
                info = {}

                missing_info = set()
                data_id_info = qnode.quantum.dataId.byName()
                for dim_name in cluster_dims:
                    _LOG.debug("dim_name = %s", dim_name)
                    if dim_name in data_id_info:
                        info[dim_name] = data_id_info[dim_name]
                    else:
                        missing_info.add(dim_name)

                # Fill in a still-missing clustering dimension from the
                # dimension declared equal to it.  Only dimensions that are
                # actually missing are substituted: a quantum that already
                # carries the clustering dimension keeps its own value, and
                # a dimension resolved by an earlier pair is not removed
                # twice (an unconditional set.remove raised KeyError in
                # both of those situations).
                for pair in equal_pairs:
                    dim1, dim2 = (dim.strip() for dim in pair.split(":"))
                    if dim1 in missing_info and dim2 in data_id_info:
                        info[dim1] = data_id_info[dim2]
                        missing_info.discard(dim1)
                    elif dim2 in missing_info and dim1 in data_id_info:
                        info[dim2] = data_id_info[dim1]
                        missing_info.discard(dim2)

                info["label"] = cluster_label
                _LOG.debug("info for template = %s", info)

                if missing_info:
                    # Sorted so the message is the same from run to run.
                    raise RuntimeError(
                        "Quantum %s (%s) missing dimensions %s required for cluster %s"
                        % (qnode.nodeId, data_id_info, ",".join(sorted(missing_info)), cluster_label)
                    )

                # Use dictionary plus template format string to create name.
                # To avoid key errors from generic patterns, use defaultdict
                # so that unknown placeholders expand to an empty string;
                # the resulting runs of underscores are then collapsed.
                cluster_name = template.format_map(defaultdict(lambda: "", info))
                cluster_name = re.sub("_+", "_", cluster_name)

                # Some dimensions contain slash which must be replaced.
                cluster_name = re.sub("/", "_", cluster_name)
                _LOG.debug("cluster_name = %s", cluster_name)

                # Save mapping for use when creating dependencies.
                quantum_to_cluster[qnode.nodeId] = cluster_name

                # Add cluster to the ClusteredQuantumGraph.
                # Saving NodeId instead of number because QuantumGraph API
                # requires it for creating per-job QuantumGraphs.
                if cluster_name in cqgraph:
                    cluster = cqgraph.get_cluster(cluster_name)
                else:
                    cluster = QuantaCluster(cluster_name, cluster_label, info)
                    cqgraph.add_cluster(cluster)
                cluster.add_quantum(qnode.nodeId, task_label)

    # Assume any task not handled above is supposed to be 1 cluster = 1 quantum.
    for task_def in qgraph.iterTaskGraph():
        if task_def.label not in task_labels:
            _LOG.info("Creating 1-quantum clusters for task %s", task_def.label)
            found, template_data_id = config.search(
                "templateDataId", opt={"curvals": {"curr_pipetask": task_def.label}, "replaceVars": False}
            )
            if found:
                template = "{node_number}_{label}_" + template_data_id
            else:
                template = "{node_number}"

            for qnode in qgraph.getNodesForTask(task_def):
                cluster = QuantaCluster.from_quantum_node(qnode, template)
                cqgraph.add_cluster(cluster)
                quantum_to_cluster[qnode.nodeId] = cluster.name

    # Add cluster dependencies.
    for parent in qgraph:
        # Get child nodes.
        children = qgraph.determineOutputsOfQuantumNode(parent)
        for child in children:
            try:
                # Quanta inside the same cluster need no dependency edge.
                if quantum_to_cluster[parent.nodeId] != quantum_to_cluster[child.nodeId]:
                    cqgraph.add_dependency(
                        quantum_to_cluster[parent.nodeId], quantum_to_cluster[child.nodeId]
                    )
            except KeyError as e:  # pragma: no cover
                # For debugging a problem internal to method: report which
                # quantum was never assigned to a cluster, then re-raise.
                nid = NodeId(e.args[0], qgraph.graphID)
                qnode = qgraph.getQuantumNodeByNodeId(nid)

                print(
                    f"Quanta missing when clustering: {qnode.taskDef.label}, {qnode.quantum.dataId.byName()}"
                )
                raise

    return cqgraph

303 

304 return cqgraph