Coverage for python/lsst/ctrl/bps/quantum_clustering_funcs.py: 5%

126 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-03-04 10:19 +0000

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Functions that convert QuantumGraph into ClusteredQuantumGraph. 

23""" 

24import logging 

25import re 

26from collections import defaultdict 

27 

28from lsst.pipe.base import NodeId 

29from networkx import DiGraph, is_directed_acyclic_graph 

30 

31from . import ClusteredQuantumGraph, QuantaCluster 

32 

33_LOG = logging.getLogger(__name__) 

34 

35 

def single_quantum_clustering(config, qgraph, name):
    """Wrap every quantum of a QuantumGraph in a cluster of its own.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph to break into clusters for ClusteredQuantumGraph.
    name : `str`
        Name to give to ClusteredQuantumGraph.

    Returns
    -------
    clustered_quantum : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph holding exactly one quantum per cluster, with
        cluster dependencies mirroring the quantum dependencies of the
        given QuantumGraph.
    """
    cqgraph = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
        butler_uri=config["butlerConfig"],
    )

    # Name template per task label; the config is searched only the first
    # time a label is seen.
    templates_by_label = {}

    # Quantum node id -> name of the cluster that holds it, needed for the
    # dependency pass below.
    cluster_name_by_node = {}

    # First pass: one cluster per quantum.
    for qnode in qgraph:
        label = qnode.taskDef.label
        template = templates_by_label.get(label)
        if template is None:
            search_opt = {"curvals": {"curr_pipetask": label}, "replaceVars": False}
            found, template_data_id = config.search("templateDataId", opt=search_opt)
            template = "{node_number}_{label}_" + template_data_id if found else "{node_number}"
            templates_by_label[label] = template

        cluster = QuantaCluster.from_quantum_node(qnode, template)
        cluster_name_by_node[qnode.nodeId] = cluster.name
        cqgraph.add_cluster(cluster)

    # Second pass: every parent -> child quantum edge becomes an edge between
    # the two corresponding clusters.
    for parent in qgraph:
        for child in qgraph.determineOutputsOfQuantumNode(parent):
            cqgraph.add_dependency(
                cluster_name_by_node[parent.nodeId],
                cluster_name_by_node[child.nodeId],
            )

    return cqgraph

96 

97 

def _check_clusters_tasks(cluster_config, taskGraph):
    """Validate the pipetask lists of the cluster definitions.

    Parameters
    ----------
    cluster_config : `lsst.ctrl.bps.BpsConfig`
        The cluster section from the BPS configuration.
    taskGraph : `lsst.pipe.base.taskGraph`
        Directed graph of tasks.

    Returns
    -------
    task_labels : `set` [`str`]
        Set of task labels from the cluster definitions.

    Raises
    ------
    RuntimeError
        Raised if task label appears in more than one cluster def or
        if there's a cycle in the cluster defs.
    """
    # The check works on a graph whose nodes are clusters rather than tasks:
    # collapsing tasks into clusters must not introduce a cycle.
    cluster_of_task = {}
    task_labels = set()
    clustered_task_graph = DiGraph()

    # Clusters explicitly listed in the configuration.
    for cluster_label in cluster_config:
        _LOG.debug("cluster = %s", cluster_label)
        pipetasks = cluster_config[cluster_label]["pipetasks"]
        for raw_label in pipetasks.split(","):
            task_label = raw_label.strip()
            if task_label in task_labels:
                raise RuntimeError(
                    f"Task label {task_label} appears in more than one cluster definition. "
                    "Aborting submission."
                )
            task_labels.add(task_label)
            cluster_of_task[task_label] = cluster_label
            clustered_task_graph.add_node(cluster_label)

    # Any task without a cluster definition is treated as a cluster named
    # after the task itself.
    for task in taskGraph:
        if task.label in task_labels:
            continue
        cluster_of_task[task.label] = task.label
        clustered_task_graph.add_node(task.label)

    # Task edges that cross a cluster boundary become cluster edges; edges
    # within a single cluster are dropped.
    for edge in taskGraph.edges:
        parent_cluster = cluster_of_task[edge[0].label]
        child_cluster = cluster_of_task[edge[1].label]
        if parent_cluster != child_cluster:
            clustered_task_graph.add_edge(parent_cluster, child_cluster)

    _LOG.debug("clustered_task_graph.edges = %s", list(clustered_task_graph.edges))

    if not is_directed_acyclic_graph(clustered_task_graph):
        raise RuntimeError("Cluster pipetasks do not create a DAG")

    return task_labels

156 

157 

def dimension_clustering(config, qgraph, name):
    """Follow config instructions to make clusters based upon dimensions.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph to break into clusters for ClusteredQuantumGraph.
    name : `str`
        Name to give to ClusteredQuantumGraph.

    Returns
    -------
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph with clustering as defined in config.

    Raises
    ------
    RuntimeError
        Raised if the cluster definitions are rejected by
        `_check_clusters_tasks`, or if a quantum is missing a dimension
        required by its cluster definition even after the
        ``equalDimensions`` substitutions have been applied.
    ValueError
        Raised if an ``equalDimensions`` entry is not of the form
        ``dim1:dim2``.
    """
    cqgraph = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
        butler_uri=config["butlerConfig"],
    )

    # Save mapping in order to create dependencies later.
    quantum_to_cluster = {}

    cluster_config = config["cluster"]
    task_labels = _check_clusters_tasks(cluster_config, qgraph.taskGraph)
    for cluster_label in cluster_config:
        _LOG.debug("cluster = %s", cluster_label)
        cluster_settings = cluster_config[cluster_label]

        cluster_dims = []
        if "dimensions" in cluster_settings:
            cluster_dims = [d.strip() for d in cluster_settings["dimensions"].split(",")]
        _LOG.debug("cluster_dims = %s", cluster_dims)

        # Without an explicit clusterTemplate, the name is the cluster label
        # followed by one placeholder per clustering dimension.
        found, template = cluster_settings.search("clusterTemplate", opt={"replaceVars": False})
        if not found:
            if cluster_dims:
                template = f"{cluster_label}_" + "_".join(f"{{{dim}}}" for dim in cluster_dims)
            else:
                template = cluster_label
        _LOG.debug("template = %s", template)

        # The equalDimensions setting is the same for every quantum of the
        # cluster, so parse it once here instead of once per quantum.
        equal_dim_pairs = []
        equal_dims = cluster_settings.get("equalDimensions", None)
        if equal_dims:
            for pair in equal_dims.split(","):
                dim1, dim2 = pair.strip().split(":")
                equal_dim_pairs.append((dim1.strip(), dim2.strip()))

        cluster_tasks = [pt.strip() for pt in cluster_settings["pipetasks"].split(",")]
        for task_label in cluster_tasks:
            task_labels.add(task_label)

            # Labels that do not match a task in this QuantumGraph are
            # skipped rather than treated as an error.
            task_def = qgraph.findTaskDefByLabel(task_label)
            if task_def is None:
                continue
            quantum_nodes = qgraph.getNodesForTask(task_def)

            # Determine cluster for each node.
            for qnode in quantum_nodes:
                # Gather info for cluster name template into a dictionary.
                info = {}

                missing_info = set()
                data_id_info = qnode.quantum.dataId.byName()
                for dim_name in cluster_dims:
                    _LOG.debug("dim_name = %s", dim_name)
                    if dim_name in data_id_info:
                        info[dim_name] = data_id_info[dim_name]
                    else:
                        missing_info.add(dim_name)

                # Fill a still-missing clustering dimension from the
                # dimension declared equal to it.  Only dimensions that are
                # actually missing are substituted: a dimension already
                # resolved (from the data ID or by an earlier pair) keeps
                # its value, and set.remove is never called on an absent
                # element (which would raise a bare KeyError).
                for dim1, dim2 in equal_dim_pairs:
                    if dim1 in missing_info and dim2 in data_id_info:
                        info[dim1] = data_id_info[dim2]
                        missing_info.remove(dim1)
                    elif dim2 in missing_info and dim1 in data_id_info:
                        info[dim2] = data_id_info[dim1]
                        missing_info.remove(dim2)

                info["label"] = cluster_label
                _LOG.debug("info for template = %s", info)

                if missing_info:
                    # Sorted so the message is deterministic.
                    raise RuntimeError(
                        f"Quantum {qnode.nodeId} ({data_id_info}) missing dimensions "
                        f"{','.join(sorted(missing_info))} required for cluster {cluster_label}"
                    )

                # Use dictionary plus template format string to create name.
                # To avoid key errors from generic patterns, use defaultdict
                # so that unknown placeholders expand to an empty string.
                cluster_name = template.format_map(defaultdict(lambda: "", info))
                cluster_name = re.sub("_+", "_", cluster_name)

                # Some dimensions contain slash which must be replaced.
                cluster_name = re.sub("/", "_", cluster_name)
                _LOG.debug("cluster_name = %s", cluster_name)

                # Save mapping for use when creating dependencies.
                quantum_to_cluster[qnode.nodeId] = cluster_name

                # Add cluster to the ClusteredQuantumGraph.
                # Saving NodeId instead of number because QuantumGraph API
                # requires it for creating per-job QuantumGraphs.
                if cluster_name in cqgraph:
                    cluster = cqgraph.get_cluster(cluster_name)
                else:
                    cluster = QuantaCluster(cluster_name, cluster_label, info)
                    cqgraph.add_cluster(cluster)
                cluster.add_quantum(qnode.nodeId, task_label)

    # Assume any task not handled above is supposed to be 1 cluster = 1 quantum.
    for task_def in qgraph.iterTaskGraph():
        if task_def.label not in task_labels:
            _LOG.info("Creating 1-quantum clusters for task %s", task_def.label)
            found, template_data_id = config.search(
                "templateDataId", opt={"curvals": {"curr_pipetask": task_def.label}, "replaceVars": False}
            )
            if found:
                template = "{node_number}_{label}_" + template_data_id
            else:
                template = "{node_number}"

            for qnode in qgraph.getNodesForTask(task_def):
                cluster = QuantaCluster.from_quantum_node(qnode, template)
                cqgraph.add_cluster(cluster)
                quantum_to_cluster[qnode.nodeId] = cluster.name

    # Add cluster dependencies.  Edges between quanta of the same cluster
    # are skipped so that no cluster depends on itself.
    for parent in qgraph:
        # Get child nodes.
        children = qgraph.determineOutputsOfQuantumNode(parent)
        for child in children:
            try:
                if quantum_to_cluster[parent.nodeId] != quantum_to_cluster[child.nodeId]:
                    cqgraph.add_dependency(
                        quantum_to_cluster[parent.nodeId], quantum_to_cluster[child.nodeId]
                    )
            except KeyError as e:  # pragma: no cover
                # For debugging a problem internal to method: a quantum was
                # never assigned to a cluster.  Report which one, then
                # re-raise the original error.
                nid = NodeId(e.args[0], qgraph.graphID)
                qnode = qgraph.getQuantumNodeByNodeId(nid)

                print(
                    f"Quanta missing when clustering: {qnode.taskDef.label}, {qnode.quantum.dataId.byName()}"
                )
                raise

    return cqgraph