Coverage for python/lsst/ctrl/bps/quantum_clustering_funcs.py: 6%

148 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-04-14 02:22 -0700

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Functions that convert QuantumGraph into ClusteredQuantumGraph. 

23""" 

24import logging 

25import re 

26from collections import defaultdict 

27 

28from networkx import DiGraph, is_directed_acyclic_graph, topological_sort 

29 

30from . import ClusteredQuantumGraph, QuantaCluster 

31 

32_LOG = logging.getLogger(__name__) 

33 

34 

def single_quantum_clustering(config, qgraph, name):
    """Build a ClusteredQuantumGraph where each quantum is its own cluster.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph whose quanta are each wrapped in a separate cluster.
    name : `str`
        Name to give to the ClusteredQuantumGraph.

    Returns
    -------
    clustered_quantum : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph holding one cluster per quantum of the
        given QuantumGraph, with the same dependencies between them.
    """
    clustered = ClusteredQuantumGraph(
        name=name, qgraph=qgraph, qgraph_filename=config[".bps_defined.runQgraphFile"]
    )

    # Cluster name template per task label; the config is searched only
    # the first time a label is seen.
    templates_by_label = {}

    # Cluster name per quantum node id, reused when adding dependencies.
    cluster_name_by_node = {}

    # First pass: one cluster per quantum.
    for qnode in qgraph:
        task_label = qnode.taskDef.label
        if task_label not in templates_by_label:
            search_opt = {"curvals": {"curr_pipetask": task_label}, "replaceVars": False}
            found, template_data_id = config.search("templateDataId", opt=search_opt)
            templates_by_label[task_label] = (
                ("{node_number}_{label}_" + template_data_id) if found else "{node_number}"
            )

        cluster = QuantaCluster.from_quantum_node(qnode, templates_by_label[task_label])
        cluster_name_by_node[qnode.nodeId] = cluster.name
        cqgraph_add = clustered.add_cluster
        cqgraph_add(cluster)

    # Second pass: mirror every quantum -> child quantum edge as a
    # cluster -> cluster dependency.
    for qnode in qgraph:
        for child in qgraph.determineOutputsOfQuantumNode(qnode):
            clustered.add_dependency(
                cluster_name_by_node[qnode.nodeId],
                cluster_name_by_node[child.nodeId],
            )

    return clustered

94 

95 

def _check_clusters_tasks(cluster_config, task_graph):
    """Validate the cluster definitions against the pipeline task graph.

    Parameters
    ----------
    cluster_config : `lsst.ctrl.bps.BpsConfig`
        The cluster section from the BPS configuration.
    task_graph : `lsst.pipe.base.taskGraph`
        Directed graph of tasks.

    Returns
    -------
    cluster_labels : `list` [`str`]
        Dependency ordered list of cluster labels (includes
        single quantum clusters).
    ordered_tasks : `dict` [`str`, `list` [`str`]]
        Mapping of cluster label to ordered list of task labels.

    Raises
    ------
    RuntimeError
        Raised if task label appears in more than one cluster def or
        if there's a cycle in the cluster defs.
    """
    # The task graph is keyed by TaskDefs; mirror it as a graph of plain
    # labels, which is what the cluster definitions refer to.
    labels = DiGraph()
    for task_def in task_graph:
        labels.add_node(task_def.label)
        for upstream in task_graph.predecessors(task_def):
            labels.add_edge(upstream.label, task_def.label)

    cluster_of_task = {}  # task label -> label of the cluster containing it
    claimed = set()  # task labels already assigned to a configured cluster
    cluster_graph = DiGraph()  # graph of clusters, used for the cycle check
    ordered_tasks = {}  # cluster label -> task labels in dependency order

    # Clusters explicitly defined in the configuration.
    for cluster_label in cluster_config:
        _LOG.debug("cluster = %s", cluster_label)
        present = []
        for raw_label in cluster_config[cluster_label]["pipetasks"].split(","):
            task_label = raw_label.strip()
            if task_label in claimed:
                raise RuntimeError(
                    f"Task label {task_label} appears in more than one cluster definition. "
                    "Aborting submission."
                )
            # Tasks absent from the graph do not take part in the checks.
            if not labels.has_node(task_label):
                continue
            present.append(task_label)
            claimed.add(task_label)
            cluster_of_task[task_label] = cluster_label

        if not present:
            continue

        # Record the cluster's tasks in dependency order.
        ordered_tasks[cluster_label] = list(topological_sort(labels.subgraph(present)))
        cluster_graph.add_node(cluster_label)

    # Every remaining task becomes a cluster of its own, named after the task.
    for label in labels:
        if label in claimed:
            continue
        cluster_of_task[label] = label
        cluster_graph.add_node(label)
        ordered_tasks[label] = [label]

    # Task-level edges that cross cluster boundaries become cluster edges.
    for edge in task_graph.edges:
        source_cluster = cluster_of_task[edge[0].label]
        target_cluster = cluster_of_task[edge[1].label]
        if source_cluster != target_cluster:
            cluster_graph.add_edge(source_cluster, target_cluster)

    _LOG.debug("clustered_task_graph.edges = %s", list(cluster_graph.edges))

    if not is_directed_acyclic_graph(cluster_graph):
        raise RuntimeError("Cluster pipetasks do not create a DAG")

    return list(topological_sort(cluster_graph)), ordered_tasks

177 

178 

def dimension_clustering(config, qgraph, name):
    """Cluster a QuantumGraph according to the config's cluster section.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph to break into clusters for ClusteredQuantumGraph.
    name : `str`
        Name to give to ClusteredQuantumGraph.

    Returns
    -------
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph with clustering as defined in config.
    """
    clustered = ClusteredQuantumGraph(
        name=name, qgraph=qgraph, qgraph_filename=config[".bps_defined.runQgraphFile"]
    )

    # Filled in by the helpers below; needed to turn quantum-level edges
    # into cluster-level dependencies.
    cluster_of_quantum = {}

    definitions = config["cluster"]
    labels_in_order, ordered_tasks = _check_clusters_tasks(definitions, qgraph.taskGraph)

    for cluster_label in labels_in_order:
        _LOG.debug("cluster = %s", cluster_label)
        if cluster_label not in definitions:
            # No definition in the config for this label: one cluster
            # per quantum.
            add_clusters_per_quantum(config, cluster_label, qgraph, clustered, cluster_of_quantum)
            continue
        add_dim_clusters(
            definitions[cluster_label],
            cluster_label,
            qgraph,
            ordered_tasks,
            clustered,
            cluster_of_quantum,
        )

    return clustered

222 

223 

def add_clusters_per_quantum(config, label, qgraph, cqgraph, quantum_to_cluster):
    """Add one single-quantum cluster per quantum of the given task.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    label : `str`
        taskDef label for which to add clusters.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph providing quanta for the clusters.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the new 1-quantum
        clusters are added. (modified in method)
    quantum_to_cluster : `dict` [ `str`, `str` ]
        Mapping of quantum node id to which cluster it was added.
        (modified in method)
    """
    _LOG.info("Creating 1-quantum clusters for task %s", label)

    search_opt = {"curvals": {"curr_pipetask": label}, "replaceVars": False}
    found, template_data_id = config.search("templateDataId", opt=search_opt)
    template = ("{node_number}_{label}_" + template_data_id) if found else "{node_number}"

    # Look up the task definition by label, then all of its quantum nodes.
    nodes_for_task = qgraph.getNodesForTask(qgraph.findTaskDefByLabel(label))

    for qnode in nodes_for_task:
        new_cluster = QuantaCluster.from_quantum_node(qnode, template)
        cqgraph.add_cluster(new_cluster)
        # Record the mapping before resolving dependencies, which looks
        # this node up in the mapping.
        quantum_to_cluster[qnode.nodeId] = new_cluster.name
        add_cluster_dependencies(cqgraph, new_cluster, quantum_to_cluster)

261 

262 

def add_dim_clusters(cluster_config, cluster_label, qgraph, ordered_tasks, cqgraph, quantum_to_cluster):
    """Add clusters for a cluster label to a ClusteredQuantumGraph.

    Quanta of the cluster's tasks are grouped by the values of the
    configured dimensions: all quanta producing the same cluster name
    end up in the same cluster.

    Parameters
    ----------
    cluster_config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration for specific cluster label.
    cluster_label : `str`
        Cluster label for which to add clusters.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph providing quanta for the clusters.
    ordered_tasks : `dict` [`str`, `list` [`str`]]
        Mapping of cluster label to ordered list of task labels.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the new clusters are added.
        (modified in method)
    quantum_to_cluster : `dict` [ `str`, `str` ]
        Mapping of quantum node id to which cluster it was added.
        (modified in method)

    Raises
    ------
    RuntimeError
        Raised if a quantum's data id lacks a dimension required by the
        cluster definition and no ``equalDimensions`` pair supplies it.
    """
    cluster_dims = []
    if "dimensions" in cluster_config:
        cluster_dims = [d.strip() for d in cluster_config["dimensions"].split(",")]
    _LOG.debug("cluster_dims = %s", cluster_dims)

    found, template = cluster_config.search("clusterTemplate", opt={"replaceVars": False})
    if not found:
        if cluster_dims:
            template = f"{cluster_label}_" + "_".join(f"{{{dim}}}" for dim in cluster_dims)
        else:
            template = cluster_label
    _LOG.debug("template = %s", template)

    # The equalDimensions setting does not depend on the task or quantum,
    # so read and split it once instead of once per quantum.
    equal_dims = cluster_config.get("equalDimensions", None)
    equal_pairs = []
    if equal_dims:
        equal_pairs = [pair.strip().split(":") for pair in equal_dims.split(",")]

    # Keyed by cluster name so each cluster is listed once, in the order it
    # was first touched, no matter how many quanta were added to it.
    new_clusters = {}
    for task_label in ordered_tasks[cluster_label]:
        task_def = qgraph.findTaskDefByLabel(task_label)
        if task_def is None:
            continue

        # Determine cluster for each node.
        for qnode in qgraph.getNodesForTask(task_def):
            # Gather info for cluster name template into a dictionary.
            info = {}
            missing_info = set()
            data_id_info = qnode.quantum.dataId.byName()
            for dim_name in cluster_dims:
                _LOG.debug("dim_name = %s", dim_name)
                if dim_name in data_id_info:
                    info[dim_name] = data_id_info[dim_name]
                else:
                    missing_info.add(dim_name)

            # A pair "a:b" lets the value of one dimension stand in for
            # the other.
            for dim1, dim2 in equal_pairs:
                if dim1 in cluster_dims and dim2 in data_id_info:
                    info[dim1] = data_id_info[dim2]
                    # discard, not remove: the dimension may never have been
                    # missing, or an earlier pair may already have filled it.
                    missing_info.discard(dim1)
                elif dim2 in cluster_dims and dim1 in data_id_info:
                    info[dim2] = data_id_info[dim1]
                    missing_info.discard(dim2)

            info["label"] = cluster_label
            _LOG.debug("info for template = %s", info)

            if missing_info:
                raise RuntimeError(
                    f"Quantum {qnode.nodeId} ({data_id_info}) missing dimensions "
                    f"{','.join(sorted(missing_info))} required for cluster {cluster_label}"
                )

            # Use dictionary plus template format string to create name.
            # To avoid key errors from generic patterns, use defaultdict.
            cluster_name = template.format_map(defaultdict(lambda: "", info))
            cluster_name = re.sub("_+", "_", cluster_name)

            # Some dimensions contain slash which must be replaced.
            cluster_name = re.sub("/", "_", cluster_name)
            _LOG.debug("cluster_name = %s", cluster_name)

            # Save mapping for use when creating dependencies.
            quantum_to_cluster[qnode.nodeId] = cluster_name

            # Add cluster to the ClusteredQuantumGraph.
            # Saving NodeId instead of number because QuantumGraph API
            # requires it for creating per-job QuantumGraphs.
            if cluster_name in cqgraph:
                cluster = cqgraph.get_cluster(cluster_name)
            else:
                cluster = QuantaCluster(cluster_name, cluster_label, info)
                cqgraph.add_cluster(cluster)
            cluster.add_quantum(qnode.nodeId, task_label)
            new_clusters[cluster_name] = cluster

    # Dependencies are added only after every quantum has been assigned,
    # because they are resolved through quantum_to_cluster.
    for cluster in new_clusters.values():
        add_cluster_dependencies(cqgraph, cluster, quantum_to_cluster)

364 

365 

def add_cluster_dependencies(cqgraph, cluster, quantum_to_cluster):
    """Add dependencies for a cluster within a ClusteredQuantumGraph.

    For every quantum in the cluster, each parent quantum that lives in
    a different cluster produces a dependency from the parent's cluster
    to this quantum's cluster.

    Parameters
    ----------
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the dependencies are added.
        (modified in method)
    cluster : `lsst.ctrl.bps.QuantaCluster`
        The cluster for which to add dependencies.
    quantum_to_cluster : `dict` [ `str`, `str` ]
        Mapping of quantum node id to which cluster it was added.
        Only read here.

    Raises
    ------
    KeyError
        Raised if any of the cluster's quantum node ids are missing
        from quantum_to_cluster or if their parent quantum node ids
        are missing from quantum_to_cluster.
    """
    qgraph = cqgraph.qgraph
    for node_id in cluster.qgraph_node_ids:
        parents = qgraph.determineInputsToQuantumNode(qgraph.getQuantumNodeByNodeId(node_id))
        for parent in parents:
            # Only the two mapping lookups are guarded: the handler treats the
            # failing key as a quantum node id, which would be wrong for a
            # KeyError raised inside add_dependency.
            try:
                parent_cluster = quantum_to_cluster[parent.nodeId]
                child_cluster = quantum_to_cluster[node_id]
            except KeyError as e:  # pragma: no cover
                # For debugging a problem internal to method
                qnode = qgraph.getQuantumNodeByNodeId(e.args[0])
                _LOG.error(
                    "Quanta missing when clustering: %s, %s",
                    qnode.taskDef.label,
                    qnode.quantum.dataId.byName(),
                )
                raise

            # Quanta within the same cluster need no dependency edge.
            if parent_cluster != child_cluster:
                cqgraph.add_dependency(parent_cluster, child_cluster)