Coverage for python/lsst/ctrl/bps/quantum_clustering_funcs.py: 6%

148 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-05 09:22 +0000

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Functions that convert QuantumGraph into ClusteredQuantumGraph. 

23""" 

24import logging 

25import re 

26from collections import defaultdict 

27 

28from networkx import DiGraph, is_directed_acyclic_graph, topological_sort 

29 

30from . import ClusteredQuantumGraph, QuantaCluster 

31 

32_LOG = logging.getLogger(__name__) 

33 

34 

def single_quantum_clustering(config, qgraph, name):
    """Build a ClusteredQuantumGraph in which each quantum is its own cluster.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph whose quanta are turned into one-quantum clusters.
    name : `str`
        Name assigned to the resulting ClusteredQuantumGraph.

    Returns
    -------
    clustered_quantum : `lsst.ctrl.bps.ClusteredQuantumGraph`
        Graph holding one cluster per quantum of ``qgraph``, with an edge
        from a quantum's cluster to the cluster of each of its children.
    """
    clustered = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
    )

    # Cluster-name template per task label; the config is searched only the
    # first time a label is encountered.
    templates_by_label = {}

    # Quantum node id -> cluster name, filled in the first pass and read when
    # the dependencies are wired up in the second pass.
    cluster_name_of = {}

    # First pass: one cluster per quantum.
    for node in qgraph:
        task_label = node.taskDef.label
        name_template = templates_by_label.get(task_label)
        if name_template is None:
            search_opt = {"curvals": {"curr_pipetask": task_label}, "replaceVars": False}
            found, template_data_id = config.search("templateDataId", opt=search_opt)
            name_template = "{node_number}_{label}_" + template_data_id if found else "{node_number}"
            templates_by_label[task_label] = name_template

        single = QuantaCluster.from_quantum_node(node, name_template)
        cluster_name_of[node.nodeId] = single.name
        clustered.add_cluster(single)

    # Second pass: every quantum -> child edge becomes a cluster dependency.
    for parent_node in qgraph:
        for child_node in qgraph.determineOutputsOfQuantumNode(parent_node):
            clustered.add_dependency(
                cluster_name_of[parent_node.nodeId],
                cluster_name_of[child_node.nodeId],
            )

    return clustered

94 

95 

def _check_clusters_tasks(cluster_config, task_graph):
    """Validate the cluster definitions against the pipeline's task graph.

    Parameters
    ----------
    cluster_config : `lsst.ctrl.bps.BpsConfig`
        The cluster section from the BPS configuration.
    task_graph : `lsst.pipe.base.taskGraph`
        Directed graph of tasks.

    Returns
    -------
    cluster_labels : `list` [`str`]
        Cluster labels in dependency order. Tasks not named by any cluster
        definition appear as clusters labelled with the task label.
    ordered_tasks : `dict` [`str`, `list` [`str`]]
        Mapping of cluster label to its task labels in dependency order.

    Raises
    ------
    RuntimeError
        Raised if task label appears in more than one cluster def or
        if there's a cycle in the cluster defs.
    """
    # TaskGraph is keyed by TaskDef objects; mirror it as a graph of plain
    # labels so cluster definitions (which use labels) can be checked.
    label_graph = DiGraph()
    for task_def in task_graph:
        label_graph.add_node(task_def.label)
        for parent_def in task_graph.predecessors(task_def):
            label_graph.add_edge(parent_def.label, task_def.label)

    cluster_of_task = {}  # task label -> cluster label
    claimed_labels = set()  # task labels already assigned to a cluster def
    cluster_graph = DiGraph()  # graph of cluster labels, checked for cycles
    ordered_tasks = {}  # cluster label -> ordered list of task labels

    # Clusters explicitly defined in the configuration.
    for cluster_label in cluster_config:
        _LOG.debug("cluster = %s", cluster_label)
        requested = [piece.strip() for piece in cluster_config[cluster_label]["pipetasks"].split(",")]
        present = []
        for task_label in requested:
            if task_label in claimed_labels:
                raise RuntimeError(
                    f"Task label {task_label} appears in more than one cluster definition. "
                    "Aborting submission."
                )
            # Cluster defs only matter for tasks that are in the QuantumGraph.
            if not label_graph.has_node(task_label):
                continue
            present.append(task_label)
            claimed_labels.add(task_label)
            cluster_of_task[task_label] = cluster_label

        if not present:
            continue

        # Store the cluster's tasks in dependency order.
        ordered_tasks[cluster_label] = list(topological_sort(label_graph.subgraph(present)))
        cluster_graph.add_node(cluster_label)

    # Every remaining task becomes a cluster of its own, named by its label.
    for task_label in label_graph:
        if task_label in claimed_labels:
            continue
        cluster_of_task[task_label] = task_label
        cluster_graph.add_node(task_label)
        ordered_tasks[task_label] = [task_label]

    # Task edges that cross a cluster boundary become cluster edges.
    for edge in task_graph.edges:
        parent_cluster = cluster_of_task[edge[0].label]
        child_cluster = cluster_of_task[edge[1].label]
        if parent_cluster != child_cluster:
            cluster_graph.add_edge(parent_cluster, child_cluster)

    _LOG.debug("clustered_task_graph.edges = %s", list(cluster_graph.edges))

    if not is_directed_acyclic_graph(cluster_graph):
        raise RuntimeError("Cluster pipetasks do not create a DAG")

    return list(topological_sort(cluster_graph)), ordered_tasks

176 

177 

def dimension_clustering(config, qgraph, name):
    """Cluster a QuantumGraph according to the ``cluster`` config section.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph to break into clusters for ClusteredQuantumGraph.
    name : `str`
        Name to give to ClusteredQuantumGraph.

    Returns
    -------
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph with clustering as defined in config.
    """
    clustered = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
    )

    # Quantum node id -> cluster name; shared with the helpers below so that
    # dependencies can be created as clusters are added.
    node_to_cluster = {}

    cluster_defs = config["cluster"]
    labels_in_order, tasks_by_cluster = _check_clusters_tasks(cluster_defs, qgraph.taskGraph)

    for cluster_label in labels_in_order:
        _LOG.debug("cluster = %s", cluster_label)
        if cluster_label not in cluster_defs:
            # Not a configured cluster: the label is a task label whose
            # quanta each get a cluster of their own.
            add_clusters_per_quantum(config, cluster_label, qgraph, clustered, node_to_cluster)
            continue

        add_dim_clusters(
            cluster_defs[cluster_label],
            cluster_label,
            qgraph,
            tasks_by_cluster,
            clustered,
            node_to_cluster,
        )

    return clustered

221 

222 

def add_clusters_per_quantum(config, label, qgraph, cqgraph, quantum_to_cluster):
    """Add one single-quantum cluster per quantum of the given task.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    label : `str`
        taskDef label for which to add clusters.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph providing quanta for the clusters.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the new 1-quantum
        clusters are added. (modified in method)
    quantum_to_cluster : `dict` [ `str`, `str` ]
        Mapping of quantum node id to which cluster it was added.
        (modified in method)
    """
    _LOG.info("Creating 1-quantum clusters for task %s", label)

    # Name template: task-specific templateDataId if configured, otherwise
    # just the node number.
    search_opt = {"curvals": {"curr_pipetask": label}, "replaceVars": False}
    found, template_data_id = config.search("templateDataId", opt=search_opt)
    template = "{node_number}_{label}_" + template_data_id if found else "{node_number}"

    for qnode in qgraph.getNodesForTask(qgraph.findTaskDefByLabel(label)):
        single = QuantaCluster.from_quantum_node(qnode, template)
        cqgraph.add_cluster(single)
        # Record the mapping before wiring dependencies, which looks up this
        # quantum as well as its parents.
        quantum_to_cluster[qnode.nodeId] = single.name
        add_cluster_dependencies(cqgraph, single, quantum_to_cluster)

260 

261 

def add_dim_clusters(cluster_config, cluster_label, qgraph, ordered_tasks, cqgraph, quantum_to_cluster):
    """Add clusters for a cluster label to a ClusteredQuantumGraph.

    The quanta of every task in the cluster definition are grouped by the
    values of the configured cluster dimensions: quanta that format to the
    same cluster name end up in the same cluster.

    Parameters
    ----------
    cluster_config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration for specific cluster label.
    cluster_label : `str`
        Cluster label for which to add clusters.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph providing quanta for the clusters.
    ordered_tasks : `dict` [`str`, `list` [`str`]]
        Mapping of cluster label to ordered list of task labels.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the new clusters are added.
        (modified in method)
    quantum_to_cluster : `dict` [ `str`, `str` ]
        Mapping of quantum node id to which cluster it was added.
        (modified in method)

    Raises
    ------
    RuntimeError
        Raised if a quantum has no value for one of the cluster dimensions,
        even after applying the ``equalDimensions`` substitutions.
    """
    cluster_dims = []
    if "dimensions" in cluster_config:
        cluster_dims = [d.strip() for d in cluster_config["dimensions"].split(",")]
    _LOG.debug("cluster_dims = %s", cluster_dims)

    found, template = cluster_config.search("clusterTemplate", opt={"replaceVars": False})
    if not found:
        if cluster_dims:
            template = f"{cluster_label}_" + "_".join(f"{{{dim}}}" for dim in cluster_dims)
        else:
            template = cluster_label
    _LOG.debug("template = %s", template)

    # equalDimensions is a comma-separated list of "dim1:dim2" pairs. It does
    # not depend on the task or quantum, so parse it once up front.
    equal_dims = cluster_config.get("equalDimensions", None)
    equal_pairs = []
    if equal_dims:
        for pair in equal_dims.split(","):
            dim1, dim2 = pair.strip().split(":")
            equal_pairs.append((dim1, dim2))

    # Clusters touched by this call, keyed by name so each one is handed to
    # add_cluster_dependencies only once no matter how many quanta it holds.
    # NOTE(review): assumes re-adding an existing dependency is a no-op in
    # ClusteredQuantumGraph.add_dependency -- confirm.
    new_clusters = {}

    for task_label in ordered_tasks[cluster_label]:
        task_def = qgraph.findTaskDefByLabel(task_label)
        if task_def is None:
            continue

        # Determine cluster for each node.
        for qnode in qgraph.getNodesForTask(task_def):
            # Gather info for cluster name template into a dictionary.
            info = {}
            missing_info = set()
            data_id_info = qnode.quantum.dataId.byName()
            for dim_name in cluster_dims:
                _LOG.debug("dim_name = %s", dim_name)
                if dim_name in data_id_info:
                    info[dim_name] = data_id_info[dim_name]
                else:
                    missing_info.add(dim_name)

            # Fill a still-missing cluster dimension from the dimension that
            # the config declares equal to it. Testing membership in
            # missing_info (instead of cluster_dims) keeps a value the
            # quantum already has and avoids a KeyError from removing a
            # dimension that was never missing or was already filled.
            for dim1, dim2 in equal_pairs:
                if dim1 in missing_info and dim2 in data_id_info:
                    info[dim1] = data_id_info[dim2]
                    missing_info.discard(dim1)
                elif dim2 in missing_info and dim1 in data_id_info:
                    info[dim2] = data_id_info[dim1]
                    missing_info.discard(dim2)

            info["label"] = cluster_label
            _LOG.debug("info for template = %s", info)

            if missing_info:
                # Both fragments must be f-strings for cluster_label to be
                # interpolated; sorted() keeps the message deterministic.
                raise RuntimeError(
                    f"Quantum {qnode.nodeId} ({data_id_info}) missing dimensions "
                    f"{','.join(sorted(missing_info))} required for cluster {cluster_label}"
                )

            # Use dictionary plus template format string to create name.
            # To avoid key errors from generic patterns, use defaultdict so
            # unknown placeholders format to an empty string.
            cluster_name = template.format_map(defaultdict(lambda: "", info))
            cluster_name = re.sub("_+", "_", cluster_name)

            # Some dimensions contain slash which must be replaced.
            cluster_name = re.sub("/", "_", cluster_name)
            _LOG.debug("cluster_name = %s", cluster_name)

            # Save mapping for use when creating dependencies.
            quantum_to_cluster[qnode.nodeId] = cluster_name

            # Add cluster to the ClusteredQuantumGraph.
            # Saving NodeId instead of number because QuantumGraph API
            # requires it for creating per-job QuantumGraphs.
            if cluster_name in cqgraph:
                cluster = cqgraph.get_cluster(cluster_name)
            else:
                cluster = QuantaCluster(cluster_name, cluster_label, info)
                cqgraph.add_cluster(cluster)
            cluster.add_quantum(qnode.nodeId, task_label)
            new_clusters.setdefault(cluster_name, cluster)

    # Dependencies are added only after every quantum of the cluster label
    # has been mapped, since the lookup needs all of a cluster's quanta.
    for cluster in new_clusters.values():
        add_cluster_dependencies(cqgraph, cluster, quantum_to_cluster)

363 

364 

def add_cluster_dependencies(cqgraph, cluster, quantum_to_cluster):
    """Connect a cluster to the clusters holding its quanta's parents.

    For each quantum in the cluster, every parent quantum that lives in a
    different cluster yields a dependency from the parent's cluster to this
    quantum's cluster.

    Parameters
    ----------
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the dependencies are added.
        (modified in method)
    cluster : `lsst.ctrl.bps.QuantaCluster`
        The cluster for which to add dependencies.
    quantum_to_cluster : `dict` [ `str`, `str` ]
        Mapping of quantum node id to which cluster it was added.
        Only read by this function.

    Raises
    ------
    KeyError :
        Raised if any of the cluster's quantum node ids are missing
        from quantum_to_cluster or if their parent quantum node ids
        are missing from quantum_to_cluster.
    """
    qgraph = cqgraph.qgraph
    for child_id in cluster.qgraph_node_ids:
        child_node = qgraph.getQuantumNodeByNodeId(child_id)
        for parent_node in qgraph.determineInputsToQuantumNode(child_node):
            try:
                parent_cluster = quantum_to_cluster[parent_node.nodeId]
                child_cluster = quantum_to_cluster[child_id]
                # Parent and child in the same cluster need no edge.
                if parent_cluster != child_cluster:
                    cqgraph.add_dependency(parent_cluster, child_cluster)
            except KeyError as err:  # pragma: no cover
                # For debugging a problem internal to method: report which
                # quantum had not been assigned to a cluster, then re-raise.
                unmapped = qgraph.getQuantumNodeByNodeId(err.args[0])
                _LOG.error(
                    "Quanta missing when clustering: %s, %s",
                    unmapped.taskDef.label,
                    unmapped.quantum.dataId.byName(),
                )
                raise