Coverage for python/lsst/ctrl/bps/quantum_clustering_funcs.py: 6%

148 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 11:07 +0000

1# This file is part of ctrl_bps. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <https://www.gnu.org/licenses/>. 

27 

28"""Functions that convert QuantumGraph into ClusteredQuantumGraph. 

29""" 

30import logging 

31import re 

32from collections import defaultdict 

33 

34from networkx import DiGraph, is_directed_acyclic_graph, topological_sort 

35 

36from . import ClusteredQuantumGraph, QuantaCluster 

37 

38_LOG = logging.getLogger(__name__) 

39 

40 

def single_quantum_clustering(config, qgraph, name):
    """Build a ClusteredQuantumGraph in which each quantum is its own cluster.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph whose quanta are wrapped one per cluster.
    name : `str`
        Name to give to the ClusteredQuantumGraph.

    Returns
    -------
    clustered_quantum : `lsst.ctrl.bps.ClusteredQuantumGraph`
        Graph holding one cluster per quantum of ``qgraph``; a dependency
        is added from each quantum's cluster to the cluster of every node
        reported by ``qgraph.determineOutputsOfQuantumNode``.
    """
    clustered = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
    )

    # Cluster-name template per task label; the config is searched only the
    # first time a given label is encountered.
    templates_by_label = {}

    # Quantum node id -> cluster name.  Filled during the first pass and
    # consumed by the second pass when wiring up dependencies.
    cluster_name_by_node = {}

    # First pass: wrap every quantum in its own cluster.
    for qnode in qgraph:
        task_label = qnode.taskDef.label
        if task_label not in templates_by_label:
            search_opts = {"curvals": {"curr_pipetask": task_label}, "replaceVars": False}
            found, template_data_id = config.search("templateDataId", opt=search_opts)
            # Without a configured templateDataId the name is just the
            # node number.
            templates_by_label[task_label] = (
                "{node_number}_{label}_" + template_data_id if found else "{node_number}"
            )

        cluster = QuantaCluster.from_quantum_node(qnode, templates_by_label[task_label])
        cluster_name_by_node[qnode.nodeId] = cluster.name
        clustered.add_cluster(cluster)

    # Second pass: every cluster now exists, so parent -> child edges can be
    # added by name.
    for qnode in qgraph:
        for child in qgraph.determineOutputsOfQuantumNode(qnode):
            clustered.add_dependency(
                cluster_name_by_node[qnode.nodeId],
                cluster_name_by_node[child.nodeId],
            )

    return clustered

100 

101 

def _check_clusters_tasks(cluster_config, task_graph):
    """Validate cluster definitions against the pipeline's task graph.

    Parameters
    ----------
    cluster_config : `lsst.ctrl.bps.BpsConfig`
        The cluster section from the BPS configuration.
    task_graph : `lsst.pipe.base.taskGraph`
        Directed graph of tasks.

    Returns
    -------
    cluster_labels: `list` [`str`]
        Dependency ordered list of cluster labels (includes
        single quantum clusters).
    ordered_tasks : `dict` [`str`, `list` [`str`]]
        Mapping of cluster label to ordered list of task labels.

    Raises
    ------
    RuntimeError
        Raised if task label appears in more than one cluster def or
        if there's a cycle in the cluster defs.
    """
    # The task graph is keyed by TaskDef objects; mirror it as a graph of
    # plain labels so the rest of this function can work with strings.
    label_graph = DiGraph()
    for task_def in task_graph:
        label_graph.add_node(task_def.label)
        for parent_def in task_graph.predecessors(task_def):
            label_graph.add_edge(parent_def.label, task_def.label)

    ordered_tasks = {}  # cluster label -> task labels in dependency order
    cluster_of_task = {}  # task label -> cluster label
    claimed_labels = set()  # task labels already assigned to a cluster
    cluster_graph = DiGraph()  # graph of cluster labels, used for cycle check

    # Clusters explicitly defined in the configuration.
    for cluster_label in cluster_config:
        _LOG.debug("cluster = %s", cluster_label)
        present_tasks = []
        for raw_label in cluster_config[cluster_label]["pipetasks"].split(","):
            task_label = raw_label.strip()
            if task_label in claimed_labels:
                raise RuntimeError(
                    f"Task label {task_label} appears in more than one cluster definition. "
                    "Aborting submission."
                )
            # Labels absent from the task graph are ignored; only tasks that
            # are actually present get claimed by this cluster.
            if label_graph.has_node(task_label):
                present_tasks.append(task_label)
                claimed_labels.add(task_label)
                cluster_of_task[task_label] = cluster_label

        if not present_tasks:
            continue

        # Order this cluster's tasks by their dependencies.
        ordered_tasks[cluster_label] = list(topological_sort(label_graph.subgraph(present_tasks)))
        cluster_graph.add_node(cluster_label)

    # Every task not claimed above becomes a cluster named after itself.
    for task_label in label_graph:
        if task_label in claimed_labels:
            continue
        cluster_of_task[task_label] = task_label
        cluster_graph.add_node(task_label)
        ordered_tasks[task_label] = [task_label]

    # Lift task-level edges to cluster-level edges, dropping edges that stay
    # inside a single cluster.
    for edge in task_graph.edges:
        parent_cluster = cluster_of_task[edge[0].label]
        child_cluster = cluster_of_task[edge[1].label]
        if parent_cluster != child_cluster:
            cluster_graph.add_edge(parent_cluster, child_cluster)

    _LOG.debug("clustered_task_graph.edges = %s", list(cluster_graph.edges))

    if not is_directed_acyclic_graph(cluster_graph):
        raise RuntimeError("Cluster pipetasks do not create a DAG")

    return list(topological_sort(cluster_graph)), ordered_tasks

182 

183 

def dimension_clustering(config, qgraph, name):
    """Cluster a QuantumGraph according to the ``cluster`` config section.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph to break into clusters for ClusteredQuantumGraph.
    name : `str`
        Name to give to ClusteredQuantumGraph.

    Returns
    -------
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        ClusteredQuantumGraph with clustering as defined in config.
    """
    clustered = ClusteredQuantumGraph(
        name=name,
        qgraph=qgraph,
        qgraph_filename=config[".bps_defined.runQgraphFile"],
    )

    # Quantum node id -> cluster name; shared with the helpers below, which
    # fill it in and use it to create dependencies between clusters.
    cluster_of_quantum = {}

    cluster_defs = config["cluster"]
    sorted_labels, tasks_by_cluster = _check_clusters_tasks(cluster_defs, qgraph.taskGraph)

    # Labels are processed in the dependency order returned above.
    for cluster_label in sorted_labels:
        _LOG.debug("cluster = %s", cluster_label)

        # A label without a config entry is a task that was not assigned to
        # any configured cluster: give each of its quanta its own cluster.
        if cluster_label not in cluster_defs:
            add_clusters_per_quantum(config, cluster_label, qgraph, clustered, cluster_of_quantum)
            continue

        add_dim_clusters(
            cluster_defs[cluster_label],
            cluster_label,
            qgraph,
            tasks_by_cluster,
            clustered,
            cluster_of_quantum,
        )

    return clustered

227 

228 

def add_clusters_per_quantum(config, label, qgraph, cqgraph, quantum_to_cluster):
    """Add one single-quantum cluster per quantum of a task.

    Parameters
    ----------
    config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration.
    label : `str`
        taskDef label for which to add clusters.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph providing quanta for the clusters.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the new 1-quantum
        clusters are added. (modified in method)
    quantum_to_cluster : `dict` [ `str`, `str` ]
        Mapping of quantum node id to which cluster it was added.
        (modified in method)
    """
    _LOG.info("Creating 1-quantum clusters for task %s", label)

    search_opts = {"curvals": {"curr_pipetask": label}, "replaceVars": False}
    found, template_data_id = config.search("templateDataId", opt=search_opts)
    # Without a configured templateDataId the cluster name is just the node
    # number.
    template = "{node_number}_{label}_" + template_data_id if found else "{node_number}"

    task_def = qgraph.findTaskDefByLabel(label)
    for qnode in qgraph.getNodesForTask(task_def):
        cluster = QuantaCluster.from_quantum_node(qnode, template)
        cqgraph.add_cluster(cluster)
        # Record the mapping before adding dependencies: the dependency
        # helper looks up this quantum's own cluster name in the mapping.
        quantum_to_cluster[qnode.nodeId] = cluster.name
        add_cluster_dependencies(cqgraph, cluster, quantum_to_cluster)

266 

267 

def add_dim_clusters(cluster_config, cluster_label, qgraph, ordered_tasks, cqgraph, quantum_to_cluster):
    """Add clusters for a cluster label to a ClusteredQuantumGraph.

    Quanta of the cluster's tasks are grouped by the values of the
    configured ``dimensions`` in their data IDs: the values are formatted
    into a cluster name and all quanta yielding the same name share a
    cluster.

    Parameters
    ----------
    cluster_config : `lsst.ctrl.bps.BpsConfig`
        BPS configuration for specific cluster label.
    cluster_label : `str`
        Cluster label for which to add clusters.
    qgraph : `lsst.pipe.base.QuantumGraph`
        QuantumGraph providing quanta for the clusters.
    ordered_tasks : `dict` [`str`, `list` [`str`]]
        Mapping of cluster label to ordered list of task labels.
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the new
        clusters are added. (modified in method)
    quantum_to_cluster : `dict` [ `str`, `str` ]
        Mapping of quantum node id to which cluster it was added.
        (modified in method)

    Raises
    ------
    RuntimeError
        Raised if a quantum's data ID lacks one of the cluster dimensions
        and no ``equalDimensions`` pair can supply it.
    ValueError
        Raised if an ``equalDimensions`` entry is not of the form
        ``dim1:dim2``.
    """
    cluster_dims = []
    if "dimensions" in cluster_config:
        cluster_dims = [d.strip() for d in cluster_config["dimensions"].split(",")]
    _LOG.debug("cluster_dims = %s", cluster_dims)

    found, template = cluster_config.search("clusterTemplate", opt={"replaceVars": False})
    if not found:
        if cluster_dims:
            template = f"{cluster_label}_" + "_".join(f"{{{dim}}}" for dim in cluster_dims)
        else:
            template = cluster_label
    _LOG.debug("template = %s", template)

    # equalDimensions is the same for every task and quantum of this
    # cluster, so parse it once rather than once per quantum.
    equal_pairs = []
    equal_dims = cluster_config.get("equalDimensions", None)
    if equal_dims:
        for pair in equal_dims.split(","):
            dim1, dim2 = pair.strip().split(":")
            equal_pairs.append((dim1, dim2))

    # Clusters touched in this call, keyed by name so that each cluster has
    # its dependencies computed once no matter how many quanta it received.
    touched_clusters = {}

    for task_label in ordered_tasks[cluster_label]:
        task_def = qgraph.findTaskDefByLabel(task_label)
        if task_def is None:
            continue

        # Determine cluster for each node
        for qnode in qgraph.getNodesForTask(task_def):
            # Gather info for cluster name template into a dictionary.
            info = {}
            missing_info = set()
            data_id_info = dict(qnode.quantum.dataId.mapping)
            for dim_name in cluster_dims:
                _LOG.debug("dim_name = %s", dim_name)
                if dim_name in data_id_info:
                    info[dim_name] = data_id_info[dim_name]
                else:
                    missing_info.add(dim_name)

            # Fill in a dimension the quantum lacks from its declared
            # equivalent.  Only dimensions that are actually missing are
            # substituted: a value present in the quantum's own data ID is
            # kept, and removing from missing_info can no longer raise
            # KeyError for a dimension that was never missing.
            for dim1, dim2 in equal_pairs:
                if dim1 in missing_info and dim2 in data_id_info:
                    info[dim1] = data_id_info[dim2]
                    missing_info.remove(dim1)
                elif dim2 in missing_info and dim1 in data_id_info:
                    info[dim2] = data_id_info[dim1]
                    missing_info.remove(dim2)

            info["label"] = cluster_label
            _LOG.debug("info for template = %s", info)

            if missing_info:
                # Sorted so the message is the same from run to run.
                raise RuntimeError(
                    f"Quantum {qnode.nodeId} ({data_id_info}) missing dimensions: "
                    f"{','.join(sorted(missing_info))}; "
                    f"required for cluster {cluster_label}"
                )

            # Use dictionary plus template format string to create name.
            # A defaultdict makes template fields with no entry in info
            # format as empty strings instead of raising KeyError; the
            # runs of underscores that can leave behind are collapsed.
            cluster_name = template.format_map(defaultdict(lambda: "", info))
            cluster_name = re.sub("_+", "_", cluster_name)

            # Some dimensions contain slash which must be replaced.
            cluster_name = re.sub("/", "_", cluster_name)
            _LOG.debug("cluster_name = %s", cluster_name)

            # Save mapping for use when creating dependencies.
            quantum_to_cluster[qnode.nodeId] = cluster_name

            # Add cluster to the ClusteredQuantumGraph.
            # Saving NodeId instead of number because QuantumGraph API
            # requires it for creating per-job QuantumGraphs.
            if cluster_name in cqgraph:
                cluster = cqgraph.get_cluster(cluster_name)
            else:
                cluster = QuantaCluster(cluster_name, cluster_label, info)
                cqgraph.add_cluster(cluster)
            cluster.add_quantum(qnode.nodeId, task_label)
            touched_clusters[cluster_name] = cluster

    # Dependencies are added only after every quantum of this cluster label
    # has been mapped, since the lookup needs the cluster of each quantum.
    for cluster in touched_clusters.values():
        add_cluster_dependencies(cqgraph, cluster, quantum_to_cluster)

369 

370 

def add_cluster_dependencies(cqgraph, cluster, quantum_to_cluster):
    """Add the parent dependencies of one cluster to a ClusteredQuantumGraph.

    For every quantum in the cluster, each input quantum that belongs to a
    different cluster yields a dependency from that cluster to this one.

    Parameters
    ----------
    cqgraph : `lsst.ctrl.bps.ClusteredQuantumGraph`
        The ClusteredQuantumGraph to which the dependencies are
        added. (modified in method)
    cluster : `lsst.ctrl.bps.QuantaCluster`
        The cluster for which to add dependencies.
    quantum_to_cluster : `dict` [ `str`, `str` ]
        Mapping of quantum node id to which cluster it was added.

    Raises
    ------
    KeyError
        Raised if any of the cluster's quantum node ids are missing
        from quantum_to_cluster or if their parent quantum node ids
        are missing from quantum_to_cluster.
    """
    qgraph = cqgraph.qgraph
    for node_id in cluster.qgraph_node_ids:
        child_node = qgraph.getQuantumNodeByNodeId(node_id)
        for parent in qgraph.determineInputsToQuantumNode(child_node):
            try:
                parent_cluster = quantum_to_cluster[parent.nodeId]
                child_cluster = quantum_to_cluster[node_id]
                # Quanta sharing a cluster need no edge between them.
                if parent_cluster != child_cluster:
                    cqgraph.add_dependency(parent_cluster, child_cluster)
            except KeyError as e:  # pragma: no cover
                # For debugging a problem internal to method: report which
                # quantum had no cluster assigned, then re-raise.
                unmapped = qgraph.getQuantumNodeByNodeId(e.args[0])
                _LOG.error(
                    "Quanta missing when clustering: %s, %s",
                    unmapped.taskDef.label,
                    unmapped.quantum.dataId,
                )
                raise