Coverage for python/lsst/source/injection/utils/make_injection_pipeline.py: 5%

102 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-10 05:25 -0700

1# This file is part of source_injection. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["make_injection_pipeline"] 

25 

26import itertools 

27import logging 

28 

29from lsst.analysis.tools.interfaces import AnalysisPipelineTask 

30from lsst.pipe.base import LabelSpecifier, Pipeline 

31 

32 

def _parse_config_override(config_override: str) -> tuple[str, str, str]:
    """Parse a config override string into a label, a key and a value.

    Parameters
    ----------
    config_override : `str`
        Config override string to parse, in the format 'label:key=value'.

    Returns
    -------
    label : `str`
        Label to override.
    key : `str`
        Key to override.
    value : `str`
        Value to override.

    Raises
    ------
    TypeError
        If the config override string cannot be parsed.
    """
    # The label is everything before the first ':'; an absent ':' means the
    # string cannot be an override specification at all.
    label, colon, keyvalue = config_override.partition(":")
    if not colon:
        raise TypeError(
            f"Unrecognized syntax for option 'config': '{config_override}' (does not match pattern "
            "(?P<label>.+):(?P<value>.+=.+))"
        )
    # Split key from value on the first '='; any further '=' characters are
    # retained inside the value.
    try:
        key, value = keyvalue.split("=", 1)
    except ValueError as e:
        raise TypeError(
            f"Could not parse key-value pair '{config_override}' using separator '=', with multiple values "
            f"not allowed: {e}"
        ) from None
    return label, key, value

70 

71 

def make_injection_pipeline(
    dataset_type_name: str,
    reference_pipeline: Pipeline | str,
    injection_pipeline: Pipeline | str | None = None,
    exclude_subsets: bool = False,
    excluded_tasks: set[str] | str = {
        "jointcal",
        "gbdesAstrometricFit",
        "fgcmBuildFromIsolatedStars",
        "fgcmFitCycle",
        "fgcmOutputProducts",
    },
    prefix: str = "injected_",
    instrument: str | None = None,
    config: str | list[str] | None = None,
    log_level: int = logging.INFO,
) -> Pipeline:
    """Make an expanded source injection pipeline.

    This function takes a reference pipeline definition file in YAML format and
    prefixes all post-injection dataset type names with the injected prefix. If
    an optional injection pipeline definition YAML file is also provided, the
    injection task will be merged into the pipeline.

    Unless explicitly excluded, all subsets from the reference pipeline
    containing the task which generates the injection dataset type will also be
    updated to include the injection task. A series of new injected subsets
    will also be created. These new subsets are copies of existent subsets, but
    containing only the tasks which are affected by source injection. New
    injected subsets will be the original subset name with the prefix
    'injected_' prepended.

    Parameters
    ----------
    dataset_type_name : `str`
        Name of the dataset type being injected into.
    reference_pipeline : Pipeline | `str`
        Location of a reference pipeline definition YAML file.
    injection_pipeline : Pipeline | `str`, optional
        Location of an injection pipeline definition YAML file stub. If not
        provided, an attempt to infer the injection pipeline will be made based
        on the injected dataset type name.
    exclude_subsets : `bool`, optional
        If True, do not update pipeline subsets to include the injection task.
    excluded_tasks : `set` [`str`] | `str`
        Set or comma-separated string of task labels to exclude from the
        injection pipeline.
    prefix : `str`, optional
        Prefix to prepend to each affected post-injection dataset type name.
    instrument : `str`, optional
        Add instrument overrides. Must be a fully qualified class name.
    config : `str` | `list` [`str`], optional
        Config override for a task, in the format 'label:key=value'.
    log_level : `int`, optional
        The log level to use for logging.

    Returns
    -------
    pipeline : `lsst.pipe.base.Pipeline`
        An expanded source injection pipeline.

    Raises
    ------
    RuntimeError
        If ``dataset_type_name`` is not a connection of any task in the
        reference pipeline, or if the merged injection pipeline contains more
        than one task.
    """
    # Instantiate logger.
    logger = logging.getLogger(__name__)
    logger.setLevel(log_level)

    # Load the pipeline and apply config overrides, if supplied.
    if isinstance(reference_pipeline, str):
        pipeline = Pipeline.fromFile(reference_pipeline)
    else:
        pipeline = reference_pipeline
    if config:
        # Accept a single 'label:key=value' string or a list of them.
        if isinstance(config, str):
            config = [config]
        for conf in config:
            config_label, config_key, config_value = _parse_config_override(conf)
            pipeline.addConfigOverride(config_label, config_key, config_value)

    # Add an instrument override, if provided.
    if instrument:
        pipeline.addInstrument(instrument)

    # Remove all tasks which are not to be included in the injection pipeline.
    if isinstance(excluded_tasks, str):
        excluded_tasks = set(excluded_tasks.split(","))
    all_tasks = set(pipeline.task_labels)
    preserved_tasks = all_tasks - excluded_tasks
    label_specifier = LabelSpecifier(labels=preserved_tasks)
    # EDIT mode removes tasks from parent subsets but keeps the subset itself.
    pipeline = pipeline.subsetFromLabels(label_specifier, pipeline.PipelineSubsetCtrl.EDIT)
    # Excluding a task that was never present is not fatal; warn instead.
    if len(not_found_tasks := excluded_tasks - all_tasks) > 0:
        grammar = "Task" if len(not_found_tasks) == 1 else "Tasks"
        logger.warning(
            "%s marked for exclusion not found in the reference pipeline: %s.",
            grammar,
            ", ".join(sorted(not_found_tasks)),
        )

    # Determine the set of dataset type names affected by source injection.
    injected_tasks: set[str] = set()
    all_connection_type_names: set[str] = set()
    injected_types = {dataset_type_name}
    precursor_injection_task_labels: set[str] = set()
    # Loop over all tasks in the pipeline.
    # NOTE(review): this assumes to_graph() yields tasks in topological
    # (upstream-first) order, so that injected_types is fully populated before
    # each downstream task is examined — confirm against pipe_base docs.
    for task_node in pipeline.to_graph().tasks.values():
        # Add override for Analysis Tools tasks. Connections in Analysis
        # Tools are dynamically assigned, and so are not able to be modified in
        # the same way as a static connection. Instead, we add a config
        # override here to the connections.outputName field. This field is
        # prepended to all Analysis Tools connections, and so will prepend the
        # injection prefix to all plot/metric outputs. Further processing of
        # this task will be skipped thereafter.
        if issubclass(task_node.task_class, AnalysisPipelineTask):
            pipeline.addConfigOverride(
                task_node.label,
                "connections.outputName",
                prefix + task_node.config.connections.outputName,
            )
            continue

        # Collect both runtime and init connections for this task.
        input_types = {
            read_edge.dataset_type_name
            for read_edge in itertools.chain(task_node.inputs.values(), task_node.init.inputs.values())
        }
        output_types = {
            write_edge.dataset_type_name
            for write_edge in itertools.chain(task_node.outputs.values(), task_node.init.outputs.values())
        }
        all_connection_type_names |= input_types | output_types
        # Identify the precursor task: allows appending inject task to subset.
        if dataset_type_name in output_types:
            precursor_injection_task_labels.add(task_node.label)
        # If the task has any injected dataset type names as inputs, add the
        # task to a set of tasks touched by injection, and add all of the
        # outputs of this task to the set of injected types.
        if len(input_types & injected_types) > 0:
            injected_tasks |= {task_node.label}
            injected_types |= output_types
            # Add the injection prefix to all affected dataset type names.
            for edge in itertools.chain(
                task_node.inputs.values(),
                task_node.outputs.values(),
                task_node.init.inputs.values(),
                task_node.init.outputs.values(),
            ):
                if hasattr(task_node.config.connections.ConnectionsClass, edge.connection_name):
                    # If the connection type is not dynamic, modify as usual.
                    if edge.parent_dataset_type_name in injected_types:
                        pipeline.addConfigOverride(
                            task_node.label,
                            "connections." + edge.connection_name,
                            prefix + edge.dataset_type_name,
                        )
                else:
                    # Add log warning if the connection type is dynamic.
                    logger.warning(
                        "Dynamic connection %s in task %s is not supported here. This connection will "
                        "neither be modified nor merged into the output injection pipeline.",
                        edge.connection_name,
                        task_node.label,
                    )
    # Raise if the injected dataset type does not exist in the pipeline.
    if dataset_type_name not in all_connection_type_names:
        raise RuntimeError(
            f"Dataset type '{dataset_type_name}' not found in the reference pipeline; "
            "no connection type edits to be made."
        )

    # Attempt to infer the injection pipeline from the dataset type name.
    if not injection_pipeline:
        match dataset_type_name:
            case "postISRCCD":
                injection_pipeline = "$SOURCE_INJECTION_DIR/pipelines/inject_exposure.yaml"
            case "icExp" | "calexp":
                injection_pipeline = "$SOURCE_INJECTION_DIR/pipelines/inject_visit.yaml"
            case "deepCoadd" | "deepCoadd_calexp" | "goodSeeingCoadd":
                injection_pipeline = "$SOURCE_INJECTION_DIR/pipelines/inject_coadd.yaml"
            case _:
                # Print a warning rather than a raise, as the user may wish to
                # edit connection names without merging an injection pipeline.
                logger.warning(
                    "Unable to infer injection pipeline stub from dataset type name '%s' and none was "
                    "provided. No injection pipeline will be merged into the output pipeline.",
                    dataset_type_name,
                )
        if injection_pipeline:
            logger.info(
                "Injected dataset type '%s' used to infer injection pipeline: %s",
                dataset_type_name,
                injection_pipeline,
            )

    # Merge the injection pipeline to the modified pipeline, if provided.
    if injection_pipeline:
        if isinstance(injection_pipeline, str):
            injection_pipeline = Pipeline.fromFile(injection_pipeline)
        # Only a single-task injection pipeline stub is supported.
        if len(injection_pipeline) != 1:
            raise RuntimeError(
                f"The injection pipeline contains {len(injection_pipeline)} tasks; only 1 task is allowed."
            )
        pipeline.mergePipeline(injection_pipeline)
        # Loop over all injection tasks and modify the connection names.
        for injection_task_label in injection_pipeline.task_labels:
            injected_tasks.add(injection_task_label)
            pipeline.addConfigOverride(injection_task_label, "connections.input_exposure", dataset_type_name)
            pipeline.addConfigOverride(
                injection_task_label, "connections.output_exposure", prefix + dataset_type_name
            )
            # Optionally update subsets to include the injection task.
            if not exclude_subsets:
                # Any subset containing a task that writes the injection
                # dataset type also gets the injection task appended.
                for label in precursor_injection_task_labels:
                    precursor_subsets = pipeline.findSubsetsWithLabel(label)
                    for subset in precursor_subsets:
                        pipeline.addLabelToSubset(subset, injection_task_label)

    # Create injected subsets.
    injected_label_specifier = LabelSpecifier(labels=injected_tasks)
    injected_pipeline = pipeline.subsetFromLabels(injected_label_specifier, pipeline.PipelineSubsetCtrl.EDIT)
    injected_subset_labels: set[str] = set()
    for injected_subset in injected_pipeline.subsets.keys():
        injected_subset_label = "injected_" + injected_subset
        injected_subset_description = (
            "All tasks from the '" + injected_subset + "' subset impacted by source injection."
        )
        # Only materialize the new subset if it would contain at least one task.
        if len(injected_subset_tasks := injected_pipeline.subsets[injected_subset]) > 0:
            injected_subset_labels |= {injected_subset_label}
            pipeline.addLabeledSubset(
                injected_subset_label, injected_subset_description, injected_subset_tasks
            )

    # Log a summary with singular/plural grammar matched to the counts.
    grammar1 = "task" if len(pipeline) == 1 else "tasks"
    grammar2 = "subset" if len(injected_subset_labels) == 1 else "subsets"
    logger.info(
        "Made an injection pipeline containing %d %s and %d new injected %s.",
        len(pipeline),
        grammar1,
        len(injected_subset_labels),
        grammar2,
    )
    return pipeline