Coverage for python/lsst/source/injection/utils/make_injection_pipeline.py: 5%

108 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-15 14:36 +0000

1# This file is part of source_injection. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["make_injection_pipeline"] 

25 

26import logging 

27 

28from lsst.analysis.tools.interfaces import AnalysisPipelineTask 

29from lsst.pipe.base import LabelSpecifier, Pipeline 

30 

31 

32def _get_dataset_type_names(conns, fields): 

33 """Return the name of a connection's dataset type.""" 

34 dataset_type_names = set() 

35 for field in fields: 

36 dataset_type_names.add(getattr(conns, field).name) 

37 return dataset_type_names 

38 

39 

40def _parse_config_override(config_override: str) -> tuple[str, str, str]: 

41 """Parse a config override string into a label, a key and a value. 

42 

43 Parameters 

44 ---------- 

45 config_override : `str` 

46 Config override string to parse. 

47 

48 Returns 

49 ------- 

50 label : `str` 

51 Label to override. 

52 key : `str` 

53 Key to override. 

54 value : `str` 

55 Value to override. 

56 

57 Raises 

58 ------ 

59 TypeError 

60 If the config override string cannot be parsed. 

61 """ 

62 try: 

63 label, keyvalue = config_override.split(":", 1) 

64 except ValueError: 

65 raise TypeError( 

66 f"Unrecognized syntax for option 'config': '{config_override}' (does not match pattern " 

67 "(?P<label>.+):(?P<value>.+=.+))" 

68 ) from None 

69 try: 

70 key, value = keyvalue.split("=", 1) 

71 except ValueError as e: 

72 raise TypeError( 

73 f"Could not parse key-value pair '{config_override}' using separator '=', with multiple values " 

74 f"not allowed: {e}" 

75 ) from None 

76 return label, key, value 

77 

78 

def make_injection_pipeline(
    dataset_type_name: str,
    reference_pipeline: Pipeline | str,
    injection_pipeline: Pipeline | str | None = None,
    exclude_subsets: bool = False,
    excluded_tasks: set[str]
    | str = {
        "jointcal",
        "gbdesAstrometricFit",
        "fgcmBuildFromIsolatedStars",
        "fgcmFitCycle",
        "fgcmOutputProducts",
    },
    prefix: str = "injected_",
    instrument: str | None = None,
    config: str | list[str] | None = None,
    log_level: int = logging.INFO,
) -> Pipeline:
    """Make an expanded source injection pipeline.

    This function takes a reference pipeline definition file in YAML format and
    prefixes all post-injection dataset type names with the injected prefix. If
    an optional injection pipeline definition YAML file is also provided, the
    injection task will be merged into the pipeline.

    Unless explicitly excluded, all subsets from the reference pipeline
    containing the task which generates the injection dataset type will also be
    updated to include the injection task. A series of new injected subsets
    will also be created. These new subsets are copies of existent subsets, but
    containing only the tasks which are affected by source injection. New
    injected subsets will be the original subset name with the prefix
    'injected_' prepended.

    Parameters
    ----------
    dataset_type_name : `str`
        Name of the dataset type being injected into.
    reference_pipeline : Pipeline | `str`
        Location of a reference pipeline definition YAML file.
    injection_pipeline : Pipeline | `str`, optional
        Location of an injection pipeline definition YAML file stub. If not
        provided, an attempt to infer the injection pipeline will be made based
        on the injected dataset type name.
    exclude_subsets : `bool`, optional
        If True, do not update pipeline subsets to include the injection task.
    excluded_tasks : `set` [`str`] | `str`
        Set or comma-separated string of task labels to exclude from the
        injection pipeline.
    prefix : `str`, optional
        Prefix to prepend to each affected post-injection dataset type name.
    instrument : `str`, optional
        Add instrument overrides. Must be a fully qualified class name.
    config : `str` | `list` [`str`], optional
        Config override for a task, in the format 'label:key=value'.
    log_level : `int`, optional
        The log level to use for logging.

    Returns
    -------
    pipeline : `lsst.pipe.base.Pipeline`
        An expanded source injection pipeline.

    Raises
    ------
    RuntimeError
        If ``dataset_type_name`` is not produced or consumed by any task in
        the reference pipeline, or if a supplied/inferred injection pipeline
        contains more than one task.
    """
    # Instantiate logger.
    logger = logging.getLogger(__name__)
    logger.setLevel(log_level)

    # Load the pipeline and apply config overrides, if supplied.
    if isinstance(reference_pipeline, str):
        pipeline = Pipeline.fromFile(reference_pipeline)
    else:
        # NOTE(review): a Pipeline instance passed in here is mutated in
        # place by the overrides below — confirm callers expect this.
        pipeline = reference_pipeline
    if config:
        # Accept a single override string as shorthand for a one-item list.
        if isinstance(config, str):
            config = [config]
        for conf in config:
            config_label, config_key, config_value = _parse_config_override(conf)
            pipeline.addConfigOverride(config_label, config_key, config_value)

    # Add an instrument override, if provided.
    if instrument:
        pipeline.addInstrument(instrument)

    # Remove all tasks which are not to be included in the injection pipeline.
    if isinstance(excluded_tasks, str):
        excluded_tasks = set(excluded_tasks.split(","))
    all_tasks = {taskDef.label for taskDef in pipeline.toExpandedPipeline()}
    preserved_tasks = all_tasks - excluded_tasks
    label_specifier = LabelSpecifier(labels=preserved_tasks)
    # EDIT mode removes tasks from parent subsets but keeps the subset itself.
    pipeline = pipeline.subsetFromLabels(label_specifier, pipeline.PipelineSubsetCtrl.EDIT)
    # Warn (rather than raise) about excluded labels that never existed, so a
    # stale exclusion list does not abort pipeline construction.
    if len(not_found_tasks := excluded_tasks - all_tasks) > 0:
        grammar = "Task" if len(not_found_tasks) == 1 else "Tasks"
        logger.warning(
            "%s marked for exclusion not found in the reference pipeline: %s.",
            grammar,
            ", ".join(sorted(not_found_tasks)),
        )

    # Determine the set of dataset type names affected by source injection.
    injected_tasks = set()
    all_connection_type_names = set()
    injected_types = {dataset_type_name}
    precursor_injection_task_labels = set()
    # Loop over all tasks in the pipeline. NOTE(review): injection effects
    # propagate in a single pass, which assumes tasks are yielded in
    # dependency order — confirm toExpandedPipeline guarantees this.
    for taskDef in pipeline.toExpandedPipeline():
        # Add override for Analysis Tools taskDefs. Connections in Analysis
        # Tools are dynamically assigned, and so are not able to be modified in
        # the same way as a static connection. Instead, we add a config
        # override here to the connections.outputName field. This field is
        # prepended to all Analysis Tools connections, and so will prepend the
        # injection prefix to all plot/metric outputs. Further processing of
        # this taskDef will be skipped thereafter.
        if issubclass(taskDef.taskClass, AnalysisPipelineTask):
            pipeline.addConfigOverride(
                taskDef.label, "connections.outputName", prefix + taskDef.config.connections.outputName
            )
            continue

        conns = taskDef.connections
        input_types = _get_dataset_type_names(conns, conns.initInputs | conns.inputs)
        output_types = _get_dataset_type_names(conns, conns.initOutputs | conns.outputs)
        all_connection_type_names |= input_types | output_types
        # Identify the precursor task: allows appending inject task to subset.
        if dataset_type_name in output_types:
            precursor_injection_task_labels.add(taskDef.label)
        # If the task has any injected dataset type names as inputs, add the
        # task to a set of tasks touched by injection, and add all of the
        # outputs of this task to the set of injected types.
        if len(input_types & injected_types) > 0:
            injected_tasks |= {taskDef.label}
            injected_types |= output_types
            # Add the injection prefix to all affected dataset type names.
            for field in conns.initInputs | conns.inputs | conns.initOutputs | conns.outputs:
                # Static connections appear on the ConnectionsClass; dynamic
                # ones exist only on the instance and cannot be renamed here.
                if hasattr(taskDef.config.connections.ConnectionsClass, field):
                    # If the connection type is not dynamic, modify as usual.
                    if (conn_type := getattr(conns, field).name) in injected_types:
                        pipeline.addConfigOverride(taskDef.label, "connections." + field, prefix + conn_type)
                else:
                    # Add log warning if the connection type is dynamic.
                    logger.warning(
                        "Dynamic connection %s in task %s is not supported here. This connection will "
                        "neither be modified nor merged into the output injection pipeline.",
                        field,
                        taskDef.label,
                    )
    # Raise if the injected dataset type does not exist in the pipeline.
    if dataset_type_name not in all_connection_type_names:
        raise RuntimeError(
            f"Dataset type '{dataset_type_name}' not found in the reference pipeline; "
            "no connection type edits to be made."
        )

    # Attempt to infer the injection pipeline from the dataset type name.
    if not injection_pipeline:
        match dataset_type_name:
            case "postISRCCD":
                injection_pipeline = "$SOURCE_INJECTION_DIR/pipelines/inject_exposure.yaml"
            case "icExp" | "calexp":
                injection_pipeline = "$SOURCE_INJECTION_DIR/pipelines/inject_visit.yaml"
            case "deepCoadd" | "deepCoadd_calexp" | "goodSeeingCoadd":
                injection_pipeline = "$SOURCE_INJECTION_DIR/pipelines/inject_coadd.yaml"
            case _:
                # Print a warning rather than a raise, as the user may wish to
                # edit connection names without merging an injection pipeline.
                logger.warning(
                    "Unable to infer injection pipeline stub from dataset type name '%s' and none was "
                    "provided. No injection pipeline will be merged into the output pipeline.",
                    dataset_type_name,
                )
        if injection_pipeline:
            logger.info(
                "Injected dataset type '%s' used to infer injection pipeline: %s",
                dataset_type_name,
                injection_pipeline,
            )

    # Merge the injection pipeline to the modified pipeline, if provided.
    if injection_pipeline:
        if isinstance(injection_pipeline, str):
            injection_pipeline = Pipeline.fromFile(injection_pipeline)
        # Exactly one injection task is supported; its input/output exposure
        # connections are rewired below, which only makes sense for one task.
        if len(injection_pipeline) != 1:
            raise RuntimeError(
                f"The injection pipeline contains {len(injection_pipeline)} tasks; only 1 task is allowed."
            )
        pipeline.mergePipeline(injection_pipeline)
        # Loop over all injection tasks and modify the connection names.
        for injection_taskDef in injection_pipeline.toExpandedPipeline():
            injected_tasks |= {injection_taskDef.label}
            conns = injection_taskDef.connections
            # The injection task reads the original dataset type and writes
            # the prefixed (post-injection) dataset type.
            pipeline.addConfigOverride(
                injection_taskDef.label, "connections.input_exposure", dataset_type_name
            )
            pipeline.addConfigOverride(
                injection_taskDef.label, "connections.output_exposure", prefix + dataset_type_name
            )
            # Optionally update subsets to include the injection task.
            if not exclude_subsets:
                for label in precursor_injection_task_labels:
                    precursor_subsets = pipeline.findSubsetsWithLabel(label)
                    for subset in precursor_subsets:
                        pipeline.addLabelToSubset(subset, injection_taskDef.label)

    # Create injected subsets: copies of existing subsets restricted to the
    # tasks touched by injection, named with an 'injected_' prefix.
    injected_label_specifier = LabelSpecifier(labels=injected_tasks)
    injected_pipeline = pipeline.subsetFromLabels(injected_label_specifier, pipeline.PipelineSubsetCtrl.EDIT)
    injected_subset_labels = set()
    for injected_subset in injected_pipeline.subsets.keys():
        injected_subset_label = "injected_" + injected_subset
        injected_subset_description = (
            "All tasks from the '" + injected_subset + "' subset impacted by source injection."
        )
        # Skip subsets that end up empty after restriction to injected tasks.
        if len(injected_subset_tasks := injected_pipeline.subsets[injected_subset]) > 0:
            injected_subset_labels |= {injected_subset_label}
            pipeline.addLabeledSubset(
                injected_subset_label, injected_subset_description, injected_subset_tasks
            )

    grammar1 = "task" if len(pipeline) == 1 else "tasks"
    grammar2 = "subset" if len(injected_subset_labels) == 1 else "subsets"
    logger.info(
        "Made an injection pipeline containing %d %s and %d new injected %s.",
        len(pipeline),
        grammar1,
        len(injected_subset_labels),
        grammar2,
    )
    return pipeline