Coverage for python/lsst/source/injection/utils/make_injection_pipeline.py: 5%

108 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-20 06:03 -0700

1# This file is part of source_injection. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

# Public API of this module.
__all__ = ["make_injection_pipeline"]

25 

26import logging 

27 

28from lsst.analysis.tools.interfaces import AnalysisPipelineTask 

29from lsst.pipe.base import LabelSpecifier, Pipeline 

30 

31 

32def _get_dataset_type_names(conns, fields): 

33 """Return the name of a connection's dataset type.""" 

34 dataset_type_names = set() 

35 for field in fields: 

36 dataset_type_names.add(getattr(conns, field).name) 

37 return dataset_type_names 

38 

39 

40def _parse_config_override(config_override: str) -> tuple[str, str, str]: 

41 """Parse a config override string into a label, a key and a value. 

42 

43 Parameters 

44 ---------- 

45 config_override : `str` 

46 Config override string to parse. 

47 

48 Returns 

49 ------- 

50 label : `str` 

51 Label to override. 

52 key : `str` 

53 Key to override. 

54 value : `str` 

55 Value to override. 

56 

57 Raises 

58 ------ 

59 TypeError 

60 If the config override string cannot be parsed. 

61 """ 

62 try: 

63 label, keyvalue = config_override.split(":", 1) 

64 except ValueError: 

65 raise TypeError( 

66 f"Unrecognized syntax for option 'config': '{config_override}' (does not match pattern " 

67 "(?P<label>.+):(?P<value>.+=.+))" 

68 ) from None 

69 try: 

70 key, value = keyvalue.split("=", 1) 

71 except ValueError as e: 

72 raise TypeError( 

73 f"Could not parse key-value pair '{config_override}' using separator '=', with multiple values " 

74 f"not allowed: {e}" 

75 ) from None 

76 return label, key, value 

77 

78 

def make_injection_pipeline(
    dataset_type_name: str,
    reference_pipeline: Pipeline | str,
    injection_pipeline: Pipeline | str | None = None,
    exclude_subsets: bool = False,
    # NOTE(review): mutable default argument; it is only read (set difference
    # below), never mutated, so this is safe — but a frozenset would be more
    # defensive.
    excluded_tasks: set[str] | str = {
        "jointcal",
        "gbdesAstrometricFit",
        "fgcmBuildFromIsolatedStars",
        "fgcmFitCycle",
        "fgcmOutputProducts",
    },
    prefix: str = "injected_",
    instrument: str | None = None,
    config: str | list[str] | None = None,
    log_level: int = logging.INFO,
) -> Pipeline:
    """Make an expanded source injection pipeline.

    This function takes a reference pipeline definition file in YAML format and
    prefixes all post-injection dataset type names with the injected prefix. If
    an optional injection pipeline definition YAML file is also provided, the
    injection task will be merged into the pipeline.

    Unless explicitly excluded, all subsets from the reference pipeline
    containing the task which generates the injection dataset type will also be
    updated to include the injection task. A series of new injected subsets
    will also be created. These new subsets are copies of existent subsets, but
    containing only the tasks which are affected by source injection. New
    injected subsets will be the original subset name with the prefix
    'injected_' prepended.

    Parameters
    ----------
    dataset_type_name : `str`
        Name of the dataset type being injected into.
    reference_pipeline : `lsst.pipe.base.Pipeline` | `str`
        Location of a reference pipeline definition YAML file.
    injection_pipeline : `lsst.pipe.base.Pipeline` | `str`, optional
        Location of an injection pipeline definition YAML file stub. If not
        provided, an attempt to infer the injection pipeline will be made based
        on the injected dataset type name.
    exclude_subsets : `bool`, optional
        If True, do not update pipeline subsets to include the injection task.
    excluded_tasks : `set` [`str`] | `str`
        Set or comma-separated string of task labels to exclude from the
        injection pipeline.
    prefix : `str`, optional
        Prefix to prepend to each affected post-injection dataset type name.
    instrument : `str`, optional
        Add instrument overrides. Must be a fully qualified class name.
    config : `str` | `list` [`str`], optional
        Config override for a task, in the format 'label:key=value'.
    log_level : `int`, optional
        The log level to use for logging.

    Returns
    -------
    pipeline : `lsst.pipe.base.Pipeline`
        An expanded source injection pipeline.

    Raises
    ------
    RuntimeError
        If ``dataset_type_name`` is not produced or consumed by any task in
        the reference pipeline, or if the injection pipeline contains more
        than one task.
    TypeError
        If a ``config`` override string cannot be parsed.
    """
    # Instantiate logger.
    logger = logging.getLogger(__name__)
    logger.setLevel(log_level)

    # Load the pipeline and apply config overrides, if supplied.
    if isinstance(reference_pipeline, str):
        pipeline = Pipeline.fromFile(reference_pipeline)
    else:
        pipeline = reference_pipeline
    if config:
        # Accept a single override string as well as a list of them.
        if isinstance(config, str):
            config = [config]
        for conf in config:
            config_label, config_key, config_value = _parse_config_override(conf)
            pipeline.addConfigOverride(config_label, config_key, config_value)

    # Add an instrument override, if provided.
    if instrument:
        pipeline.addInstrument(instrument)

    # Remove all tasks which are not to be included in the injection pipeline.
    if isinstance(excluded_tasks, str):
        excluded_tasks = set(excluded_tasks.split(","))
    all_tasks = {taskDef.label for taskDef in pipeline.toExpandedPipeline()}
    preserved_tasks = all_tasks - excluded_tasks
    label_specifier = LabelSpecifier(labels=preserved_tasks)
    # EDIT mode removes tasks from parent subsets but keeps the subset itself.
    pipeline = pipeline.subsetFromLabels(label_specifier, pipeline.PipelineSubsetCtrl.EDIT)
    # Warn about any requested exclusions that matched no task label.
    if len(not_found_tasks := excluded_tasks - all_tasks) > 0:
        grammar = "Task" if len(not_found_tasks) == 1 else "Tasks"
        logger.warning(
            "%s marked for exclusion not found in the reference pipeline: %s.",
            grammar,
            ", ".join(sorted(not_found_tasks)),
        )

    # Determine the set of dataset type names affected by source injection.
    injected_tasks = set()
    all_connection_type_names = set()
    injected_types = {dataset_type_name}
    precursor_injection_task_labels = set()
    # Loop over all tasks in the pipeline. Note: this relies on
    # toExpandedPipeline yielding tasks in an order where producers precede
    # consumers, so that injected_types has been populated before a
    # downstream task's inputs are checked — TODO confirm that guarantee.
    for taskDef in pipeline.toExpandedPipeline():
        # Add override for Analysis Tools taskDefs. Connections in Analysis
        # Tools are dynamically assigned, and so are not able to be modified in
        # the same way as a static connection. Instead, we add a config
        # override here to the connections.outputName field. This field is
        # prepended to all Analysis Tools connections, and so will prepend the
        # injection prefix to all plot/metric outputs. Further processing of
        # this taskDef will be skipped thereafter.
        if issubclass(taskDef.taskClass, AnalysisPipelineTask):
            pipeline.addConfigOverride(
                taskDef.label, "connections.outputName", prefix + taskDef.config.connections.outputName
            )
            continue

        conns = taskDef.connections
        input_types = _get_dataset_type_names(conns, conns.initInputs | conns.inputs)
        output_types = _get_dataset_type_names(conns, conns.initOutputs | conns.outputs)
        all_connection_type_names |= input_types | output_types
        # Identify the precursor task: allows appending inject task to subset.
        if dataset_type_name in output_types:
            precursor_injection_task_labels.add(taskDef.label)
        # If the task has any injected dataset type names as inputs, add the
        # task to a set of tasks touched by injection, and add all of the
        # outputs of this task to the set of injected types.
        if len(input_types & injected_types) > 0:
            injected_tasks |= {taskDef.label}
            injected_types |= output_types
            # Add the injection prefix to all affected dataset type names.
            for field in conns.initInputs | conns.inputs | conns.initOutputs | conns.outputs:
                if hasattr(taskDef.config.connections.ConnectionsClass, field):
                    # If the connection type is not dynamic, modify as usual.
                    if (conn_type := getattr(conns, field).name) in injected_types:
                        pipeline.addConfigOverride(taskDef.label, "connections." + field, prefix + conn_type)
                else:
                    # Add log warning if the connection type is dynamic.
                    logger.warning(
                        "Dynamic connection %s in task %s is not supported here. This connection will "
                        "neither be modified nor merged into the output injection pipeline.",
                        field,
                        taskDef.label,
                    )
    # Raise if the injected dataset type does not exist in the pipeline.
    if dataset_type_name not in all_connection_type_names:
        raise RuntimeError(
            f"Dataset type '{dataset_type_name}' not found in the reference pipeline; "
            "no connection type edits to be made."
        )

    # Attempt to infer the injection pipeline from the dataset type name.
    if not injection_pipeline:
        match dataset_type_name:
            case "postISRCCD":
                injection_pipeline = "$SOURCE_INJECTION_DIR/pipelines/inject_exposure.yaml"
            case "icExp" | "calexp":
                injection_pipeline = "$SOURCE_INJECTION_DIR/pipelines/inject_visit.yaml"
            case "deepCoadd" | "deepCoadd_calexp" | "goodSeeingCoadd":
                injection_pipeline = "$SOURCE_INJECTION_DIR/pipelines/inject_coadd.yaml"
            case _:
                # Print a warning rather than a raise, as the user may wish to
                # edit connection names without merging an injection pipeline.
                logger.warning(
                    "Unable to infer injection pipeline stub from dataset type name '%s' and none was "
                    "provided. No injection pipeline will be merged into the output pipeline.",
                    dataset_type_name,
                )
        if injection_pipeline:
            logger.info(
                "Injected dataset type '%s' used to infer injection pipeline: %s",
                dataset_type_name,
                injection_pipeline,
            )

    # Merge the injection pipeline to the modified pipeline, if provided.
    if injection_pipeline:
        if isinstance(injection_pipeline, str):
            injection_pipeline = Pipeline.fromFile(injection_pipeline)
        # Exactly one injection task is expected in the stub pipeline.
        if len(injection_pipeline) != 1:
            raise RuntimeError(
                f"The injection pipeline contains {len(injection_pipeline)} tasks; only 1 task is allowed."
            )
        pipeline.mergePipeline(injection_pipeline)
        # Loop over all injection tasks and modify the connection names.
        for injection_taskDef in injection_pipeline.toExpandedPipeline():
            injected_tasks |= {injection_taskDef.label}
            conns = injection_taskDef.connections
            # The injection task reads the original dataset type and writes
            # the prefixed (injected) dataset type.
            pipeline.addConfigOverride(
                injection_taskDef.label, "connections.input_exposure", dataset_type_name
            )
            pipeline.addConfigOverride(
                injection_taskDef.label, "connections.output_exposure", prefix + dataset_type_name
            )
            # Optionally update subsets to include the injection task.
            if not exclude_subsets:
                for label in precursor_injection_task_labels:
                    precursor_subsets = pipeline.findSubsetsWithLabel(label)
                    for subset in precursor_subsets:
                        pipeline.addLabelToSubset(subset, injection_taskDef.label)

    # Create injected subsets: copies of existing subsets restricted to the
    # tasks touched by source injection, named with an 'injected_' prefix.
    injected_label_specifier = LabelSpecifier(labels=injected_tasks)
    injected_pipeline = pipeline.subsetFromLabels(injected_label_specifier, pipeline.PipelineSubsetCtrl.EDIT)
    injected_subset_labels = set()
    for injected_subset in injected_pipeline.subsets.keys():
        injected_subset_label = "injected_" + injected_subset
        injected_subset_description = (
            "All tasks from the '" + injected_subset + "' subset impacted by source injection."
        )
        # Only create the new subset if it would be non-empty.
        if len(injected_subset_tasks := injected_pipeline.subsets[injected_subset]) > 0:
            injected_subset_labels |= {injected_subset_label}
            pipeline.addLabeledSubset(
                injected_subset_label, injected_subset_description, injected_subset_tasks
            )

    grammar1 = "task" if len(pipeline) == 1 else "tasks"
    grammar2 = "subset" if len(injected_subset_labels) == 1 else "subsets"
    logger.info(
        "Made an injection pipeline containing %d %s and %d new injected %s.",
        len(pipeline),
        grammar1,
        len(injected_subset_labels),
        grammar2,
    )
    return pipeline