Coverage for python/lsst/ctrl/mpexec/mock_task.py: 20%

86 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-13 16:33 -0700

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import logging 

23from typing import Any, List, Optional, Union 

24 

25from lsst.daf.butler import Butler, DatasetRef, Quantum 

26from lsst.pex.config import Field 

27from lsst.pipe.base import ( 

28 ButlerQuantumContext, 

29 DeferredDatasetRef, 

30 InputQuantizedConnection, 

31 OutputQuantizedConnection, 

32 PipelineTask, 

33 PipelineTaskConfig, 

34 PipelineTaskConnections, 

35) 

36from lsst.utils import doImportType 

37from lsst.utils.introspection import get_full_type_name 

38 

39from .dataid_match import DataIdMatch 

40 

41_LOG = logging.getLogger(__name__) 

42 

43 

class MockButlerQuantumContext(ButlerQuantumContext):
    """Implementation of ButlerQuantumContext to use with a mock task.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    quantum : `~lsst.daf.butler.Quantum`
        Execution quantum.

    Notes
    -----
    This implementation overrides get method to try to retrieve dataset from a
    mock dataset type if it exists. Get method returns a dictionary (or `None`
    when it is given `None` instead of a reference).
    Put method stores the data with a mock dataset type, but also registers
    DatasetRef with registry using original dataset type.
    """

    def __init__(self, butler: Butler, quantum: Quantum):
        super().__init__(butler, quantum)
        # Keep our own handle on the butler: every registry and datastore
        # access made by this class goes through ``self.butler``.
        self.butler = butler

    @classmethod
    def mockDatasetTypeName(cls, datasetTypeName: str) -> str:
        """Make mock dataset type name from actual dataset type name.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the actual dataset type.

        Returns
        -------
        name : `str`
            The same name prefixed with ``_mock_``.
        """
        return "_mock_" + datasetTypeName

    def _get(self, ref: Optional[Union[DeferredDatasetRef, DatasetRef]]) -> Any:
        # docstring is inherited from the base class
        if ref is None:
            return None
        if isinstance(ref, DeferredDatasetRef):
            ref = ref.datasetRef
        datasetType = ref.datasetType

        # For a component dataset type the mock is looked up under the name
        # of its parent (composite) dataset type.
        typeName, component = datasetType.nameAndComponent()
        if component is not None:
            mockDatasetTypeName = self.mockDatasetTypeName(typeName)
        else:
            mockDatasetTypeName = self.mockDatasetTypeName(datasetType.name)

        try:
            mockDatasetType = self.butler.registry.getDatasetType(mockDatasetTypeName)
            # Use a separate name for the mock reference so that ``ref`` still
            # points at the original dataset if we end up in the fallback
            # branch below.
            mockRef = DatasetRef(mockDatasetType, ref.dataId)
            data = self.butler.get(mockRef)
        except KeyError:
            data = super()._get(ref)
            # If the input is an actual non-mock data then we want to replace
            # it with a provenance data which will be stored as a part of
            # output dataset.
            data = {
                "ref": {
                    "dataId": {key.name: ref.dataId[key] for key in ref.dataId.keys()},
                    "datasetType": ref.datasetType.name,
                },
                "type": get_full_type_name(type(data)),
            }
        if component is not None:
            # Record which component was requested alongside the returned
            # dictionary.
            data.update(component=component)
        return data

    def _put(self, value: Any, ref: DatasetRef) -> None:
        # docstring is inherited from the base class

        # Go through ``self.butler.registry`` (as ``_get`` does) rather than
        # relying on a ``registry`` attribute that this class never sets.
        registry = self.butler.registry

        mockDatasetType = registry.getDatasetType(self.mockDatasetTypeName(ref.datasetType.name))
        mockRef = DatasetRef(mockDatasetType, ref.dataId)
        # NOTE: this updates the caller's dictionary in place so that the
        # stored "ref" entry names the mock dataset type.
        value.setdefault("ref", {}).update(datasetType=mockDatasetType.name)
        self.butler.put(value, mockRef)

        # also "store" non-mock refs
        registry._importDatasets([ref])

    def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) -> None:
        # docstring is inherited from the base class
        # Intentionally a no-op: this override performs no membership check.
        return

119 

120 

class MockPipelineTaskConfig(PipelineTaskConfig, pipelineConnections=PipelineTaskConnections):
    """Configuration for the mock pipeline task.

    The two fields select an optional simulated failure: which quantum data
    IDs should fail, and which exception class is raised for them.
    """

    failCondition: Field[str] = Field(
        doc=(
            "Condition on DataId to raise an exception. String expression which includes attributes of "
            "quantum DataId using a syntax of daf_butler user expressions (e.g. 'visit = 123')."
        ),
        dtype=str,
        default="",
    )

    failException: Field[str] = Field(
        doc=(
            "Class name of the exception to raise when fail condition is triggered. Can be "
            "'lsst.pipe.base.NoWorkFound' to specify non-failure exception."
        ),
        dtype=str,
        default="builtins.ValueError",
    )

    def dataIdMatch(self) -> Optional[DataIdMatch]:
        """Build the data ID matcher described by ``failCondition``.

        Returns
        -------
        match : `DataIdMatch` or `None`
            Matcher constructed from the ``failCondition`` string, or `None`
            when that string is empty.
        """
        return DataIdMatch(self.failCondition) if self.failCondition else None

144 

145 

class MockPipelineTask(PipelineTask):
    """Implementation of PipelineTask used for running a mock pipeline.

    Parameters
    ----------
    config : `MockPipelineTaskConfig`, optional
        Task configuration. When given, its fail condition and exception
        class name are used to set up simulated failures.
    **kwargs : `~typing.Any`
        Additional keyword arguments forwarded to the base class constructor.

    Notes
    -----
    This class overrides `runQuantum` to read all input datasetRefs and to
    store simple dictionary as output data. Output dictionary contains some
    provenance data about inputs, the task that produced it, and corresponding
    quantum. This class depends on `MockButlerQuantumContext` which knows how
    to store the output dictionary data with special dataset types.
    """

    ConfigClass = MockPipelineTaskConfig

    def __init__(self, *, config: Optional[MockPipelineTaskConfig] = None, **kwargs: Any):
        super().__init__(config=config, **kwargs)

        self.failException: Optional[type] = None
        self.dataIdMatch: Optional[DataIdMatch] = None
        if config is not None:
            self.dataIdMatch = config.dataIdMatch()
            # Compare against None (not truthiness) so that this agrees with
            # the check in `runQuantum`: whenever a matcher exists the
            # exception type is guaranteed to be set as well.
            if self.dataIdMatch is not None:
                self.failException = doImportType(config.failException)

    def runQuantum(
        self,
        butlerQC: ButlerQuantumContext,
        inputRefs: InputQuantizedConnection,
        outputRefs: OutputQuantizedConnection,
    ) -> None:
        # docstring is inherited from the base class
        quantum = butlerQC.quantum

        _LOG.info("Mocking execution of task '%s' on quantum %s", self.getName(), quantum.dataId)

        assert quantum.dataId is not None, "Quantum DataId cannot be None"

        # Possibly raise an exception.
        if self.dataIdMatch is not None and self.dataIdMatch.match(quantum.dataId):
            _LOG.info("Simulating failure of task '%s' on quantum %s", self.getName(), quantum.dataId)
            message = f"Simulated failure: task={self.getName()} dataId={quantum.dataId}"
            assert self.failException is not None, "Exception type must be defined"
            raise self.failException(message)

        # read all inputs
        inputs = butlerQC.get(inputRefs)

        _LOG.info("Read input data for task '%s' on quantum %s", self.getName(), quantum.dataId)

        # To avoid very deep provenance we trim inputs to a single level
        for data in inputs.values():
            if isinstance(data, dict):
                data = [data]
            if isinstance(data, list):
                for item in data:
                    # Entries that are not dictionaries (such as `None`)
                    # carry no provenance to trim.
                    if not isinstance(item, dict):
                        continue
                    qdata = item.get("quantum", {})
                    qdata.pop("inputs", None)

        # store mock outputs
        for name, refs in outputRefs:
            if not isinstance(refs, list):
                refs = [refs]
            for ref in refs:
                data = {
                    "ref": {
                        "dataId": {key.name: ref.dataId[key] for key in ref.dataId.keys()},
                        "datasetType": ref.datasetType.name,
                    },
                    "quantum": {
                        "task": self.getName(),
                        "dataId": {key.name: quantum.dataId[key] for key in quantum.dataId.keys()},
                        "inputs": inputs,
                    },
                    "outputName": name,
                }
                butlerQC.put(data, ref)

        _LOG.info("Finished mocking task '%s' on quantum %s", self.getName(), quantum.dataId)