Coverage for python/lsst/ctrl/mpexec/mock_task.py: 26%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

80 statements  

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import logging 

23from typing import Any, List, Optional, Union 

24 

25from lsst.daf.butler import Butler, DatasetRef, Quantum 

26from lsst.pex.config import Field 

27from lsst.pipe.base import ( 

28 ButlerQuantumContext, 

29 DeferredDatasetRef, 

30 InputQuantizedConnection, 

31 OutputQuantizedConnection, 

32 PipelineTask, 

33 PipelineTaskConfig, 

34 PipelineTaskConnections, 

35) 

36from lsst.utils import doImport 

37from lsst.utils.introspection import get_full_type_name 

38 

39from .dataid_match import DataIdMatch 

40 

41_LOG = logging.getLogger(__name__) 

42 

43 

class MockButlerQuantumContext(ButlerQuantumContext):
    """Implementation of ButlerQuantumContext to use with a mock task.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    quantum : `~lsst.daf.butler.Quantum`
        Execution quantum.

    Notes
    -----
    This implementation overrides get method to try to retrieve dataset from a
    mock dataset type if it exists. Get method always returns a dictionary.
    Put method stores the data with a mock dataset type, but also registers
    DatasetRef with registry using original dataset type.
    """

    def __init__(self, butler: Butler, quantum: Quantum):
        super().__init__(butler, quantum)
        # Keep a direct handle on the butler; _get/_put below use it to read
        # and write mock datasets outside the quantum's declared connections.
        self.butler = butler

    @classmethod
    def mockDatasetTypeName(cls, datasetTypeName: str) -> str:
        """Make mock dataset type name from actual dataset type name."""
        return "_mock_" + datasetTypeName

    def _get(self, ref: DatasetRef) -> Any:
        # docstring is inherited from the base class
        if isinstance(ref, DeferredDatasetRef):
            # Unwrap deferred refs so the rest of the method handles a plain
            # DatasetRef uniformly.
            ref = ref.datasetRef
        datasetType = ref.datasetType

        # For a component ("composite.component") dataset type, derive the
        # mock name from the parent composite's name.
        typeName, component = datasetType.nameAndComponent()
        if component is not None:
            mockDatasetTypeName = self.mockDatasetTypeName(typeName)
        else:
            mockDatasetTypeName = self.mockDatasetTypeName(datasetType.name)

        try:
            # KeyError here means no mock dataset type is registered for this
            # input, which triggers the non-mock fallback below. Note that
            # `ref` is rebound inside the try: the fallback dict must be built
            # from whatever `ref` was when the KeyError was raised.
            mockDatasetType = self.butler.registry.getDatasetType(mockDatasetTypeName)
            ref = DatasetRef(mockDatasetType, ref.dataId)
            data = self.butler.get(ref)
        except KeyError:
            data = super()._get(ref)
            # If the input as an actual non-mock data then we want to replace
            # it with a provenance data which will be stored as a part of
            # output dataset.
            data = {
                "ref": {
                    "dataId": {key.name: ref.dataId[key] for key in ref.dataId.keys()},
                    "datasetType": ref.datasetType.name,
                },
                "type": get_full_type_name(type(data)),
            }
        if component is not None:
            # Record which component was requested; the mock dataset itself is
            # stored/retrieved whole, not per-component.
            data.update(component=component)
        return data

    def _put(self, value: Any, ref: DatasetRef) -> None:
        # docstring is inherited from the base class

        # NOTE(review): this uses `self.registry` while _get uses
        # `self.butler.registry` — presumably the base class sets
        # `self.registry` to the same registry; confirm against
        # ButlerQuantumContext.
        mockDatasetType = self.registry.getDatasetType(self.mockDatasetTypeName(ref.datasetType.name))
        mockRef = DatasetRef(mockDatasetType, ref.dataId)
        # Record the mock dataset type name in the provenance dict so readers
        # of the stored value can see where it actually went.
        value.setdefault("ref", {}).update(datasetType=mockDatasetType.name)
        self.butler.put(value, mockRef)

        # also "store" non-mock refs
        self.registry._importDatasets([ref])

    def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) -> None:
        # docstring is inherited from the base class
        # Membership checks are deliberately disabled: mock dataset refs are
        # not part of the quantum's declared input/output sets.
        return

117 

118 

# Configuration for MockPipelineTask: two knobs for simulating a task
# failure on quanta whose data ID matches a user expression.
class MockPipelineTaskConfig(PipelineTaskConfig, pipelineConnections=PipelineTaskConnections):

    failCondition = Field(
        doc=(
            "Condition on DataId to raise an exception. String expression which includes attributes of "
            "quantum DataId using a syntax of daf_butler user expressions (e.g. 'visit = 123')."
        ),
        dtype=str,
        default="",
    )

    failException = Field(
        doc=(
            "Class name of the exception to raise when fail condition is triggered. Can be "
            "'lsst.pipe.base.NoWorkFound' to specify non-failure exception."
        ),
        dtype=str,
        default="builtins.ValueError",
    )

    def dataIdMatch(self) -> Optional[DataIdMatch]:
        # Build a matcher from the configured condition; an empty condition
        # means "never fail" and yields None.
        return DataIdMatch(self.failCondition) if self.failCondition else None

143 

144 

class MockPipelineTask(PipelineTask):
    """Implementation of PipelineTask used for running a mock pipeline.

    Notes
    -----
    This class overrides `runQuantum` to read all input datasetRefs and to
    store simple dictionary as output data. Output dictionary contains some
    provenance data about inputs, the task that produced it, and corresponding
    quantum. This class depends on `MockButlerQuantumContext` which knows how
    to store the output dictionary data with special dataset types.
    """

    ConfigClass = MockPipelineTaskConfig

    def __init__(self, *, config: Optional[MockPipelineTaskConfig] = None, **kwargs: Any):
        super().__init__(config=config, **kwargs)

        self.dataIdMatch = None if config is None else config.dataIdMatch()
        # Pre-import the exception class only when a fail condition exists, so
        # a bad class name fails at construction time rather than mid-run.
        # Use an explicit `is not None` test (consistent with runQuantum)
        # instead of relying on DataIdMatch truthiness.
        if self.dataIdMatch is not None:
            self.failException = doImport(config.failException)
        else:
            self.failException = None

    def runQuantum(
        self,
        butlerQC: MockButlerQuantumContext,
        inputRefs: InputQuantizedConnection,
        outputRefs: OutputQuantizedConnection,
    ) -> None:
        # docstring is inherited from the base class
        quantum = butlerQC.quantum

        _LOG.info("Mocking execution of task '%s' on quantum %s", self.getName(), quantum.dataId)

        # Possibly raise an exception.
        if self.dataIdMatch is not None and self.dataIdMatch.match(quantum.dataId):
            _LOG.info("Simulating failure of task '%s' on quantum %s", self.getName(), quantum.dataId)
            message = f"Simulated failure: task={self.getName()} dataId={quantum.dataId}"
            raise self.failException(message)

        # read all inputs
        inputs = butlerQC.get(inputRefs)

        _LOG.info("Read input data for task '%s' on quantum %s", self.getName(), quantum.dataId)

        # To avoid very deep provenance we trim inputs to a single level.
        # Only the values are needed (keys were unused), so iterate values().
        for data in inputs.values():
            if isinstance(data, dict):
                data = [data]
            if isinstance(data, list):
                for item in data:
                    # pop() on the default {} makes a missing "quantum" key
                    # harmless; the mutation is only visible when present.
                    item.get("quantum", {}).pop("inputs", None)

        # store mock outputs
        for name, refs in outputRefs:
            if not isinstance(refs, list):
                refs = [refs]
            for ref in refs:
                # Provenance dictionary stored via MockButlerQuantumContext:
                # identifies the output ref, the producing task/quantum, and
                # the (trimmed) inputs it consumed.
                data = {
                    "ref": {
                        "dataId": {key.name: ref.dataId[key] for key in ref.dataId.keys()},
                        "datasetType": ref.datasetType.name,
                    },
                    "quantum": {
                        "task": self.getName(),
                        "dataId": {key.name: quantum.dataId[key] for key in quantum.dataId.keys()},
                        "inputs": inputs,
                    },
                    "outputName": name,
                }
                butlerQC.put(data, ref)

        _LOG.info("Finished mocking task '%s' on quantum %s", self.getName(), quantum.dataId)