Coverage for python/lsst/verify/gen2tasks/metricsControllerTask.py: 27%

75 statements

coverage.py v6.4.1, created at 2022-07-09 06:23 -0700

# This file is part of verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

__all__ = ["MetricsControllerConfig", "MetricsControllerTask"]

import os.path

import lsst.pex.config as pexConfig
import lsst.daf.persistence as dafPersist
from lsst.pipe.base import Task, Struct
from lsst.verify import Job
from lsst.verify.tasks import MetricComputationError
from .metadataTask import SquashMetadataTask
from .metricRegistry import MetricRegistry


def _flatten(nested):
    """Flatten an iterable of possibly nested iterables.

    Parameters
    ----------
    nested : iterable
        An iterable that may contain a mix of scalars or other iterables.

    Returns
    -------
    flat : sequence
        A sequence where each iterable element of `nested` has been replaced
        with its elements, in order, and so on recursively.

    Examples
    --------
    >>> x = [42, [4, 3, 5]]
    >>> _flatten(x)
    [42, 4, 3, 5]
    """
    flat = []
    for x in nested:
        try:
            # Duck-typed test for iterability: scalars raise TypeError
            # and are appended as-is; iterables are flattened recursively.
            iter(x)
            flat.extend(_flatten(x))
        except TypeError:
            flat.append(x)
    return flat


class MetricsControllerConfig(pexConfig.Config):
    """Configuration options for `MetricsControllerTask`.
    """
    jobFileTemplate = pexConfig.Field(
        dtype=str,
        doc="A template for the path to which the measurements are "
            "written. {id} is replaced with a unique index (recommended), "
            "while {dataId} is replaced with the data ID.",
        default="metrics{id}.{dataId}.verify.json",
    )
    metadataAdder = pexConfig.ConfigurableField(
        target=SquashMetadataTask,
        doc="Task for adding metadata needed by measurement clients. "
            "Its ``run`` method must take a `~lsst.verify.Job` as its first "
            "parameter, and should accept unknown keyword arguments. It must "
            "return a `~lsst.pipe.base.Struct` with the field ``job`` "
            "pointing to the modified job.",
    )
    measurers = MetricRegistry.registry.makeField(
        multi=True,
        doc=r"`MetricTask`\ s to call and their configuration. Each "
            "`MetricTask` must be identified by the name passed to its "
            "`~lsst.verify.gen2tasks.register` or "
            "`~lsst.verify.gen2tasks.registerMultiple` decorator.",
    )
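# Example (illustrative, not part of the original module): selecting
# measurers in a config override file. "demoMetric" is a hypothetical name;
# a real name must match one passed to the @register or @registerMultiple
# decorator of some MetricTask.
#
#     config.jobFileTemplate = "metrics/nightly{id}.{dataId}.verify.json"
#     config.measurers.names = ["demoMetric"]
#     # Per-measurer settings then live under config.measurers["demoMetric"]
#     # (the available fields depend on that MetricTask's ConfigClass).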

class MetricsControllerTask(Task):
    """A Task for executing a collection of
    `lsst.verify.tasks.MetricTask` objects.

    This class handles Butler input of datasets needed by metrics, as well as
    persistence of the resulting measurements.

    Notes
    -----
    ``MetricsControllerTask`` is a stand-in for functionality provided by the
    Gen 3 Tasks framework. It will become redundant once we fully adopt
    that framework.

    Because ``MetricsControllerTask`` cannot support the full functionality of
    the Gen 3 framework, it places several restrictions on its metrics:

    * each ``MetricTask`` must measure a unique metric
    * no ``MetricTask`` may depend on the output of another ``MetricTask``
    * the granularity of the metrics is determined by the inputs to
      ``runDataRefs``; configuration information specifying a different
      granularity is allowed but is ignored
    """

    _DefaultName = "metricsController"
    ConfigClass = MetricsControllerConfig

    measurers = []
    """The tasks to be executed by this object (iterable of
    `lsst.verify.tasks.MetricTask`).
    """

    def __init__(self, config=None, **kwargs):
        super().__init__(config=config, **kwargs)
        self.makeSubtask("metadataAdder")

        self.measurers = _flatten(self.config.measurers.apply())
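    # Example (illustrative): end-to-end use against a hypothetical Gen 2
    # repository at "repo/" with a hypothetical visit number. runDataRefs
    # accepts any list of ButlerDataRefs at the desired granularity.
    #
    #     butler = dafPersist.Butler("repo/")
    #     task = MetricsControllerTask()
    #     datarefs = list(butler.subset("calexp", visit=12345))
    #     jobs = task.runDataRefs(datarefs).jobs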

    def _computeSingleMeasurement(self, job, metricTask, dataref):
        """Call a single metric task on a single dataref.

        This method adds a single measurement to ``job``, as specified by
        ``metricTask``.

        Parameters
        ----------
        job : `lsst.verify.Job`
            A Job object in which to store the new measurement. Must not
            already contain a measurement for ``metricTask.config.metricName``.
        metricTask : `lsst.verify.tasks.MetricTask`
            The code for computing the measurement.
        dataref : `lsst.daf.persistence.ButlerDataRef`
            The repository and data ID to analyze. The data ID may be
            incomplete, but must have the granularity of the desired metric.

        Notes
        -----
        If measurement calculation fails, this method logs an error and leaves
        ``job`` unchanged.
        """
        self.log.debug("Running %s on %r", type(metricTask), dataref)
        inputTypes = metricTask.getInputDatasetTypes(metricTask.config)
        inputScalars = metricTask.areInputDatasetsScalar(metricTask.config)
        inputData = {}
        inputDataIds = {}
        # Assumes getInputDatasetTypes and areInputDatasetsScalar return
        # dicts with the same keys in the same order, so that zip pairs
        # each parameter with its own scalar flag.
        for (param, dataType), scalar \
                in zip(inputTypes.items(), inputScalars.values()):
            inputRefs = dafPersist.searchDataRefs(
                dataref.getButler(), dataType, dataId=dataref.dataId)
            if scalar:
                # A scalar input takes the first matching dataset, if any.
                inputData[param] = inputRefs[0].get() if inputRefs else None
                inputDataIds[param] = inputRefs[0].dataId if inputRefs else {}
            else:
                # A vector input takes every matching dataset.
                inputData[param] = [ref.get() for ref in inputRefs]
                inputDataIds[param] = [ref.dataId for ref in inputRefs]

        outputDataIds = {"measurement": dataref.dataId}
        try:
            result = metricTask.adaptArgsAndRun(inputData, inputDataIds,
                                                outputDataIds)
            value = result.measurement
            if value is not None:
                job.measurements.insert(value)
            else:
                self.log.debug(
                    "Skipping measurement of %r on %s as not applicable.",
                    metricTask, inputDataIds)
        except MetricComputationError:
            self.log.error("Measurement of %r failed on %s->%s",
                           metricTask, inputDataIds, outputDataIds,
                           exc_info=True)
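    # Example (illustrative): for a MetricTask declaring a single scalar
    # input parameter "image" of dataset type "calexp", and a dataref whose
    # data ID is the hypothetical {"visit": 12345, "ccd": 42}, the
    # adaptArgsAndRun call in _computeSingleMeasurement receives roughly:
    #
    #     inputData     = {"image": <the loaded calexp, or None>}
    #     inputDataIds  = {"image": {"visit": 12345, "ccd": 42}}
    #     outputDataIds = {"measurement": {"visit": 12345, "ccd": 42}}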

    def runDataRefs(self, datarefs, customMetadata=None, skipExisting=False):
        """Call all registered metric tasks on each dataref.

        This method loads all datasets required to compute a particular
        metric, and persists the metrics as one or more `lsst.verify.Job`
        objects. Only metrics that successfully produce a
        `~lsst.verify.Measurement` will be included in a job.

        Parameters
        ----------
        datarefs : `list` of `lsst.daf.persistence.ButlerDataRef`
            The data to measure. Datarefs may be complete or partial; each
            generates a measurement at the same granularity (e.g., a
            dataref with only ``"visit"`` specified generates visit-level
            measurements).
        customMetadata : `dict`, optional
            Any metadata that are needed for a specific pipeline, but that are
            not needed by the ``lsst.verify`` framework or by general-purpose
            measurement analysis code (these cases are handled by the
            `~MetricsControllerConfig.metadataAdder` subtask). If omitted,
            only generic metadata are added. Both keys and values must be
            valid inputs to `~lsst.verify.Metadata`.
        skipExisting : `bool`, optional
            If this flag is set, MetricsControllerTask will skip computing
            metrics for any data ID that already has an output job file on
            disk. While this option is useful for restarting failed runs, it
            does *not* check whether the file is valid.

        Returns
        -------
        struct : `lsst.pipe.base.Struct`
            A `~lsst.pipe.base.Struct` containing the following component:

            - ``jobs`` : a list of collections of measurements (`list` of
              `lsst.verify.Job`). Each job in the list contains the
              measurement(s) for the corresponding dataref, and each job has
              at most one measurement for each element in `self.measurers`. A
              particular measurement is omitted if it could not be created.
              If ``skipExisting`` is set, any jobs that already exist on disk
              are also omitted.

        Notes
        -----
        Some objects may be persisted, or incorrectly persisted, in the event
        of an exception.
        """
        jobs = []
        index = 0
        for dataref in datarefs:
            jobFile = self._getJobFilePath(index, dataref.dataId)
            if not (skipExisting and os.path.isfile(jobFile)):
                job = Job.load_metrics_package()
                try:
                    self.metadataAdder.run(job, dataref=dataref)
                    if customMetadata:
                        job.meta.update(customMetadata)

                    for task in self.measurers:
                        self._computeSingleMeasurement(job, task, dataref)
                finally:
                    self.log.info("Persisting metrics to %s...", jobFile)
                    # This call order maximizes the chance that job gets
                    # written, and to a unique file
                    index += 1
                    job.write(jobFile)
                    jobs.append(job)
            else:
                self.log.debug("File %s already exists; skipping.", jobFile)

        return Struct(jobs=jobs)
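    # Example (illustrative): resuming an interrupted run without
    # recomputing data IDs that already have job files on disk, while
    # attaching pipeline-specific metadata (the key shown is hypothetical).
    #
    #     task.runDataRefs(datarefs,
    #                      customMetadata={"pipeline_version": "w_2022_27"},
    #                      skipExisting=True)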

    def _getJobFilePath(self, index, dataId):
        """Generate an output file path for a Job.

        Parameters
        ----------
        index : `int`
            A unique integer across all Jobs created by this task.
        dataId : `lsst.daf.persistence.DataId`
            The identifier of all metrics in the Job to be persisted.

        Returns
        -------
        path : `str`
            The path given by `MetricsControllerConfig.jobFileTemplate`
            after substituting ``index`` and ``dataId``.
        """
        # Construct a relatively OS-friendly string (i.e., no quotes or {})
        idString = "_".join("%s%s" % (key, dataId[key]) for key in dataId)
        return self.config.jobFileTemplate.format(id=index, dataId=idString)
266 return self.config.jobFileTemplate.format(id=index, dataId=idString)