Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of verify. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ["MetricsControllerConfig", "MetricsControllerTask"] 

23 

24import os.path 

25import traceback 

26 

27import lsst.pex.config as pexConfig 

28import lsst.daf.persistence as dafPersist 

29from lsst.pipe.base import Task, Struct 

30from lsst.verify import Job 

31from lsst.verify.tasks import MetricComputationError 

32from .metadataTask import SquashMetadataTask 

33from .metricRegistry import MetricRegistry 

34 

35 

36def _flatten(nested): 

37 """Flatten an iterable of possibly nested iterables. 

38 

39 Parameters 

40 ---------- 

41 nested : iterable 

42 An iterable that may contain a mix of scalars or other iterables. 

43 

44 Returns 

45 ------- 

46 flat : sequence 

47 A sequence where each iterable element of `nested` has been replaced 

48 with its elements, in order, and so on recursively. 

49 

50 Examples 

51 -------- 

52 >>> x = [42, [4, 3, 5]] 

53 >>> _flatten(x) 

54 [42, 4, 3, 5] 

55 """ 

56 flat = [] 

57 for x in nested: 

58 try: 

59 iter(x) 

60 flat.extend(_flatten(x)) 

61 except TypeError: 

62 flat.append(x) 

63 return flat 

64 

65 

class MetricsControllerConfig(pexConfig.Config):
    """Configuration options for `MetricsControllerTask`.
    """
    # Output path template; formatted by MetricsControllerTask with a
    # per-job integer index ({id}) and a sanitized data ID string ({dataId}).
    jobFileTemplate = pexConfig.Field(
        dtype=str,
        doc="A template for the path to which the measurements are "
            "written. {id} is replaced with a unique index (recommended), "
            "while {dataId} is replaced with the data ID.",
        default="metrics{id}.{dataId}.verify.json",
    )
    # Subtask invoked once per Job to attach framework-required metadata
    # before any measurements are computed.
    metadataAdder = pexConfig.ConfigurableField(
        target=SquashMetadataTask,
        doc="Task for adding metadata needed by measurement clients. "
            "Its ``run`` method must take a `~lsst.verify.Job` as its first "
            "parameter, and should accept unknown keyword arguments. It must "
            "return a `~lsst.pipe.base.Struct` with the field ``job`` "
            "pointing to the modified job.",
    )
    # Registry field (multi-select) naming the MetricTask subclasses to run;
    # resolved to concrete task instances in MetricsControllerTask.__init__.
    measurers = MetricRegistry.registry.makeField(
        multi=True,
        doc=r"`MetricTask`\ s to call and their configuration. Each "
            "`MetricTask` must be identified by the name passed to its "
            "`~lsst.verify.gen2tasks.register` or "
            "`~lsst.verify.gen2tasks.registerMultiple` decorator.",
    )

91 

92 

class MetricsControllerTask(Task):
    """A Task for executing a collection of
    `lsst.verify.tasks.MetricTask` objects.

    This class handles Butler input of datasets needed by metrics, as well as
    persistence of the resulting measurements.

    Notes
    -----
    ``MetricsControllerTask`` is a stand-in for functionality provided by the
    Gen 3 Tasks framework. It will become redundant once we fully adopt
    that framework.

    Because ``MetricsControllerTask`` cannot support the full functionality of
    the Gen 3 framework, it places several restrictions on its metrics:

    * each ``MetricTask`` must measure a unique metric
    * no ``MetricTask`` may depend on the output of another ``MetricTask``
    * the granularity of the metrics is determined by the inputs to
      ``runDataRefs``; configuration information specifying a different
      granularity is allowed but is ignored
    """

    _DefaultName = "metricsController"
    ConfigClass = MetricsControllerConfig

    # Class-level placeholder; replaced per-instance in __init__ with the
    # tasks produced by the ``measurers`` registry field.
    measurers = []
    """The tasks to be executed by this object (iterable of
    `lsst.verify.tasks.MetricTask`).
    """

    def __init__(self, config=None, **kwargs):
        # Standard Task construction, then instantiate the metadata subtask
        # and the configured MetricTasks.
        super().__init__(config=config, **kwargs)
        self.makeSubtask("metadataAdder")

        # registry.apply() may return nested collections (e.g. from
        # registerMultiple); flatten to a simple list of tasks.
        self.measurers = _flatten(self.config.measurers.apply())

    def _computeSingleMeasurement(self, job, metricTask, dataref):
        """Call a single metric task on a single dataref.

        This method adds a single measurement to ``job``, as specified by
        ``metricTask``.

        Parameters
        ----------
        job : `lsst.verify.Job`
            A Job object in which to store the new measurement. Must not
            already contain a measurement for ``metricTask.config.metricName``.
        metricTask : `lsst.verify.tasks.MetricTask`
            The code for computing the measurement.
        dataref : `lsst.daf.persistence.ButlerDataRef`
            The repository and data ID to analyze. The data ID may be
            incomplete, but must have the granularity of the desired metric.

        Notes
        -----
        If measurement calculation fails, this method logs an error and leaves
        ``job`` unchanged.
        """
        self.log.debug("Running %s on %r", type(metricTask), dataref)
        inputTypes = metricTask.getInputDatasetTypes(metricTask.config)
        inputScalars = metricTask.areInputDatasetsScalar(metricTask.config)
        inputData = {}
        inputDataIds = {}
        # NOTE(review): zipping inputTypes.items() with inputScalars.values()
        # assumes both dicts are keyed identically and iterate in the same
        # order — presumably guaranteed by the MetricTask API; confirm.
        for (param, dataType), scalar \
                in zip(inputTypes.items(), inputScalars.values()):
            inputRefs = dafPersist.searchDataRefs(
                dataref.getButler(), dataType, dataId=dataref.dataId)
            if scalar:
                # Scalar inputs take at most one dataset; a missing dataset
                # becomes None (with an empty data ID) rather than an error.
                inputData[param] = inputRefs[0].get() if inputRefs else None
                inputDataIds[param] = inputRefs[0].dataId if inputRefs else {}
            else:
                inputData[param] = [ref.get() for ref in inputRefs]
                inputDataIds[param] = [ref.dataId for ref in inputRefs]

        outputDataIds = {"measurement": dataref.dataId}
        try:
            result = metricTask.adaptArgsAndRun(inputData, inputDataIds,
                                                outputDataIds)
            value = result.measurement
            if value is not None:
                job.measurements.insert(value)
            else:
                # A None measurement means the metric is not applicable to
                # this dataset; skip quietly.
                self.log.debug(
                    "Skipping measurement of %r on %s as not applicable.",
                    metricTask, inputDataIds)
        except MetricComputationError:
            # Apparently lsst.log doesn't have built-in exception support?
            self.log.error("Measurement of %r failed on %s->%s\n%s",
                           metricTask, inputDataIds, outputDataIds,
                           traceback.format_exc())

    def runDataRefs(self, datarefs, customMetadata=None, skipExisting=False):
        """Call all registered metric tasks on each dataref.

        This method loads all datasets required to compute a particular
        metric, and persists the metrics as one or more `lsst.verify.Job`
        objects. Only metrics that successfully produce a
        `~lsst.verify.Measurement` will be included in a job.

        Parameters
        ----------
        datarefs : `list` of `lsst.daf.persistence.ButlerDataRef`
            The data to measure. Datarefs may be complete or partial; each
            generates a measurement at the same granularity (e.g., a
            dataref with only ``"visit"`` specified generates visit-level
            measurements).
        customMetadata : `dict`, optional
            Any metadata that are needed for a specific pipeline, but that are
            not needed by the ``lsst.verify`` framework or by general-purpose
            measurement analysis code (these cases are handled by the
            `~MetricsControllerConfig.metadataAdder` subtask). If omitted,
            only generic metadata are added. Both keys and values must be valid
            inputs to `~lsst.verify.Metadata`.
        skipExisting : `bool`, optional
            If this flag is set, MetricsControllerTask will skip computing
            metrics for any data ID that already has an output job file on
            disk. While this option is useful for restarting failed runs, it
            does *not* check whether the file is valid.

        Returns
        -------
        struct : `lsst.pipe.base.Struct`
            A `~lsst.pipe.base.Struct` containing the following component:

            - ``jobs`` : a list of collections of measurements (`list` of
              `lsst.verify.Job`). Each job in the list contains the
              measurement(s) for the corresponding dataref, and each job has
              at most one measurement for each element in `self.measurers`. A
              particular measurement is omitted if it could not be created.
              If ``skipExisting`` is set, any jobs that already exist on disk
              are also omitted.

        Notes
        -----
        Some objects may be persisted, or incorrectly persisted, in the event
        of an exception.
        """
        jobs = []
        index = 0
        for dataref in datarefs:
            jobFile = self._getJobFilePath(index, dataref.dataId)
            if not (skipExisting and os.path.isfile(jobFile)):
                job = Job.load_metrics_package()
                try:
                    # Attach framework metadata first, then any
                    # pipeline-specific metadata supplied by the caller.
                    self.metadataAdder.run(job, dataref=dataref)
                    if customMetadata:
                        job.meta.update(customMetadata)

                    for task in self.measurers:
                        self._computeSingleMeasurement(job, task, dataref)
                finally:
                    self.log.info("Persisting metrics to %s...", jobFile)
                    # This call order maximizes the chance that job gets
                    # written, and to a unique file
                    index += 1
                    job.write(jobFile)
                    jobs.append(job)
            else:
                # NOTE(review): ``index`` is not advanced when a file is
                # skipped, so a restarted run with skipExisting numbers the
                # remaining jobs differently from the original run and may
                # not match pre-existing files — confirm this is intended.
                self.log.debug("File %s already exists; skipping.", jobFile)

        return Struct(jobs=jobs)

    def _getJobFilePath(self, index, dataId):
        """Generate an output file for a Job.

        Parameters
        ----------
        index : `int`
            A unique integer across all Jobs created by this task.
        dataId : `lsst.daf.persistence.DataId`
            The identifier of all metrics in the Job to be persisted.

        Returns
        -------
        path : `str`
            The configured ``jobFileTemplate`` with ``{id}`` replaced by
            ``index`` and ``{dataId}`` replaced by a filename-safe rendering
            of ``dataId``.
        """
        # Construct a relatively OS-friendly string (i.e., no quotes or {})
        idString = "_".join("%s%s" % (key, dataId[key]) for key in dataId)
        return self.config.jobFileTemplate.format(id=index, dataId=idString)