# python/lsst/verify/gen2tasks/metricsControllerTask.py (coverage: 24%)

# This file is part of verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

__all__ = ["MetricsControllerConfig", "MetricsControllerTask"]

import os.path
import traceback

import lsst.pex.config as pexConfig
import lsst.daf.persistence as dafPersist
from lsst.pipe.base import Task, Struct
from lsst.verify import Job
from lsst.verify.tasks import MetricComputationError
from .metadataTask import SquashMetadataTask
from .metricRegistry import MetricRegistry


def _flatten(nested):
    """Flatten an iterable of possibly nested iterables.

    Parameters
    ----------
    nested : iterable
        An iterable that may contain a mix of scalars and other iterables.

    Returns
    -------
    flat : sequence
        A sequence where each iterable element of `nested` has been replaced
        with its elements, in order, and so on recursively.

    Examples
    --------
    >>> x = [42, [4, 3, 5]]
    >>> _flatten(x)
    [42, 4, 3, 5]
    """
    flat = []
    for x in nested:
        try:
            iter(x)
            flat.extend(_flatten(x))
        except TypeError:
            flat.append(x)
    return flat


class MetricsControllerConfig(pexConfig.Config):
    """Configuration options for `MetricsControllerTask`.
    """
    jobFileTemplate = pexConfig.Field(
        dtype=str,
        doc="A template for the path to which the measurements are "
            "written. {id} is replaced with a unique index (recommended), "
            "while {dataId} is replaced with the data ID.",
        default="metrics{id}.{dataId}.verify.json",
    )
    metadataAdder = pexConfig.ConfigurableField(
        target=SquashMetadataTask,
        doc="Task for adding metadata needed by measurement clients. "
            "Its ``run`` method must take a `~lsst.verify.Job` as its first "
            "parameter, and should accept unknown keyword arguments. It must "
            "return a `~lsst.pipe.base.Struct` with the field ``job`` "
            "pointing to the modified job.",
    )
    measurers = MetricRegistry.registry.makeField(
        multi=True,
        doc=r"`MetricTask`\ s to call and their configuration. Each "
            "`MetricTask` must be identified by the name passed to its "
            "`~lsst.verify.gen2tasks.register` or "
            "`~lsst.verify.gen2tasks.registerMultiple` decorator.",
    )
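
# A minimal configuration sketch. The metric name "demoMetric" is a
# hypothetical placeholder; any name registered through the
# `~lsst.verify.gen2tasks.register` decorator works the same way:
#
#     config = MetricsControllerConfig()
#     config.jobFileTemplate = "run1/metrics{id}.{dataId}.verify.json"
#     config.measurers.names = ["demoMetric"]
#     task = MetricsControllerTask(config=config)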


class MetricsControllerTask(Task):
    """A Task for executing a collection of
    `lsst.verify.tasks.MetricTask` objects.

    This class handles Butler input of datasets needed by metrics, as well as
    persistence of the resulting measurements.

    Notes
    -----
    ``MetricsControllerTask`` is a stand-in for functionality provided by the
    Gen 3 Tasks framework. It will become redundant once we fully adopt
    that framework.

    Because ``MetricsControllerTask`` cannot support the full functionality of
    the Gen 3 framework, it places several restrictions on its metrics:

    * each ``MetricTask`` must measure a unique metric
    * no ``MetricTask`` may depend on the output of another ``MetricTask``
    * the granularity of the metrics is determined by the inputs to
      ``runDataRefs``; configuration information specifying a different
      granularity is allowed but is ignored
    """

    _DefaultName = "metricsController"
    ConfigClass = MetricsControllerConfig

    measurers = []
    """The tasks to be executed by this object (iterable of
    `lsst.verify.tasks.MetricTask`).
    """

    def __init__(self, config=None, **kwargs):
        super().__init__(config=config, **kwargs)
        self.makeSubtask("metadataAdder")
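
        # ``measurers.apply()`` can return nested lists: a single registered
        # name may construct several tasks if it was registered with
        # ``registerMultiple``, so flatten the result into one list of
        # ``MetricTask`` objects.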
        self.measurers = _flatten(self.config.measurers.apply())

    def _computeSingleMeasurement(self, job, metricTask, dataref):
        """Call a single metric task on a single dataref.

        This method adds a single measurement to ``job``, as specified by
        ``metricTask``.

        Parameters
        ----------
        job : `lsst.verify.Job`
            A Job object in which to store the new measurement. Must not
            already contain a measurement for ``metricTask.config.metricName``.
        metricTask : `lsst.verify.tasks.MetricTask`
            The code for computing the measurement.
        dataref : `lsst.daf.persistence.ButlerDataRef`
            The repository and data ID to analyze. The data ID may be
            incomplete, but must have the granularity of the desired metric.

        Notes
        -----
        If measurement calculation fails, this method logs an error and leaves
        ``job`` unchanged.
        """
        self.log.debug("Running %s on %r", type(metricTask), dataref)
        inputTypes = metricTask.getInputDatasetTypes(metricTask.config)
        inputScalars = metricTask.areInputDatasetsScalar(metricTask.config)
        inputData = {}
        inputDataIds = {}
        for (param, dataType), scalar \
                in zip(inputTypes.items(), inputScalars.values()):
            inputRefs = dafPersist.searchDataRefs(
                dataref.getButler(), dataType, dataId=dataref.dataId)
            if scalar:
                inputData[param] = inputRefs[0].get() if inputRefs else None
                inputDataIds[param] = inputRefs[0].dataId if inputRefs else {}
            else:
                inputData[param] = [ref.get() for ref in inputRefs]
                inputDataIds[param] = [ref.dataId for ref in inputRefs]

        outputDataIds = {"measurement": dataref.dataId}
        try:
            result = metricTask.adaptArgsAndRun(inputData, inputDataIds,
                                                outputDataIds)
            value = result.measurement
            if value is not None:
                job.measurements.insert(value)
            else:
                self.log.debug(
                    "Skipping measurement of %r on %s as not applicable.",
                    metricTask, inputDataIds)
        except MetricComputationError:
            # lsst.log has no built-in exception support, so format the
            # traceback by hand.
            self.log.error("Measurement of %r failed on %s->%s\n%s",
                           metricTask, inputDataIds, outputDataIds,
                           traceback.format_exc())
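
    # Shape sketch of the arguments assembled by ``_computeSingleMeasurement``
    # for ``adaptArgsAndRun``, assuming a hypothetical MetricTask with a
    # single scalar input named "processCcdMetadata" and a per-CCD data ID:
    #
    #     inputData = {"processCcdMetadata": <dataset or None>}
    #     inputDataIds = {"processCcdMetadata": {"visit": 42, "ccd": 1}}
    #     outputDataIds = {"measurement": {"visit": 42, "ccd": 1}}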

    def runDataRefs(self, datarefs, customMetadata=None, skipExisting=False):
        """Call all registered metric tasks on each dataref.

        This method loads all datasets required to compute a particular
        metric, and persists the metrics as one or more `lsst.verify.Job`
        objects. Only metrics that successfully produce a
        `~lsst.verify.Measurement` will be included in a job.

        Parameters
        ----------
        datarefs : `list` of `lsst.daf.persistence.ButlerDataRef`
            The data to measure. Datarefs may be complete or partial; each
            generates a measurement at the same granularity (e.g., a
            dataref with only ``"visit"`` specified generates visit-level
            measurements).
        customMetadata : `dict`, optional
            Any metadata that are needed for a specific pipeline, but that are
            not needed by the ``lsst.verify`` framework or by general-purpose
            measurement analysis code (these cases are handled by the
            `~MetricsControllerConfig.metadataAdder` subtask). If omitted,
            only generic metadata are added. Both keys and values must be
            valid inputs to `~lsst.verify.Metadata`.
        skipExisting : `bool`, optional
            If this flag is set, MetricsControllerTask will skip computing
            metrics for any data ID that already has an output job file on
            disk. While this option is useful for restarting failed runs, it
            does *not* check whether the file is valid.

        Returns
        -------
        struct : `lsst.pipe.base.Struct`
            A `~lsst.pipe.base.Struct` containing the following component:

            - ``jobs`` : a list of collections of measurements (`list` of
              `lsst.verify.Job`). Each job in the list contains the
              measurement(s) for the corresponding dataref, and each job has
              at most one measurement for each element in `self.measurers`. A
              particular measurement is omitted if it could not be created.
              If ``skipExisting`` is set, any jobs that already exist on disk
              are also omitted.

        Notes
        -----
        Some objects may be persisted, or incorrectly persisted, in the event
        of an exception.
        """
        jobs = []
        index = 0
        for dataref in datarefs:
            jobFile = self._getJobFilePath(index, dataref.dataId)
            if not (skipExisting and os.path.isfile(jobFile)):
                job = Job.load_metrics_package()
                try:
                    self.metadataAdder.run(job, dataref=dataref)
                    if customMetadata:
                        job.meta.update(customMetadata)

                    for task in self.measurers:
                        self._computeSingleMeasurement(job, task, dataref)
                finally:
                    self.log.info("Persisting metrics to %s...", jobFile)
                    # This call order maximizes the chance that the job gets
                    # written, and written to a unique file.
                    index += 1
                    job.write(jobFile)
                    jobs.append(job)
            else:
                self.log.debug("File %s already exists; skipping.", jobFile)

        return Struct(jobs=jobs)
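
    # A usage sketch for ``runDataRefs``. The repository path, dataset type,
    # and visit/ccd values are assumptions for illustration only:
    #
    #     import lsst.daf.persistence as dafPersist
    #     butler = dafPersist.Butler("/path/to/gen2/repo")
    #     datarefs = [butler.dataRef("calexp", visit=v, ccd=1)
    #                 for v in (42, 43)]
    #     task = MetricsControllerTask()
    #     jobs = task.runDataRefs(datarefs).jobs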

    def _getJobFilePath(self, index, dataId):
        """Generate the output file path for a Job.

        Parameters
        ----------
        index : `int`
            A unique integer across all Jobs created by this task.
        dataId : `lsst.daf.persistence.DataId`
            The identifier of all metrics in the Job to be persisted.

        Returns
        -------
        path : `str`
            The file to which the Job will be written.
        """
        # Construct a relatively OS-friendly string (i.e., no quotes or {})
        idString = "_".join("%s%s" % (key, dataId[key]) for key in dataId)
        return self.config.jobFileTemplate.format(id=index, dataId=idString)
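
# Worked example of the template expansion in ``_getJobFilePath``: with the
# default ``jobFileTemplate`` and a data ID whose keys iterate as
# ``{"visit": 42, "ccd": 1}``, the call ``_getJobFilePath(0, dataId)``
# returns ``"metrics0.visit42_ccd1.verify.json"``.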