Coverage for python/lsst/verify/gen2tasks/metricsControllerTask.py: 23% (74 statements)
# This file is part of verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

__all__ = ["MetricsControllerConfig", "MetricsControllerTask"]

import os.path

import lsst.pex.config as pexConfig
import lsst.daf.persistence as dafPersist
from lsst.pipe.base import Task, Struct
from lsst.verify import Job
from lsst.verify.tasks import MetricComputationError
from .metadataTask import SquashMetadataTask
from .metricRegistry import MetricRegistry


def _flatten(nested):
    """Flatten an iterable of possibly nested iterables.

    Parameters
    ----------
    nested : iterable
        An iterable that may contain a mix of scalars or other iterables.

    Returns
    -------
    flat : sequence
        A sequence where each iterable element of `nested` has been replaced
        with its elements, in order, and so on recursively.

    Examples
    --------
    >>> x = [42, [4, 3, 5]]
    >>> _flatten(x)
    [42, 4, 3, 5]
    """
    flat = []
    for x in nested:
        try:
            iter(x)
            flat.extend(_flatten(x))
        except TypeError:
            flat.append(x)
    return flat


class MetricsControllerConfig(pexConfig.Config):
    """Configuration options for `MetricsControllerTask`.
    """
    jobFileTemplate = pexConfig.Field(
        dtype=str,
        doc="A template for the path to which the measurements are "
            "written. {id} is replaced with a unique index (recommended), "
            "while {dataId} is replaced with the data ID.",
        default="metrics{id}.{dataId}.verify.json",
    )
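    # Illustrative expansion of the default template (comment added for
    # clarity; the index and the rendered data ID string are hypothetical):
    #
    #     >>> "metrics{id}.{dataId}.verify.json".format(
    #     ...     id=3, dataId="visit42_filterg")
    #     'metrics3.visit42_filterg.verify.json'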
    metadataAdder = pexConfig.ConfigurableField(
        target=SquashMetadataTask,
        doc="Task for adding metadata needed by measurement clients. "
            "Its ``run`` method must take a `~lsst.verify.Job` as its first "
            "parameter, and should accept unknown keyword arguments. It must "
            "return a `~lsst.pipe.base.Struct` with the field ``job`` "
            "pointing to the modified job.",
    )
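    # Minimal sketch of a task satisfying the ``metadataAdder`` contract
    # described above (hypothetical class, added for illustration; the
    # metadata key and value are made up):
    #
    #     class MyMetadataTask(Task):
    #         ConfigClass = pexConfig.Config
    #         _DefaultName = "myMetadata"
    #
    #         def run(self, job, **kwargs):
    #             job.meta.update({"pipeline": "example"})
    #             return Struct(job=job)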
    measurers = MetricRegistry.registry.makeField(
        multi=True,
        doc=r"`MetricTask`\ s to call and their configuration. Each "
            "`MetricTask` must be identified by the name passed to its "
            "`~lsst.verify.gen2tasks.register` or "
            "`~lsst.verify.gen2tasks.registerMultiple` decorator.",
    )
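
# Illustrative registration and configuration sketch (hypothetical names,
# added for clarity; "example" must match a name registered via the
# decorator, and the multi-registry field is assumed to be selected through
# its ``names`` attribute, following the usual `lsst.pex.config` registry
# conventions):
#
#     from lsst.verify.gen2tasks import register
#
#     @register("example")
#     class ExampleMetricTask(MetricTask):
#         ...
#
#     config = MetricsControllerConfig()
#     config.measurers.names = ["example"]
#     config.jobFileTemplate = "run1/metrics{id}.{dataId}.verify.json"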


class MetricsControllerTask(Task):
    """A Task for executing a collection of
    `lsst.verify.tasks.MetricTask` objects.

    This class handles Butler input of datasets needed by metrics, as well as
    persistence of the resulting measurements.

    Notes
    -----
    ``MetricsControllerTask`` is a stand-in for functionality provided by the
    Gen 3 Tasks framework. It will become redundant once we fully adopt
    that framework.

    Because ``MetricsControllerTask`` cannot support the full functionality of
    the Gen 3 framework, it places several restrictions on its metrics:

    * each ``MetricTask`` must measure a unique metric
    * no ``MetricTask`` may depend on the output of another ``MetricTask``
    * the granularity of the metrics is determined by the inputs to
      ``runDataRefs``; configuration information specifying a different
      granularity is allowed but is ignored
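
    Examples
    --------
    Illustrative usage sketch (not from the original source; the repository
    path, dataset type, and visit numbers are hypothetical)::

        butler = dafPersist.Butler("/path/to/repo")
        datarefs = [butler.dataRef("calexp", visit=v) for v in (42, 43)]
        task = MetricsControllerTask()
        jobs = task.runDataRefs(datarefs).jobs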
    """

    _DefaultName = "metricsController"
    ConfigClass = MetricsControllerConfig

    measurers = []
    """The tasks to be executed by this object (iterable of
    `lsst.verify.tasks.MetricTask`).
    """

    def __init__(self, config=None, **kwargs):
        super().__init__(config=config, **kwargs)
        self.makeSubtask("metadataAdder")

        self.measurers = _flatten(self.config.measurers.apply())

    def _computeSingleMeasurement(self, job, metricTask, dataref):
        """Call a single metric task on a single dataref.

        This method adds a single measurement to ``job``, as specified by
        ``metricTask``.

        Parameters
        ----------
        job : `lsst.verify.Job`
            A Job object in which to store the new measurement. Must not
            already contain a measurement for ``metricTask.config.metricName``.
        metricTask : `lsst.verify.tasks.MetricTask`
            The code for computing the measurement.
        dataref : `lsst.daf.persistence.ButlerDataRef`
            The repository and data ID to analyze. The data ID may be
            incomplete, but must have the granularity of the desired metric.

        Notes
        -----
        If measurement calculation fails, this method logs an error and leaves
        ``job`` unchanged.
        """
        self.log.debug("Running %s on %r", type(metricTask), dataref)
        inputTypes = metricTask.getInputDatasetTypes(metricTask.config)
        inputScalars = metricTask.areInputDatasetsScalar(metricTask.config)
        inputData = {}
        inputDataIds = {}
        # Both dicts are keyed by the task's input parameter names, so they
        # are assumed to iterate in the same order.
        for (param, dataType), scalar \
                in zip(inputTypes.items(), inputScalars.values()):
            inputRefs = dafPersist.searchDataRefs(
                dataref.getButler(), dataType, dataId=dataref.dataId)
            if scalar:
                inputData[param] = inputRefs[0].get() if inputRefs else None
                inputDataIds[param] = inputRefs[0].dataId if inputRefs else {}
            else:
                inputData[param] = [ref.get() for ref in inputRefs]
                inputDataIds[param] = [ref.dataId for ref in inputRefs]

        outputDataIds = {"measurement": dataref.dataId}
        try:
            result = metricTask.adaptArgsAndRun(inputData, inputDataIds,
                                                outputDataIds)
            value = result.measurement
            if value is not None:
                job.measurements.insert(value)
            else:
                self.log.debug(
                    "Skipping measurement of %r on %s as not applicable.",
                    metricTask, inputDataIds)
        except MetricComputationError:
            self.log.error("Measurement of %r failed on %s->%s",
                           metricTask, inputDataIds, outputDataIds,
                           exc_info=True)

    def runDataRefs(self, datarefs, customMetadata=None, skipExisting=False):
        """Call all registered metric tasks on each dataref.

        This method loads all datasets required to compute a particular
        metric, and persists the metrics as one or more `lsst.verify.Job`
        objects. Only metrics that successfully produce a
        `~lsst.verify.Measurement` will be included in a job.

        Parameters
        ----------
        datarefs : `list` of `lsst.daf.persistence.ButlerDataRef`
            The data to measure. Datarefs may be complete or partial; each
            generates a measurement at the same granularity (e.g., a
            dataref with only ``"visit"`` specified generates visit-level
            measurements).
        customMetadata : `dict`, optional
            Any metadata that are needed for a specific pipeline, but that are
            not needed by the ``lsst.verify`` framework or by general-purpose
            measurement analysis code (these cases are handled by the
            `~MetricsControllerConfig.metadataAdder` subtask). If omitted,
            only generic metadata are added. Both keys and values must be
            valid inputs to `~lsst.verify.Metadata`.
        skipExisting : `bool`, optional
            If this flag is set, MetricsControllerTask will skip computing
            metrics for any data ID that already has an output job file on
            disk. While this option is useful for restarting failed runs, it
            does *not* check whether the file is valid.

        Returns
        -------
        struct : `lsst.pipe.base.Struct`
            A `~lsst.pipe.base.Struct` containing the following component:

            - ``jobs`` : a list of collections of measurements (`list` of
              `lsst.verify.Job`). Each job in the list contains the
              measurement(s) for the corresponding dataref, and each job has
              at most one measurement for each element in `self.measurers`. A
              particular measurement is omitted if it could not be created.
              If ``skipExisting`` is set, any jobs that already exist on disk
              are also omitted.

        Notes
        -----
        Some objects may be persisted, or incorrectly persisted, in the event
        of an exception.
        """
        jobs = []
        index = 0
        for dataref in datarefs:
            jobFile = self._getJobFilePath(index, dataref.dataId)
            if not (skipExisting and os.path.isfile(jobFile)):
                job = Job.load_metrics_package()
                try:
                    self.metadataAdder.run(job, dataref=dataref)
                    if customMetadata:
                        job.meta.update(customMetadata)

                    for task in self.measurers:
                        self._computeSingleMeasurement(job, task, dataref)
                finally:
                    self.log.info("Persisting metrics to %s...", jobFile)
                    # This call order maximizes the chance that job gets
                    # written, and to a unique file
                    index += 1
                    job.write(jobFile)
                    jobs.append(job)
            else:
                self.log.debug("File %s already exists; skipping.", jobFile)

        return Struct(jobs=jobs)
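
    # Illustrative call pattern for restarting a partially completed run
    # (comment added for clarity; the metadata key and value are
    # hypothetical):
    #
    #     task.runDataRefs(datarefs,
    #                      customMetadata={"pipeline_version": "1.0"},
    #                      skipExisting=True)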

    def _getJobFilePath(self, index, dataId):
        """Generate the output file path for a Job.

        Parameters
        ----------
        index : `int`
            A unique integer across all Jobs created by this task.
        dataId : `lsst.daf.persistence.DataId`
            The identifier of all metrics in the Job to be persisted.

        Returns
        -------
        path : `str`
            The file to which the Job should be written, following
            `MetricsControllerConfig.jobFileTemplate`.
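
        Examples
        --------
        Illustrative only (the data ID keys are hypothetical): with the
        default template, ``index=0`` and a data ID of
        ``{"visit": 42, "filter": "g"}`` yield the path
        ``metrics0.visit42_filterg.verify.json``.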
        """
        # Construct a relatively OS-friendly string (i.e., no quotes or {})
        idString = "_".join("%s%s" % (key, dataId[key]) for key in dataId)
        return self.config.jobFileTemplate.format(id=index, dataId=idString)