Coverage for python/lsst/ap/association/diaCalculation.py : 22%

# This file is part of ap_association.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from collections import namedtuple
import numpy as np
import pandas as pd

from lsst.meas.base import (
    BasePlugin,
    CatalogCalculationPluginConfig,
    CatalogCalculationPlugin,
    CatalogCalculationConfig,
    CatalogCalculationTask,
    PluginRegistry,
    PluginMap)
from lsst.meas.base.catalogCalculation import CCContext
import lsst.daf.base
import lsst.pipe.base

# Enforce an error for unsafe column/array value setting in pandas.
pd.options.mode.chained_assignment = 'raise'

__all__ = ("DiaObjectCalculationPlugin", "DiaObjectCalculationPluginConfig",
           "DiaObjectCalculationTask", "DiaObjectCalculationConfig")


class DiaObjectCalculationPluginConfig(CatalogCalculationPluginConfig):
    """Default configuration class for DIA catalog calculation plugins.
    """
    pass


class DiaObjectCalculationPlugin(CatalogCalculationPlugin):
    """Base class for DIA catalog calculation plugins.

    This class follows `CatalogCalculationPlugin` with modifications for use
    in AP.

    Parameters
    ----------
    config : `DiaObjectCalculationPlugin.ConfigClass`
        Plugin configuration.
    name : `str`
        The string the plugin was registered with.
    metadata : `lsst.daf.base.PropertySet`
        Plugin metadata that will be attached to the output catalog.
    """

    ConfigClass = DiaObjectCalculationPluginConfig

    registry = PluginRegistry(DiaObjectCalculationPluginConfig)
    """List of available plugins (`lsst.meas.base.PluginRegistry`).
    """

    FLUX_MOMENTS_CALCULATED = 5.0
    """Execution order for plugins that must run after flux means and standard
    deviations are calculated.
    """

    plugType = 'single'
    """Does the plugin operate on a single source or the whole catalog (`str`)?
    If the plugin operates on a single source at a time, this should be set to
    ``"single"``; if it expects the whole catalog, to ``"multi"``. If the
    plugin is of type ``"multi"``, the `fail` method must be implemented to
    accept the whole catalog. If the plugin is of type ``"single"``, `fail`
    should accept a single source record.
    """

    inputCols = []
    """DiaObject column names required by the plugin in order to run and
    complete its calculation. DiaObjectCalculationTask should raise an error
    if a plugin is instantiated without the needed columns available. Input
    columns should be defined in the DPDD/cat/Apdb schema. Filter-dependent
    columns should be specified without the filter name prepended to them,
    e.g. ``PSFluxMean`` instead of ``uPSFluxMean``.
    """
    outputCols = []
    """DiaObject column names output by the plugin. DiaObjectCalculationTask
    should raise an error if another plugin is configured to output to the
    same column. Output columns should be defined in the DPDD/cat/Apdb schema.
    Filter-dependent columns should be specified without the filter name
    prepended to them, e.g. ``PSFluxMean`` instead of ``uPSFluxMean``.
    """

    def __init__(self, config, name, metadata):
        BasePlugin.__init__(self, config, name)

    def calculate(self,
                  diaObject,
                  diaSources,
                  filterDiaFluxes=None,
                  filterName=None,
                  **kwargs):
        """Perform the calculation specified by this plugin.

        This method can either be used to operate on a single catalog record
        or a whole catalog, populating it with the output defined by this
        plugin.

        Note that results may be added to catalog records as new columns, or
        may result in changes to existing values.

        Parameters
        ----------
        diaObject : `dict`
            Summary object to store values in.
        diaSources : `pandas.DataFrame`
            DataFrame representing all diaSources associated with this
            diaObject.
        filterDiaFluxes : `pandas.DataFrame`
            DataFrame representing diaSources associated with this
            diaObject that are observed in the band pass ``filterName``.
        filterName : `str`
            Simple name of the filter for the flux being calculated.
        **kwargs
            Any additional keyword arguments that may be passed to the plugin.
        """
        raise NotImplementedError()

    def fail(self, diaObject, columns, error=None):
        """Set diaObject values to nan.

        Parameters
        ----------
        diaObject : `dict`
            Summary object to store values in.
        columns : `list` of `str`
            List of string names of columns to write the failed value to.
        error : `BaseException` or `None`
            Error to pass. Kept for consistency with
            `CatalogCalculationPlugin`. Unused.
        """
        for colName in columns:
            diaObject[colName] = np.nan
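

# Illustrative sketch (not part of this module): a concrete plugin subclass
# typically declares the columns it consumes/produces via ``inputCols`` and
# ``outputCols``, provides ``getExecutionOrder``, and writes its results into
# ``diaObjects`` inside ``calculate``. The registration name
# ``ap_exampleScatter``, the class name, and the output column below are
# hypothetical; the ``psFlux`` column name follows the Apdb DiaSource schema.
#
#     @DiaObjectCalculationPlugin.registry.register("ap_exampleScatter")
#     class ExampleScatterDiaPlugin(DiaObjectCalculationPlugin):
#         ConfigClass = DiaObjectCalculationPluginConfig
#         outputCols = ["PSFluxExampleScatter"]
#
#         @classmethod
#         def getExecutionOrder(cls):
#             return cls.DEFAULT_CATALOGCALCULATION
#
#         def calculate(self, diaObjects, diaObjectId, filterDiaSources,
#                       filterName, **kwargs):
#             # Store the per-filter flux scatter on the DiaObject row.
#             diaObjects.at[diaObjectId, filterName + "PSFluxExampleScatter"] = \
#                 filterDiaSources["psFlux"].std()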


class DiaObjectCalculationConfig(CatalogCalculationConfig):
    """Config class for the catalog calculation driver task.

    Specifies which plugins will execute when the `DiaObjectCalculationTask`
    associated with this configuration is run.
    """

    plugins = DiaObjectCalculationPlugin.registry.makeField(
        multi=True,
        default=["ap_meanPosition",
                 "ap_meanFlux"],
        doc="Plugins to be run and their configuration")
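
# Illustrative note: ``plugins`` is a standard registry field, so a task
# configuration can change which plugins run in the usual way, e.g. (assuming
# the named plugins have been registered):
#
#     config = DiaObjectCalculationConfig()
#     config.plugins.names = ["ap_meanPosition"]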


class DiaObjectCalculationTask(CatalogCalculationTask):
    """Run plugins which operate on a catalog of DIA sources.

    This task facilitates running plugins which will operate on a source
    catalog. These plugins may do things such as classifying an object based
    on source record entries inserted during a measurement task.

    This task differs from `CatalogCalculationTask` in the following ways:

    - No multi mode is available for plugins. All plugins are assumed to run
      in single mode.

    - Input and output catalog types are assumed to be `pandas.DataFrame`
      with columns following those used in the Apdb.

    - No schema argument is passed to the plugins. Each plugin specifies
      output columns and required inputs.

    Parameters
    ----------
    plugMetadata : `lsst.daf.base.PropertyList` or `None`
        Will be modified in-place to contain metadata about the plugins being
        run. If `None`, an empty `~lsst.daf.base.PropertyList` will be
        created.
    **kwargs
        Additional arguments passed to the superclass constructor.

    Notes
    -----
    Plugins may either take an entire catalog to work on at a time, or work on
    individual records.
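
    Examples
    --------
    A minimal construction sketch (illustrative; it assumes the modules that
    register the default ``ap_meanPosition`` and ``ap_meanFlux`` plugins have
    already been imported so those names are available in the registry)::

        config = DiaObjectCalculationConfig()
        task = DiaObjectCalculationTask(config=config)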
197 """
    ConfigClass = DiaObjectCalculationConfig
    _DefaultName = "diaObjectCalculation"

    def __init__(self, plugMetadata=None, **kwargs):
        lsst.pipe.base.Task.__init__(self, **kwargs)
        if plugMetadata is None:
            plugMetadata = lsst.daf.base.PropertyList()
        self.plugMetadata = plugMetadata
        self.plugins = PluginMap()
        self.outputCols = []

        self.initializePlugins()

    def initializePlugins(self):
        """Initialize the plugins according to the configuration.
        """
        pluginType = namedtuple('pluginType', 'single multi')
        self.executionDict = {}
        # Read the properties for each plugin. Allocate a dictionary entry for
        # each run level. Verify that the plugins are above the minimum run
        # level for a catalogCalculation plugin. For each run level, the
        # plugins are sorted into either single record or multi record groups
        # to later be run appropriately.
        for executionOrder, name, config, PluginClass in sorted(self.config.plugins.apply()):
            if executionOrder not in self.executionDict:
                self.executionDict[executionOrder] = pluginType(single=[], multi=[])
            if PluginClass.getExecutionOrder() >= BasePlugin.DEFAULT_CATALOGCALCULATION:
                plug = PluginClass(config, name, metadata=self.plugMetadata)

                self._validatePluginCols(plug)

                self.plugins[name] = plug
                if plug.plugType == 'single':
                    self.executionDict[executionOrder].single.append(plug)
                elif plug.plugType == 'multi':
                    self.executionDict[executionOrder].multi.append(plug)
            else:
                errorTuple = (PluginClass, PluginClass.getExecutionOrder(),
                              BasePlugin.DEFAULT_CATALOGCALCULATION)
                raise ValueError("{} has an execution order less than the minimum for a "
                                 "catalogCalculation plugin. Value {} : Minimum {}".format(*errorTuple))

    def _validatePluginCols(self, plug):
        """Assert that output columns are not duplicated and input columns
        exist for dependent plugins.

        Parameters
        ----------
        plug : `lsst.ap.association.DiaObjectCalculationPlugin`
            Plugin to test for output collisions and input needs.
        """
        for inputName in plug.inputCols:
            if inputName not in self.outputCols:
                errorTuple = (plug.name, plug.getExecutionOrder(),
                              inputName)
                raise ValueError(
                    "Plugin {} with execution order {} requires DiaObject "
                    "column {} to exist. Check the execution order of the "
                    "plugin and make sure it runs after a plugin creating "
                    "the column is run.".format(*errorTuple))
        for outputName in plug.outputCols:
            if outputName in self.outputCols:
                errorTuple = (plug.name, plug.getExecutionOrder(),
                              outputName)
                raise ValueError(
                    "Plugin {} with execution order {} is attempting to "
                    "output a column {}; however, the column is already "
                    "being produced by another plugin. Check other plugins "
                    "for collisions with this one.".format(*errorTuple))
            else:
                self.outputCols.append(outputName)

    @lsst.pipe.base.timeMethod
    def run(self, diaObjectCat, diaSourceCat, updatedDiaObjectIds, filterName):
        """The entry point for the DIA catalog calculation task.

        This method both updates the values in ``diaObjectCat`` and appends
        newly created DiaObjects to the catalog. For catalog column names
        see the lsst.cat schema definitions for the DiaObject and DiaSource
        tables (http://github.com/lsst/cat).

        Parameters
        ----------
        diaObjectCat : `pandas.DataFrame`
            DiaObjects to update values of and append new objects to.
            DataFrame should be indexed on "diaObjectId".
        diaSourceCat : `pandas.DataFrame`
            DiaSources associated with the DiaObjects in diaObjectCat.
            DataFrame should be indexed on
            ``["diaObjectId", "filterName", "diaSourceId"]``.
        updatedDiaObjectIds : `numpy.ndarray`
            Integer ids of the DiaObjects to update and create.
        filterName : `str`
            String name of the filter being processed.

        Returns
        -------
        returnStruct : `lsst.pipe.base.Struct`
            Struct containing:

            ``diaObjectCat``
                Full set of DiaObjects including both un-updated and
                updated/new DiaObjects (`pandas.DataFrame`).
            ``updatedDiaObjects``
                Catalog of DiaObjects that were updated or created by this
                task (`pandas.DataFrame`).
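
        Examples
        --------
        A minimal sketch of how the inputs are expected to be indexed before
        (or by) this method; here ``diaObjectCat`` and ``diaSourceCat`` stand
        for DataFrames carrying the Apdb DiaObject and DiaSource columns::

            diaObjectCat.set_index("diaObjectId", drop=False, inplace=True)
            diaSourceCat.set_index(
                ["diaObjectId", "filterName", "diaSourceId"],
                drop=False, inplace=True)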
305 """
        if diaObjectCat.index.name is None:
            diaObjectCat.set_index("diaObjectId", inplace=True, drop=False)
        elif diaObjectCat.index.name != "diaObjectId":
            self.log.warn(
                "Input diaObjectCat is indexed on column(s) incompatible with "
                "this task. Should be indexed on 'diaObjectId'. Trying to set "
                "index regardless.")
            diaObjectCat.set_index("diaObjectId", inplace=True, drop=False)

        # ``names`` by default is FrozenList([None]) hence we access the first
        # element and test for None.
        if diaSourceCat.index.names[0] is None:
            diaSourceCat.set_index(
                ["diaObjectId", "filterName", "diaSourceId"],
                inplace=True,
                drop=False)
        elif (diaSourceCat.index.names
              != ["diaObjectId", "filterName", "diaSourceId"]):
            self.log.warn(
                "Input diaSourceCat is indexed on column(s) incompatible with "
                "this task. Should be indexed on the multi-index "
                "['diaObjectId', 'filterName', 'diaSourceId']. Trying to set "
                "index regardless.")
            diaSourceCat.set_index(
                ["diaObjectId", "filterName", "diaSourceId"],
                inplace=True,
                drop=False)

        return self.callCompute(diaObjectCat,
                                diaSourceCat,
                                updatedDiaObjectIds,
                                filterName)

    @lsst.pipe.base.timeMethod
    def callCompute(self,
                    diaObjectCat,
                    diaSourceCat,
                    updatedDiaObjectIds,
                    filterName):
        """Run each of the plugins on the catalog.

        For catalog column names see the lsst.cat schema definitions for the
        DiaObject and DiaSource tables (http://github.com/lsst/cat).

        Parameters
        ----------
        diaObjectCat : `pandas.DataFrame`
            DiaObjects to update values of and append new objects to.
            DataFrame should be indexed on "diaObjectId".
        diaSourceCat : `pandas.DataFrame`
            DiaSources associated with the DiaObjects in diaObjectCat.
            DataFrame must be indexed on
            ``["diaObjectId", "filterName", "diaSourceId"]``.
        updatedDiaObjectIds : `numpy.ndarray`
            Integer ids of the DiaObjects to update and create.
        filterName : `str`
            String name of the filter being processed.

        Returns
        -------
        returnStruct : `lsst.pipe.base.Struct`
            Struct containing:

            ``diaObjectCat``
                Full set of DiaObjects including both un-updated and
                updated/new DiaObjects (`pandas.DataFrame`).
            ``updatedDiaObjects``
                Catalog of DiaObjects that were updated or created by this
                task (`pandas.DataFrame`).

        Raises
        ------
        KeyError
            Raised if the `pandas.DataFrame` indexing is not properly set.
        """
        # DiaObjects will be updated in place.
        diaObjectsToUpdate = diaObjectCat.loc[updatedDiaObjectIds, :]

        updatingDiaSources = diaSourceCat.loc[updatedDiaObjectIds, :]
        # Pandas does not convert NULL to `nan` values in custom select
        # statements, instead using None. We thus must replace None with
        # `nan` manually.
        updatingFilterDiaSources = updatingDiaSources.loc[
            (slice(None), filterName), :
        ]

        # Level=0 here groups by diaObjectId.
        diaSourcesGB = updatingDiaSources.groupby(level=0)
        filterDiaSourcesGB = updatingFilterDiaSources.groupby(level=0)

        for runlevel in sorted(self.executionDict):
            for plug in self.executionDict[runlevel].single:
                for updatedDiaObjectId in updatedDiaObjectIds:

                    # Sub-select diaSources associated with this diaObject.
                    objDiaSources = updatingDiaSources.loc[updatedDiaObjectId]

                    # Sub-select on diaSources observed in the current filter.
                    filterObjDiaSources = objDiaSources.loc[filterName]
                    with CCContext(plug, updatedDiaObjectId, self.log):
                        # We feed the plugin the catalog to update and the id
                        # so that it gets a view into the catalog and not a
                        # copy. This updates the values in the catalog.
                        plug.calculate(diaObjects=diaObjectsToUpdate,
                                       diaObjectId=updatedDiaObjectId,
                                       diaSources=objDiaSources,
                                       filterDiaSources=filterObjDiaSources,
                                       filterName=filterName)
            for plug in self.executionDict[runlevel].multi:
                with CCContext(plug, diaObjectsToUpdate, self.log):
                    plug.calculate(diaObjects=diaObjectsToUpdate,
                                   diaSources=diaSourcesGB,
                                   filterDiaSources=filterDiaSourcesGB,
                                   filterName=filterName)
        # Need to store the newly updated diaObjects directly because editing
        # a view into diaObjectsToUpdate does not update the values of
        # diaObjectCat.
        diaObjectCat.loc[updatedDiaObjectIds, :] = diaObjectsToUpdate
        return lsst.pipe.base.Struct(
            diaObjectCat=diaObjectCat,
            updatedDiaObjects=diaObjectsToUpdate)

    def _initialize_dia_object(self, objId):
        """Create a new DiaObject with values required to be initialized by
        the Apdb.

        Parameters
        ----------
        objId : `int`
            ``diaObjectId`` value for the new DiaObject.

        Returns
        -------
        diaObject : `dict`
            Newly created DiaObject with keys:

            ``diaObjectId``
                Unique DiaObjectId (`int`).
            ``pmParallaxNdata``
                Number of data points used for parallax calculation (`int`).
            ``nearbyObj1``
                Id of a nearby object in the Object table (`int`).
            ``nearbyObj2``
                Id of a nearby object in the Object table (`int`).
            ``nearbyObj3``
                Id of a nearby object in the Object table (`int`).
            ``?PSFluxNdata``
                Number of data points used to calculate point source flux
                summary statistics in each bandpass (`int`).
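
        Examples
        --------
        For instance, ``_initialize_dia_object(42)`` returns a `dict` of the
        form::

            {"diaObjectId": 42, "pmParallaxNdata": 0,
             "nearbyObj1": 0, "nearbyObj2": 0, "nearbyObj3": 0,
             "uPSFluxNdata": 0, "gPSFluxNdata": 0, "rPSFluxNdata": 0,
             "iPSFluxNdata": 0, "zPSFluxNdata": 0, "yPSFluxNdata": 0}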
455 """
        new_dia_object = {"diaObjectId": objId,
                          "pmParallaxNdata": 0,
                          "nearbyObj1": 0,
                          "nearbyObj2": 0,
                          "nearbyObj3": 0}
        for f in ["u", "g", "r", "i", "z", "y"]:
            new_dia_object["%sPSFluxNdata" % f] = 0
        return new_dia_object