from collections import namedtuple

import numpy as np
import pandas as pd

import lsst.daf.base
import lsst.pipe.base

from .catalogCalculation import (CatalogCalculationPluginConfig,
                                 CatalogCalculationPlugin,
                                 CatalogCalculationConfig,
                                 CatalogCalculationTask,
                                 CCContext)
from .pluginsBase import BasePlugin
from .pluginRegistry import (PluginRegistry, PluginMap)
# Raise on chained assignment instead of pandas' default
# SettingWithCopyWarning: this module mutates DataFrames in place and a
# silently-dropped write would corrupt the output catalogs.
pd.options.mode.chained_assignment = 'raise'

__all__ = ("DiaObjectCalculationPlugin",
           "DiaObjectCalculationPluginConfig",
           "DiaObjectCalculationTask",
           "DiaObjectCalculationConfig")
43 """Default configuration class for DIA catalog calculation plugins.
class DiaObjectCalculationPlugin(CatalogCalculationPlugin):
    """Base class for DIA catalog calculation plugins.

    Task follows CatalogCalculationPlugin with modifications for use in AP.

    Parameters
    ----------
    config : `DiaObjectCalculationPlugin.ConfigClass`
        Plugin configuration.
    name : `str`
        The string the plugin was registered with.
    metadata : `lsst.daf.base.PropertySet`
        Plugin metadata that will be attached to the output catalog.
    """

    ConfigClass = DiaObjectCalculationPluginConfig

    registry = PluginRegistry(DiaObjectCalculationPluginConfig)
    """List of available plugins (`lsst.meas.base.PluginRegistry`).
    """

    FLUX_MOMENTS_CALCULATED = 5.0
    """Add order after flux means and stds are calculated.
    """

    plugType = 'single'
    """Does the plugin operate on a single source or the whole catalog (`str`)?
    If the plugin operates on a single source at a time, this should be set to
    ``"single"``; if it expects the whole catalog, to ``"multi"``. If the
    plugin is of type ``"multi"``, the `fail` method must be implemented to
    accept the whole catalog. If the plugin is of type ``"single"``, `fail`
    should accept a single source record.
    """

    inputCols = []
    """DiaObject column names required by the plugin in order to run and
    complete its calculation. DiaCalculationTask should raise an error if a
    plugin is instantiated without the needed column available. Input columns
    should be defined in the DPDD/cat/Apdb schema. Filter dependent columns
    should be specified without the filter name prepended to them. eg
    ``PSFluxMean`` instead of ``uPSFluxMean``.
    """

    outputCols = []
    """DiaObject column names output by the plugin. DiaCalculationTask should
    raise an error if another plugin is run outputting to the same column.
    Output columns should be defined in the DPDD/cat/Apdb schema. Filter
    dependent columns should be specified without the filter name prepended to
    them. eg ``PSFluxMean`` instead of ``uPSFluxMean``.
    """

    needsFilter = True
    """This plugin requires a filter to be specified. Plugins using filter
    names usually deal with fluxes and only a sub-set of the DiaSource
    catalog. Plugins that do not use the filter name usually run over a value
    common across all observations/detections such as position.
    """

    def __init__(self, config, name, metadata):
        # NOTE(review): metadata is accepted for interface compatibility but
        # is not stored here — confirm against subclasses before relying on it.
        BasePlugin.__init__(self, config, name)

    def calculate(self,
                  diaObject,
                  diaSources,
                  filterDiaFluxes=None,
                  filterName=None,
                  **kwargs):
        """Perform the calculation specified by this plugin.

        This method can either be used to operate on a single catalog record
        or a whole catalog, populating it with the output defined by this
        plugin.

        Note that results may be added to catalog records as new columns, or
        may result in changes to existing values.

        Parameters
        ----------
        diaObject : `dict`
            Summary object to store values in.
        diaSources : `pandas.DataFrame`
            DataFrame representing all diaSources associated with this
            diaObject.
        filterDiaFluxes : `pandas.DataFrame`
            DataFrame representing diaSources associated with this
            diaObject that are observed in the band pass ``filterName``.
        filterName : `str`
            Simple name of the filter for the flux being calculated.
        **kwargs
            Any additional keyword arguments that may be passed to the plugin.
        """
        raise NotImplementedError()

    def fail(self, diaObject, columns, error=None):
        """Set diaObject position values to nan.

        Parameters
        ----------
        diaObject : `dict`
            Summary object to store values in.
        columns : `list` of `str`
            List of string names of columns to write the failed value.
        error : `BaseException` or `None`
            Error to pass. Kept for consistency with CatalogCalculationPlugin.
        """
        for colName in columns:
            diaObject[colName] = np.nan
class DiaObjectCalculationConfig(CatalogCalculationConfig):
    """Config class for the catalog calculation driver task.

    Specifies which plugins will execute when the `CatalogCalculationTask`
    associated with this configuration is run.
    """

    plugins = DiaObjectCalculationPlugin.registry.makeField(
        multi=True,
        # NOTE(review): the extraction dropped one default entry here;
        # "ap_meanFlux" reconstructed — confirm against upstream.
        default=["ap_meanPosition",
                 "ap_meanFlux"],
        doc="Plugins to be run and their configuration")
class DiaObjectCalculationTask(CatalogCalculationTask):
    """Run plugins which operate on a catalog of DIA sources.

    This task facilitates running plugins which will operate on a source
    catalog. These plugins may do things such as classifying an object based
    on source record entries inserted during a measurement task.

    This task differs from CatalogCaculationTask in the following ways:

    -No multi mode is available for plugins. All plugins are assumed to run
     in single mode.

    -Input and output catalog types are assumed to be `pandas.DataFrames` with
     columns following those used in the Apdb.

    -No schema argument is passed to the plugins. Each plugin specifies
     output columns and required inputs.

    Parameters
    ----------
    plugMetadata : `lsst.daf.base.PropertyList` or `None`
        Will be modified in-place to contain metadata about the plugins being
        run. If `None`, an empty `~lsst.daf.base.PropertyList` will be
        created.
    **kwargs
        Additional arguments passed to the superclass constructor.

    Notes
    -----
    Plugins may either take an entire catalog to work on at a time, or work on
    individual records.
    """

    ConfigClass = DiaObjectCalculationConfig
    _DefaultName = "diaObjectCalculation"

    def __init__(self, plugMetadata=None, **kwargs):
        lsst.pipe.base.Task.__init__(self, **kwargs)
        if plugMetadata is None:
            plugMetadata = lsst.daf.base.PropertyList()
        self.plugMetadata = plugMetadata
        self.plugins = PluginMap()
        # Running list of columns produced so far, used by
        # _validatePluginCols to detect collisions and missing inputs.
        self.outputCols = []

        self.initializePlugins()

    def initializePlugins(self):
        """Initialize the plugins according to the configuration.
        """
        pluginType = namedtuple('pluginType', 'single multi')
        self.executionDict = {}
        # Read the properties for each plugin. Allocate a dictionary entry
        # for each run level. Verify that the plugins are above the minimum
        # run level for a catalogCalculation plugin.
        for executionOrder, name, config, PluginClass in sorted(
                self.config.plugins.apply()):
            if executionOrder not in self.executionDict:
                self.executionDict[executionOrder] = pluginType(single=[],
                                                                multi=[])
            if PluginClass.getExecutionOrder() >= BasePlugin.DEFAULT_CATALOGCALCULATION:
                plug = PluginClass(config, name, metadata=self.plugMetadata)
                self._validatePluginCols(plug)
                self.plugins[name] = plug
                if plug.plugType == 'single':
                    self.executionDict[executionOrder].single.append(plug)
                elif plug.plugType == 'multi':
                    self.executionDict[executionOrder].multi.append(plug)
            else:
                errorTuple = (PluginClass, PluginClass.getExecutionOrder(),
                              BasePlugin.DEFAULT_CATALOGCALCULATION)
                raise ValueError(
                    "{} has an execution order less than the minimum for an catalogCalculation "
                    "plugin. Value {} : Minimum {}".format(*errorTuple))

    def _validatePluginCols(self, plug):
        """Assert that output columns are not duplicated and input columns
        exist for dependent plugins.

        Parameters
        ----------
        plug : `lsst.ap.association.DiaCalculationPlugin`
            Plugin to test for output collisions and input needs.
        """
        for inputName in plug.inputCols:
            # BUGFIX: source was garbled to ``self.outputColsoutputCols``.
            if inputName not in self.outputCols:
                errorTuple = (plug.name, plug.getExecutionOrder(), inputName)
                raise ValueError(
                    "Plugin, {} with execution order {} requires DiaObject "
                    "column {} to exist. Check the execution order of the "
                    "plugin and make sure it runs after a plugin creating "
                    "the column is run.".format(*errorTuple))
        for outputName in plug.outputCols:
            if outputName in self.outputCols:
                errorTuple = (plug.name, plug.getExecutionOrder(), outputName)
                raise ValueError(
                    "Plugin, {} with execution order {} is attempting to "
                    "output a column {}, however the column is already being "
                    "produced by another plugin. Check other plugins for "
                    "collisions with this one.".format(*errorTuple))
            else:
                self.outputCols.append(outputName)

    @lsst.pipe.base.timeMethod
    def run(self,
            diaObjectCat,
            diaSourceCat,
            updatedDiaObjectIds,
            filterNames):
        """The entry point for the DIA catalog calculation task.

        Run method both updates the values in the diaObjectCat and appends
        newly created DiaObjects to the catalog. For catalog column names
        see the lsst.cat schema definitions for the DiaObject and DiaSource
        tables (http://github.com/lsst/cat).

        Parameters
        ----------
        diaObjectCat : `pandas.DataFrame`
            DiaObjects to update values of and append new objects to. DataFrame
            should be indexed on "diaObjectId"
        diaSourceCat : `pandas.DataFrame`
            DiaSources associated with the DiaObjects in diaObjectCat.
            DataFrame should be indexed on
            `["diaObjectId", "filterName", "diaSourceId"]`
        updatedDiaObjectIds : `numpy.ndarray`
            Integer ids of the DiaObjects to update and create.
        filterNames : `list` of `str`
            List of string names of filters to be being processed.

        Returns
        -------
        returnStruct : `lsst.pipe.base.Struct`
            Struct containing:

            ``diaObjectCat``
                Full set of DiaObjects including both un-updated and
                updated/new DiaObjects (`pandas.DataFrame`).
            ``updatedDiaObjects``
                Catalog of DiaObjects that were updated or created by this
                task (`pandas.DataFrame`).
        """
        # Coerce the object catalog onto the index this task requires,
        # warning (rather than failing) if the caller indexed it differently.
        if diaObjectCat.index.name is None:
            diaObjectCat.set_index("diaObjectId", inplace=True, drop=False)
        elif diaObjectCat.index.name != "diaObjectId":
            self.log.warn(
                "Input diaObjectCat is indexed on column(s) incompatible with "
                "this task. Should be indexed on 'diaObjectId'. Trying to set "
                "index regardless.")
            diaObjectCat.set_index("diaObjectId", inplace=True, drop=False)

        # Same for the source catalog, which must be multi-indexed.
        if diaSourceCat.index.names[0] is None:
            diaSourceCat.set_index(
                ["diaObjectId", "filterName", "diaSourceId"],
                inplace=True,
                drop=False)
        elif (diaSourceCat.index.names
              != ["diaObjectId", "filterName", "diaSourceId"]):
            self.log.warn(
                "Input diaSourceCat is indexed on column(s) incompatible with "
                "this task. Should be indexed on 'multi-index, "
                "['diaObjectId', 'filterName', 'diaSourceId']. Trying to set "
                "index regardless.")
            diaSourceCat.set_index(
                ["diaObjectId", "filterName", "diaSourceId"],
                inplace=True,
                drop=False)

        return self.callCompute(diaObjectCat,
                                diaSourceCat,
                                updatedDiaObjectIds,
                                filterNames)

    @lsst.pipe.base.timeMethod
    def callCompute(self,
                    diaObjectCat,
                    diaSourceCat,
                    updatedDiaObjectIds,
                    filterNames):
        """Run each of the plugins on the catalog.

        For catalog column names see the lsst.cat schema definitions for the
        DiaObject and DiaSource tables (http://github.com/lsst/cat).

        Parameters
        ----------
        diaObjectCat : `pandas.DataFrame`
            DiaObjects to update values of and append new objects to. DataFrame
            should be indexed on "diaObjectId"
        diaSourceCat : `pandas.DataFrame`
            DiaSources associated with the DiaObjects in diaObjectCat.
            DataFrame must be indexed on
            ["diaObjectId", "filterName", "diaSourceId"]`
        updatedDiaObjectIds : `numpy.ndarray`
            Integer ids of the DiaObjects to update and create.
        filterNames : `list` of `str`
            List of string names of filters to be being processed.

        Returns
        -------
        returnStruct : `lsst.pipe.base.Struct`
            Struct containing:

            ``diaObjectCat``
                Full set of DiaObjects including both un-updated and
                updated/new DiaObjects (`pandas.DataFrame`).
            ``updatedDiaObjects``
                Catalog of DiaObjects that were updated or created by this
                task (`pandas.DataFrame`).

        Raises
        ------
        KeyError
            Raises if `pandas.DataFrame` indexing is not properly set.
        """
        # Restrict work to the objects being updated; the results are
        # written back into diaObjectCat at the end.
        diaObjectsToUpdate = diaObjectCat.loc[updatedDiaObjectIds, :]
        self.log.info("Calculating summary stats for %i DiaObjects" %
                      len(diaObjectsToUpdate))

        updatingDiaSources = diaSourceCat.loc[updatedDiaObjectIds, :]
        diaSourcesGB = updatingDiaSources.groupby(level=0)

        # Run plugins in ascending execution-order run levels.
        for runlevel in sorted(self.executionDict):
            # Filter-independent plugins operating per-object.
            for plug in self.executionDict[runlevel].single:
                if plug.needsFilter:
                    continue
                for updatedDiaObjectId in updatedDiaObjectIds:
                    objDiaSources = updatingDiaSources.loc[updatedDiaObjectId]
                    with CCContext(plug, updatedDiaObjectId, self.log):
                        plug.calculate(diaObjects=diaObjectsToUpdate,
                                       diaObjectId=updatedDiaObjectId,
                                       diaSources=objDiaSources,
                                       filterDiaSources=None,
                                       filterName=None)
            # Filter-independent plugins operating on the whole catalog.
            for plug in self.executionDict[runlevel].multi:
                if plug.needsFilter:
                    continue
                with CCContext(plug, diaObjectsToUpdate, self.log):
                    plug.calculate(diaObjects=diaObjectsToUpdate,
                                   diaSources=diaSourcesGB,
                                   filterDiaSources=None,
                                   filterName=None)
            # Filter-dependent plugins, one pass per filter.
            for filterName in filterNames:
                try:
                    updatingFilterDiaSources = updatingDiaSources.loc[
                        (slice(None), filterName), :
                    ]
                except KeyError:
                    # BUGFIX: message read "fitler".
                    self.log.warn(f"No DiaSource data with filter={filterName}. "
                                  "Continuing.")
                    continue
                filterDiaSourcesGB = updatingFilterDiaSources.groupby(level=0)

                for plug in self.executionDict[runlevel].single:
                    if not plug.needsFilter:
                        continue
                    for updatedDiaObjectId in updatedDiaObjectIds:
                        objDiaSources = updatingDiaSources.loc[updatedDiaObjectId]
                        try:
                            filterObjDiaSources = objDiaSources.loc[filterName]
                        except KeyError:
                            # BUGFIX: original literal lacked the ``f`` prefix
                            # so the placeholders were never substituted.
                            # NOTE(review): the extraction dropped the handler
                            # tail; ``continue`` chosen to skip objects with no
                            # data in this filter — confirm against upstream.
                            self.log.warn(
                                f"DiaObjectId={updatedDiaObjectId} has no "
                                f"DiaSources for filter={filterName}. "
                                "Continuing.")
                            continue
                        with CCContext(plug, updatedDiaObjectId, self.log):
                            plug.calculate(diaObjects=diaObjectsToUpdate,
                                           diaObjectId=updatedDiaObjectId,
                                           diaSources=objDiaSources,
                                           filterDiaSources=filterObjDiaSources,
                                           filterName=filterName)
                for plug in self.executionDict[runlevel].multi:
                    if not plug.needsFilter:
                        continue
                    with CCContext(plug, diaObjectsToUpdate, self.log):
                        plug.calculate(diaObjects=diaObjectsToUpdate,
                                       diaSources=diaSourcesGB,
                                       filterDiaSources=filterDiaSourcesGB,
                                       filterName=filterName)

        # Write the updated rows back into the full catalog and return both
        # the full and updated-only views.
        diaObjectCat.loc[updatedDiaObjectIds, :] = diaObjectsToUpdate
        return lsst.pipe.base.Struct(
            diaObjectCat=diaObjectCat,
            updatedDiaObjects=diaObjectsToUpdate)

    def _initialize_dia_object(self, objId):
        """Create a new DiaObject with values required to be initialized by the
        Apdb.

        Parameters
        ----------
        objId : `int`
            ``diaObjectId`` value for the of the new DiaObject.

        Returns
        -------
        new_dia_object : `dict`
            Newly created DiaObject with keys:

            ``diaObjectId``
                Unique DiaObjectId (`int`).
            ``pmParallaxNdata``
                Number of data points used for parallax calculation (`int`).
            ``nearbyObj1``
                Id of the a nearbyObject in the Object table (`int`).
            ``nearbyObj2``
                Id of the a nearbyObject in the Object table (`int`).
            ``nearbyObj3``
                Id of the a nearbyObject in the Object table (`int`).
            ``?PSFluxNdata``
                Number of data points used to calculate point source flux
                summary statistics in each bandpass (`int`).
        """
        # NOTE(review): the extraction dropped the middle of this dict; the
        # nearbyObj keys are reconstructed from the docstring — confirm.
        new_dia_object = {"diaObjectId": objId,
                          "pmParallaxNdata": 0,
                          "nearbyObj1": 0,
                          "nearbyObj2": 0,
                          "nearbyObj3": 0}
        for f in ["u", "g", "r", "i", "z", "y"]:
            new_dia_object["%sPSFluxNdata" % f] = 0
        return new_dia_object
# --- Signature index (extraction artifact, not executable code) ---
# def callCompute(self, catalog)
# def initializePlugins(self)
# def __init__(self, config, name, metadata)
# def calculate(self, diaObject, diaSources, filterDiaFluxes=None, filterName=None, **kwargs)
# def fail(self, diaObject, columns, error=None)
# def __init__(self, plugMetadata=None, **kwargs)
# def _validatePluginCols(self, plug)
# def run(self, diaObjectCat, diaSourceCat, updatedDiaObjectIds, filterNames)
# def initializePlugins(self)
# def callCompute(self, diaObjectCat, diaSourceCat, updatedDiaObjectIds, filterNames)