# This file is part of ap_association.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from collections import namedtuple
import numpy as np
import pandas as pd

from lsst.meas.base import (
    BasePlugin,
    CatalogCalculationPluginConfig,
    CatalogCalculationPlugin,
    CatalogCalculationConfig,
    CatalogCalculationTask,
    PluginRegistry,
    PluginMap)
from lsst.meas.base.catalogCalculation import CCContext
import lsst.daf.base
import lsst.pipe.base

# Enforce an error for unsafe column/array value setting in pandas.
pd.options.mode.chained_assignment = 'raise'

__all__ = ("DiaObjectCalculationPlugin", "DiaObjectCalculationPluginConfig",
           "DiaObjectCalculationTask", "DiaObjectCalculationConfig")

class DiaObjectCalculationPluginConfig(CatalogCalculationPluginConfig):
    """Default configuration class for DIA catalog calculation plugins.
    """
    pass

class DiaObjectCalculationPlugin(CatalogCalculationPlugin):
    """Base class for DIA catalog calculation plugins.

    Task follows CatalogCalculationPlugin with modifications for use in AP.

    Parameters
    ----------
    config : `DiaObjectCalculationPlugin.ConfigClass`
        Plugin configuration.
    name : `str`
        The string the plugin was registered with.
    metadata : `lsst.daf.base.PropertySet`
        Plugin metadata that will be attached to the output catalog.
    """

    ConfigClass = DiaObjectCalculationPluginConfig

    registry = PluginRegistry(DiaObjectCalculationPluginConfig)
    """List of available plugins (`lsst.meas.base.PluginRegistry`).
    """

    FLUX_MOMENTS_CALCULATED = 5.0
    """Add order after flux means and stds are calculated.
    """

    plugType = 'single'
    """Does the plugin operate on a single source or the whole catalog (`str`)?
    If the plugin operates on a single source at a time, this should be set to
    ``"single"``; if it expects the whole catalog, to ``"multi"``. If the
    plugin is of type ``"multi"``, the `fail` method must be implemented to
    accept the whole catalog. If the plugin is of type ``"single"``, `fail`
    should accept a single source record.
    """

    inputCols = []
    """DiaObject column names required by the plugin in order to run and
    complete its calculation. DiaObjectCalculationTask should raise an error
    if a plugin is instantiated without the needed column available. Input
    columns should be defined in the DPDD/cat/Apdb schema. Filter-dependent
    columns should be specified without the filter name prepended to them,
    e.g. ``PSFluxMean`` instead of ``uPSFluxMean``.
    """
    outputCols = []
    """DiaObject column names output by the plugin. DiaObjectCalculationTask
    should raise an error if another plugin already outputs to the same
    column. Output columns should be defined in the DPDD/cat/Apdb schema.
    Filter-dependent columns should be specified without the filter name
    prepended to them, e.g. ``PSFluxMean`` instead of ``uPSFluxMean``.
    """

    def __init__(self, config, name, metadata):
        BasePlugin.__init__(self, config, name)

    def calculate(self,
                  diaObject,
                  diaSources,
                  filterDiaFluxes=None,
                  filterName=None,
                  **kwargs):
        """Perform the calculation specified by this plugin.

        This method can either be used to operate on a single catalog record
        or a whole catalog, populating it with the output defined by this
        plugin.

        Note that results may be added to catalog records as new columns, or
        may result in changes to existing values.

        Parameters
        ----------
        diaObject : `dict`
            Summary object to store values in.
        diaSources : `pandas.DataFrame`
            DataFrame representing all diaSources associated with this
            diaObject.
        filterDiaFluxes : `pandas.DataFrame`
            DataFrame representing diaSources associated with this
            diaObject that are observed in the band pass ``filterName``.
        filterName : `str`
            Simple name of the filter for the flux being calculated.
        **kwargs
            Any additional keyword arguments that may be passed to the plugin.
        """
        raise NotImplementedError()

    def fail(self, diaObject, columns, error=None):
        """Set the requested diaObject columns to nan.

        Parameters
        ----------
        diaObject : `dict`
            Summary object to store values in.
        columns : `list` of `str`
            List of string names of columns to write the failed value to.
        error : `BaseException` or `None`
            Error to pass. Kept for consistency with CatalogCalculationPlugin.
            Unused.
        """
        for colName in columns:
            diaObject[colName] = np.nan
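# The block below is an illustrative sketch only, not one of the task's real
# plugins (those live in diaCalculationPlugins.py). It shows how a concrete
# single-record plugin could subclass DiaObjectCalculationPlugin: the plugin
# name "ap_exampleMeanFlux", the output column "PSFluxMean", and the DiaSource
# column "psFlux" are assumptions made for the example. The keyword arguments
# of ``calculate`` mirror what ``DiaObjectCalculationTask.callCompute`` passes
# to single-type plugins.
#
# Uncommenting the ``register`` decorator would make the plugin selectable by
# name in ``DiaObjectCalculationConfig.plugins``.
#
# @DiaObjectCalculationPlugin.registry.register("ap_exampleMeanFlux")
class ExampleMeanFluxPlugin(DiaObjectCalculationPlugin):
    """Illustrative plugin: mean point-source flux of a DiaObject in one
    filter.
    """

    outputCols = ["PSFluxMean"]

    @classmethod
    def getExecutionOrder(cls):
        return cls.DEFAULT_CATALOGCALCULATION

    def calculate(self,
                  diaObjects,
                  diaObjectId,
                  filterDiaSources=None,
                  filterName=None,
                  **kwargs):
        # Write the per-filter mean flux into this diaObject's row.
        diaObjects.loc[diaObjectId, "%sPSFluxMean" % filterName] = \
            np.nanmean(filterDiaSources["psFlux"])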

class DiaObjectCalculationConfig(CatalogCalculationConfig):
    """Config class for the catalog calculation driver task.

    Specifies which plugins will execute when the `DiaObjectCalculationTask`
    associated with this configuration is run.
    """

    plugins = DiaObjectCalculationPlugin.registry.makeField(
        multi=True,
        default=["ap_meanPosition",
                 "ap_meanFlux"],
        doc="Plugins to be run and their configuration")
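    # A hedged configuration sketch: given an instance ``config`` of this
    # class, the set of active plugins is chosen through the registry field,
    # e.g.
    #
    #     config.plugins.names = ["ap_meanPosition", "ap_meanFlux"]
    #
    # The names shown are just the defaults above; any name registered with
    # DiaObjectCalculationPlugin.registry may be listed, and per-plugin
    # configuration is reachable as ``config.plugins["ap_meanFlux"]``.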

class DiaObjectCalculationTask(CatalogCalculationTask):
    """Run plugins which operate on a catalog of DIA sources.

    This task facilitates running plugins which will operate on a source
    catalog. These plugins may do things such as classifying an object based
    on source record entries inserted during a measurement task.

    This task differs from CatalogCalculationTask in the following ways:

    - No multi mode is available for plugins. All plugins are assumed to run
      in single mode.

    - Input and output catalog types are assumed to be `pandas.DataFrame`
      objects with columns following those used in the Apdb.

    - No schema argument is passed to the plugins. Each plugin specifies
      output columns and required inputs.

    Parameters
    ----------
    plugMetadata : `lsst.daf.base.PropertyList` or `None`
        Will be modified in-place to contain metadata about the plugins being
        run. If `None`, an empty `~lsst.daf.base.PropertyList` will be
        created.
    **kwargs
        Additional arguments passed to the superclass constructor.

    Notes
    -----
    Plugins may either take an entire catalog to work on at a time, or work on
    individual records.
    """
    ConfigClass = DiaObjectCalculationConfig
    _DefaultName = "diaObjectCalculation"

    def __init__(self, plugMetadata=None, **kwargs):
        lsst.pipe.base.Task.__init__(self, **kwargs)
        if plugMetadata is None:
            plugMetadata = lsst.daf.base.PropertyList()
        self.plugMetadata = plugMetadata
        self.plugins = PluginMap()
        self.outputCols = []

        self.initializePlugins()

    def initializePlugins(self):
        """Initialize the plugins according to the configuration.
        """

        pluginType = namedtuple('pluginType', 'single multi')
        self.executionDict = {}
        # Read the properties for each plugin. Allocate a dictionary entry for
        # each run level. Verify that the plugins are above the minimum run
        # level for a catalogCalculation plugin. For each run level, the
        # plugins are sorted into either single record or multi record groups
        # to later be run appropriately.
        for executionOrder, name, config, PluginClass in sorted(self.config.plugins.apply()):
            if executionOrder not in self.executionDict:
                self.executionDict[executionOrder] = pluginType(single=[], multi=[])
            if PluginClass.getExecutionOrder() >= BasePlugin.DEFAULT_CATALOGCALCULATION:
                plug = PluginClass(config, name, metadata=self.plugMetadata)

                self._validatePluginCols(plug)

                self.plugins[name] = plug
                if plug.plugType == 'single':
                    self.executionDict[executionOrder].single.append(plug)
                elif plug.plugType == 'multi':
                    self.executionDict[executionOrder].multi.append(plug)
            else:
                errorTuple = (PluginClass, PluginClass.getExecutionOrder(),
                              BasePlugin.DEFAULT_CATALOGCALCULATION)
                raise ValueError("{} has an execution order less than the minimum for a "
                                 "catalogCalculation plugin. Value {} : Minimum {}".format(*errorTuple))

    def _validatePluginCols(self, plug):
        """Assert that output columns are not duplicated and input columns
        exist for dependent plugins.

        Parameters
        ----------
        plug : `lsst.ap.association.DiaObjectCalculationPlugin`
            Plugin to test for output collisions and input needs.
        """
        for inputName in plug.inputCols:
            if inputName not in self.outputCols:
                errorTuple = (plug.name, plug.getExecutionOrder(),
                              inputName)
                raise ValueError(
                    "Plugin, {} with execution order {} requires DiaObject "
                    "column {} to exist. Check the execution order of the "
                    "plugin and make sure it runs after a plugin creating "
                    "the column is run.".format(*errorTuple))
        for outputName in plug.outputCols:
            if outputName in self.outputCols:
                errorTuple = (plug.name, plug.getExecutionOrder(),
                              outputName)
                raise ValueError(
                    "Plugin, {} with execution order {} is attempting to "
                    "output a column {}, however the column is already being "
                    "produced by another plugin. Check other plugins for "
                    "collisions with this one.".format(*errorTuple))
            else:
                self.outputCols.append(outputName)

    @lsst.pipe.base.timeMethod
    def run(self, diaObjectCat, diaSourceCat, updatedDiaObjectIds, filterName):
        """The entry point for the DIA catalog calculation task.

        The run method both updates the values in the diaObjectCat and
        appends newly created DiaObjects to the catalog. For catalog column
        names see the lsst.cat schema definitions for the DiaObject and
        DiaSource tables (http://github.com/lsst/cat).

        Parameters
        ----------
        diaObjectCat : `pandas.DataFrame`
            DiaObjects to update values of and append new objects to.
            DataFrame should be indexed on "diaObjectId".
        diaSourceCat : `pandas.DataFrame`
            DiaSources associated with the DiaObjects in diaObjectCat.
            DataFrame should be indexed on
            ``["diaObjectId", "filterName", "diaSourceId"]``.
        updatedDiaObjectIds : `numpy.ndarray`
            Integer ids of the DiaObjects to update and create.
        filterName : `str`
            String name of the filter being processed.

        Returns
        -------
        returnStruct : `lsst.pipe.base.Struct`
            Struct containing:

            ``diaObjectCat``
                Full set of DiaObjects including both un-updated and
                updated/new DiaObjects (`pandas.DataFrame`).
            ``updatedDiaObjects``
                Catalog of DiaObjects that were updated or created by this
                task (`pandas.DataFrame`).
        """
        if diaObjectCat.index.name is None:
            diaObjectCat.set_index("diaObjectId", inplace=True, drop=False)
        elif diaObjectCat.index.name != "diaObjectId":
            self.log.warn(
                "Input diaObjectCat is indexed on column(s) incompatible with "
                "this task. Should be indexed on 'diaObjectId'. Trying to set "
                "index regardless.")
            diaObjectCat.set_index("diaObjectId", inplace=True, drop=False)

        # ``names`` by default is FrozenList([None]) hence we access the first
        # element and test for None.
        if diaSourceCat.index.names[0] is None:
            diaSourceCat.set_index(
                ["diaObjectId", "filterName", "diaSourceId"],
                inplace=True,
                drop=False)
        elif (diaSourceCat.index.names
              != ["diaObjectId", "filterName", "diaSourceId"]):
            self.log.warn(
                "Input diaSourceCat is indexed on column(s) incompatible with "
                "this task. Should be indexed on the multi-index "
                "['diaObjectId', 'filterName', 'diaSourceId']. Trying to set "
                "index regardless.")
            diaSourceCat.set_index(
                ["diaObjectId", "filterName", "diaSourceId"],
                inplace=True,
                drop=False)

        return self.callCompute(diaObjectCat,
                                diaSourceCat,
                                updatedDiaObjectIds,
                                filterName)
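    # A hypothetical calling sketch (the task and catalog variable names are
    # illustrative, not defined in this module):
    #
    #     task = DiaObjectCalculationTask()
    #     result = task.run(diaObjectCat,
    #                       diaSourceCat,
    #                       updatedDiaObjectIds=np.array([1234]),
    #                       filterName="g")
    #     updatedObjects = result.updatedDiaObjects
    #
    # ``diaObjectCat`` must carry a "diaObjectId" column and ``diaSourceCat``
    # the "diaObjectId", "filterName", and "diaSourceId" columns; ``run``
    # (re)sets the expected indexes on both DataFrames if necessary.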

    @lsst.pipe.base.timeMethod
    def callCompute(self,
                    diaObjectCat,
                    diaSourceCat,
                    updatedDiaObjectIds,
                    filterName):
        """Run each of the plugins on the catalog.

        For catalog column names see the lsst.cat schema definitions for the
        DiaObject and DiaSource tables (http://github.com/lsst/cat).

        Parameters
        ----------
        diaObjectCat : `pandas.DataFrame`
            DiaObjects to update values of and append new objects to.
            DataFrame should be indexed on "diaObjectId".
        diaSourceCat : `pandas.DataFrame`
            DiaSources associated with the DiaObjects in diaObjectCat.
            DataFrame must be indexed on
            ``["diaObjectId", "filterName", "diaSourceId"]``.
        updatedDiaObjectIds : `numpy.ndarray`
            Integer ids of the DiaObjects to update and create.
        filterName : `str`
            String name of the filter being processed.

        Returns
        -------
        returnStruct : `lsst.pipe.base.Struct`
            Struct containing:

            ``diaObjectCat``
                Full set of DiaObjects including both un-updated and
                updated/new DiaObjects (`pandas.DataFrame`).
            ``updatedDiaObjects``
                Catalog of DiaObjects that were updated or created by this
                task (`pandas.DataFrame`).

        Raises
        ------
        KeyError
            Raised if the `pandas.DataFrame` indexing is not properly set.
        """
        # DiaObjects will be updated in place.
        diaObjectsToUpdate = diaObjectCat.loc[updatedDiaObjectIds, :]

        updatingDiaSources = diaSourceCat.loc[updatedDiaObjectIds, :]
        # Pandas does not convert NULL to `nan` values in custom select
        # statements, instead using None. We thus must replace None with
        # `nan` manually.
        updatingDiaSources = updatingDiaSources.replace(to_replace=[None],
                                                        value=np.nan)
        updatingFilterDiaSources = updatingDiaSources.loc[
            (slice(None), filterName), :
        ]

        # Level=0 here groups by diaObjectId.
        diaSourcesGB = updatingDiaSources.groupby(level=0)
        filterDiaSourcesGB = updatingFilterDiaSources.groupby(level=0)

        for runlevel in sorted(self.executionDict):
            for plug in self.executionDict[runlevel].single:
                for updatedDiaObjectId in updatedDiaObjectIds:

                    # Sub-select diaSources associated with this diaObject.
                    objDiaSources = updatingDiaSources.loc[updatedDiaObjectId]

                    # Sub-select on diaSources observed in the current filter.
                    filterObjDiaSources = objDiaSources.loc[filterName]
                    with CCContext(plug, updatedDiaObjectId, self.log):
                        # We feed the catalog we need to update and the id
                        # so as to get a view into the catalog and not a copy.
                        # This updates the values in the catalog.
                        plug.calculate(diaObjects=diaObjectsToUpdate,
                                       diaObjectId=updatedDiaObjectId,
                                       diaSources=objDiaSources,
                                       filterDiaSources=filterObjDiaSources,
                                       filterName=filterName)
            for plug in self.executionDict[runlevel].multi:
                with CCContext(plug, diaObjectsToUpdate, self.log):
                    plug.calculate(diaObjects=diaObjectsToUpdate,
                                   diaSources=diaSourcesGB,
                                   filterDiaSources=filterDiaSourcesGB,
                                   filterName=filterName)
        # Need to store the newly updated diaObjects directly as editing a
        # view into diaObjectsToUpdate does not update the values of
        # diaObjectCat.
        diaObjectCat.loc[updatedDiaObjectIds, :] = diaObjectsToUpdate
        return lsst.pipe.base.Struct(
            diaObjectCat=diaObjectCat,
            updatedDiaObjects=diaObjectsToUpdate)

    def _initialize_dia_object(self, objId):
        """Create a new DiaObject with values required to be initialized by
        the Apdb.

        Parameters
        ----------
        objId : `int`
            ``diaObjectId`` value for the new DiaObject.

        Returns
        -------
        diaObject : `dict`
            Newly created DiaObject with keys:

            ``diaObjectId``
                Unique DiaObjectId (`int`).
            ``pmParallaxNdata``
                Number of data points used for parallax calculation (`int`).
            ``nearbyObj1``
                Id of a nearby Object in the Object table (`int`).
            ``nearbyObj2``
                Id of a nearby Object in the Object table (`int`).
            ``nearbyObj3``
                Id of a nearby Object in the Object table (`int`).
            ``?PSFluxNdata``
                Number of data points used to calculate point source flux
                summary statistics in each bandpass (`int`).
        """
        new_dia_object = {"diaObjectId": objId,
                          "pmParallaxNdata": 0,
                          "nearbyObj1": 0,
                          "nearbyObj2": 0,
                          "nearbyObj3": 0}
        for f in ["u", "g", "r", "i", "z", "y"]:
            new_dia_object["%sPSFluxNdata" % f] = 0
        return new_dia_object