Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ap_association. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22from collections import namedtuple 

23import numpy as np 

24import pandas as pd 

25 

26from .catalogCalculation import (CatalogCalculationPluginConfig, 

27 CatalogCalculationPlugin, 

28 CatalogCalculationConfig, 

29 CatalogCalculationTask, 

30 CCContext) 

31from .pluginsBase import BasePlugin 

32from .pluginRegistry import (PluginRegistry, PluginMap) 

33import lsst.pipe.base 

34 

# Enforce an error for unsafe column/array value setting in pandas.
# This turns pandas' chained-assignment diagnostic into an exception instead
# of a warning. NOTE: the option is process-wide, so importing this module
# changes the behavior for every other module using pandas in the same
# interpreter.
pd.options.mode.chained_assignment = 'raise'

# Public names exported by ``from <module> import *``.
__all__ = ("DiaObjectCalculationPlugin", "DiaObjectCalculationPluginConfig",
           "DiaObjectCalculationTask", "DiaObjectCalculationConfig")

40 

41 

class DiaObjectCalculationPluginConfig(CatalogCalculationPluginConfig):
    """Default configuration class for DIA catalog calculation plugins.

    Adds no fields of its own; it exists so that DIA plugins have a distinct
    configuration type to register against.
    """

46 

47 

class DiaObjectCalculationPlugin(CatalogCalculationPlugin):
    """Base class for DIA catalog calculation plugins.

    Follows `CatalogCalculationPlugin`, with modifications for use in AP.

    Parameters
    ----------
    config : `DiaObjectCalculationPlugin.ConfigClass`
        Plugin configuration.
    name : `str`
        The string the plugin was registered with.
    metadata : `lsst.daf.base.PropertySet`
        Plugin metadata. Accepted for interface compatibility; this base
        constructor does not store or use it.
    """

    ConfigClass = DiaObjectCalculationPluginConfig

    registry = PluginRegistry(DiaObjectCalculationPluginConfig)
    """List of available plugins (`lsst.meas.base.PluginRegistry`).
    """

    FLUX_MOMENTS_CALCULATED = 5.0
    """Add order after flux means and stds are calculated.
    """

    plugType = 'single'
    """Does the plugin operate on a single source or the whole catalog (`str`)?
    If the plugin operates on a single source at a time, this should be set to
    ``"single"``; if it expects the whole catalog, to ``"multi"``. If the
    plugin is of type ``"multi"``, the `fail` method must be implemented to
    accept the whole catalog. If the plugin is of type ``"single"``, `fail`
    should accept a single source record.
    """

    inputCols = []
    """DiaObject column names required by the plugin in order to run and
    complete its calculation. DiaCalculationTask should raise an error if a
    plugin is instantiated without the needed column available. Input columns
    should be defined in the DPDD/cat/Apdb schema. Filter dependent columns
    should be specified without the filter name prepended to them, e.g.
    ``PSFluxMean`` instead of ``uPSFluxMean``.
    """
    outputCols = []
    """DiaObject column names output by the plugin. DiaCalculationTask should
    raise an error if another plugin that is run outputs to the same column.
    Output columns should be defined in the DPDD/cat/Apdb schema. Filter
    dependent columns should be specified without the filter name prepended to
    them, e.g. ``PSFluxMean`` instead of ``uPSFluxMean``.
    """

    needsFilter = True
    """This plugin requires a filter to be specified. Plugins using filter
    names usually deal with fluxes and only a sub-set of the DiaSource
    catalog. Plugins that do not use the filter name usually run over a value
    common across all observations/detections such as position.
    """

    def __init__(self, config, name, metadata):
        # Only the BasePlugin initializer is invoked; ``metadata`` is not
        # forwarded to it.
        BasePlugin.__init__(self, config, name)

    def calculate(self,
                  diaObject,
                  diaSources,
                  filterDiaFluxes=None,
                  filterName=None,
                  **kwargs):
        """Perform the calculation specified by this plugin.

        Subclasses must override this method; the base implementation always
        raises. The method operates either on a single catalog record or on a
        whole catalog, populating it with the output defined by the plugin.
        Results may be added as new columns or may overwrite existing values.

        Parameters
        ----------
        diaObject : `dict`
            Summary object to store values in.
        diaSources : `pandas.DataFrame`
            DataFrame representing all diaSources associated with this
            diaObject.
        filterDiaFluxes : `pandas.DataFrame`
            DataFrame representing diaSources associated with this
            diaObject that are observed in the band pass ``filterName``.
        filterName : `str`
            Simple name of the filter for the flux being calculated.
        **kwargs
            Any additional keyword arguments that may be passed to the plugin.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.

        Notes
        -----
        `DiaObjectCalculationTask.callCompute` invokes plugins with the
        keywords ``diaObjects``, ``diaObjectId`` (single mode only),
        ``diaSources``, ``filterDiaSources`` and ``filterName``. Those names
        differ from the ones on this base signature, so concrete plugins
        need to accept the keywords the task actually passes.
        NOTE(review): confirm which naming is authoritative.
        """
        raise NotImplementedError()

    def fail(self, diaObject, columns, error=None):
        """Set the requested diaObject columns to NaN.

        Parameters
        ----------
        diaObject : `dict`
            Summary object to store values in.
        columns : `list` of `str`
            Names of the columns to overwrite with the failed value
            (`numpy.nan`).
        error : `BaseException` or `None`
            Error to pass. Kept for consistency with
            CatalogCalculationPlugin. Unused.
        """
        for column in columns:
            diaObject[column] = np.nan

155 

156 

class DiaObjectCalculationConfig(CatalogCalculationConfig):
    """Configuration for the DIA catalog calculation driver task.

    Selects which registered `DiaObjectCalculationPlugin` plugins are executed
    when the task using this configuration is run.
    """

    plugins = DiaObjectCalculationPlugin.registry.makeField(
        doc="Plugins to be run and their configuration",
        default=["ap_meanPosition",
                 "ap_meanFlux"],
        multi=True,
    )

169 

170 

class DiaObjectCalculationTask(CatalogCalculationTask):
    """Run plugins which operate on a catalog of DIA sources.

    This task facilitates running plugins which will operate on a source
    catalog. These plugins may do things such as classifying an object based
    on source record entries inserted during a measurement task.

    This task differs from CatalogCalculationTask in the following ways:

    - Plugins are split on their ``needsFilter`` attribute. Plugins that do
      not need a filter are run once; plugins that do are run once for each
      requested filter. Within each group both ``"single"`` and ``"multi"``
      plugins are dispatched in execution order.

    - Input and output catalog types are assumed to be `pandas.DataFrame`
      objects with columns following those used in the Apdb.

    - No schema argument is passed to the plugins. Each plugin specifies
      output columns and required inputs.

    Parameters
    ----------
    plugMetadata : `lsst.daf.base.PropertyList` or `None`
        Will be modified in-place to contain metadata about the plugins being
        run. If `None`, an empty `~lsst.daf.base.PropertyList` will be
        created.
    **kwargs
        Additional arguments passed to the superclass constructor.

    Notes
    -----
    Plugins may either take an entire catalog to work on at a time, or work on
    individual records.
    """
    ConfigClass = DiaObjectCalculationConfig
    _DefaultName = "diaObjectCalculation"

    def __init__(self, plugMetadata=None, **kwargs):
        # The generic Task initializer is called directly rather than through
        # ``super()``, so CatalogCalculationTask.__init__ is not executed.
        lsst.pipe.base.Task.__init__(self, **kwargs)
        if plugMetadata is None:
            # NOTE(review): ``lsst.daf.base`` is not among the imports visible
            # in this module; presumably it is loaded as a side effect of
            # ``import lsst.pipe.base`` -- confirm.
            plugMetadata = lsst.daf.base.PropertyList()
        self.plugMetadata = plugMetadata
        self.plugins = PluginMap()
        # Accumulates the output columns of every plugin validated so far.
        self.outputCols = []

        self.initializePlugins()

    def initializePlugins(self):
        """Initialize the plugins according to the configuration.

        Populates ``self.plugins`` (name -> plugin instance) and
        ``self.executionDict`` (execution order -> lists of ``single`` and
        ``multi`` plugins).

        Raises
        ------
        ValueError
            Raised if a plugin's execution order is below
            ``BasePlugin.DEFAULT_CATALOGCALCULATION``, or if the plugin fails
            the column checks in `_validatePluginCols`.
        """
        pluginType = namedtuple('pluginType', 'single multi')
        self.executionDict = {}
        # Read the properties for each plugin. Allocate a dictionary entry for
        # each run level. Verify that the plugins are above the minimum run
        # level for a catalogCalculation plugin. For each run level, the
        # plugins are sorted into either single record, or multi record groups
        # to later be run appropriately.
        for executionOrder, name, config, PluginClass in sorted(self.config.plugins.apply()):
            if executionOrder not in self.executionDict:
                self.executionDict[executionOrder] = pluginType(single=[], multi=[])
            if PluginClass.getExecutionOrder() >= BasePlugin.DEFAULT_CATALOGCALCULATION:
                plug = PluginClass(config, name, metadata=self.plugMetadata)

                self._validatePluginCols(plug)

                self.plugins[name] = plug
                # A plugin whose ``plugType`` is neither value is registered
                # in ``self.plugins`` but never scheduled for execution.
                if plug.plugType == 'single':
                    self.executionDict[executionOrder].single.append(plug)
                elif plug.plugType == 'multi':
                    self.executionDict[executionOrder].multi.append(plug)
            else:
                errorTuple = (PluginClass, PluginClass.getExecutionOrder(),
                              BasePlugin.DEFAULT_CATALOGCALCULATION)
                raise ValueError("{} has an execution order less than the minimum for an catalogCalculation "
                                 "plugin. Value {} : Minimum {}".format(*errorTuple))

    def _validatePluginCols(self, plug):
        """Assert that output columns are not duplicated and input columns
        exist for dependent plugins.

        On success the plugin's output columns are appended to
        ``self.outputCols`` so that later plugins can depend on them.

        Parameters
        ----------
        plug : `lsst.ap.association.DiaCalculationPlugin`
            Plugin to test for output collisions and input needs.

        Raises
        ------
        ValueError
            Raised if a required input column has not been produced by a
            previously validated plugin, or if an output column is already
            produced by another plugin.
        """
        for inputName in plug.inputCols:
            if inputName not in self.outputCols:
                errorTuple = (plug.name, plug.getExecutionOrder(),
                              inputName)
                raise ValueError(
                    "Plugin, {} with execution order {} requires DiaObject "
                    "column {} to exist. Check the execution order of the "
                    "plugin and make sure it runs after a plugin creating "
                    "the column is run.".format(*errorTuple))
        for outputName in plug.outputCols:
            if outputName in self.outputCols:
                errorTuple = (plug.name, plug.getExecutionOrder(),
                              outputName)
                raise ValueError(
                    "Plugin, {} with execution order {} is attempting to "
                    "output a column {}, however the column is already being "
                    "produced by another plugin. Check other plugins for "
                    "collisions with this one.".format(*errorTuple))
            self.outputCols.append(outputName)

    @lsst.pipe.base.timeMethod
    def run(self,
            diaObjectCat,
            diaSourceCat,
            updatedDiaObjectIds,
            filterNames):
        """The entry point for the DIA catalog calculation task.

        Makes sure both input catalogs carry the index `callCompute` relies
        on (re-indexing them in place when they do not) and then delegates to
        `callCompute`. For catalog column names see the lsst.cat schema
        definitions for the DiaObject and DiaSource tables
        (http://github.com/lsst/cat).

        Parameters
        ----------
        diaObjectCat : `pandas.DataFrame`
            DiaObjects to update values of and append new objects to. DataFrame
            should be indexed on "diaObjectId"
        diaSourceCat : `pandas.DataFrame`
            DiaSources associated with the DiaObjects in diaObjectCat.
            DataFrame should be indexed on
            `["diaObjectId", "filterName", "diaSourceId"]`
        updatedDiaObjectIds : `numpy.ndarray`
            Integer ids of the DiaObjects to update and create.
        filterNames : `list` of `str`
            List of string names of filters to be being processed.

        Returns
        -------
        returnStruct : `lsst.pipe.base.Struct`
            Struct containing:

            ``diaObjectCat``
                Full set of DiaObjects including both un-updated and
                updated/new DiaObjects (`pandas.DataFrame`).
            ``updatedDiaObjects``
                Catalog of DiaObjects that were updated or created by this
                task (`pandas.DataFrame`).
        """
        if diaObjectCat.index.name != "diaObjectId":
            # An unnamed (default) index is silently replaced; a differently
            # named one is replaced as well, but with a warning.
            if diaObjectCat.index.name is not None:
                self.log.warning(
                    "Input diaObjectCat is indexed on column(s) incompatible with "
                    "this task. Should be indexed on 'diaObjectId'. Trying to set "
                    "index regardless")
            diaObjectCat.set_index("diaObjectId", inplace=True, drop=False)

        # ``names`` by default is FrozenList([None]) hence we access the first
        # element and test for None.
        sourceIndexIsUnnamed = diaSourceCat.index.names[0] is None
        if (sourceIndexIsUnnamed
                or diaSourceCat.index.names
                != ["diaObjectId", "filterName", "diaSourceId"]):
            if not sourceIndexIsUnnamed:
                self.log.warning(
                    "Input diaSourceCat is indexed on column(s) incompatible with "
                    "this task. Should be indexed on 'multi-index, "
                    "['diaObjectId', 'filterName', 'diaSourceId']. Trying to set "
                    "index regardless.")
            diaSourceCat.set_index(
                ["diaObjectId", "filterName", "diaSourceId"],
                inplace=True,
                drop=False)

        return self.callCompute(diaObjectCat,
                                diaSourceCat,
                                updatedDiaObjectIds,
                                filterNames)

    @lsst.pipe.base.timeMethod
    def callCompute(self,
                    diaObjectCat,
                    diaSourceCat,
                    updatedDiaObjectIds,
                    filterNames):
        """Run each of the plugins on the catalog.

        Plugins with ``needsFilter`` unset are run first, once, in execution
        order. Plugins with ``needsFilter`` set are then run once per entry
        in ``filterNames``. For catalog column names see the lsst.cat schema
        definitions for the DiaObject and DiaSource tables
        (http://github.com/lsst/cat).

        Parameters
        ----------
        diaObjectCat : `pandas.DataFrame`
            DiaObjects to update values of and append new objects to. DataFrame
            should be indexed on "diaObjectId"
        diaSourceCat : `pandas.DataFrame`
            DiaSources associated with the DiaObjects in diaObjectCat.
            DataFrame must be indexed on
            `["diaObjectId", "filterName", "diaSourceId"]`
        updatedDiaObjectIds : `numpy.ndarray`
            Integer ids of the DiaObjects to update and create.
        filterNames : `list` of `str`
            List of string names of filters to be being processed.

        Returns
        -------
        returnStruct : `lsst.pipe.base.Struct`
            Struct containing:

            ``diaObjectCat``
                Full set of DiaObjects including both un-updated and
                updated/new DiaObjects (`pandas.DataFrame`).
            ``updatedDiaObjects``
                Catalog of DiaObjects that were updated or created by this
                task (`pandas.DataFrame`).

        Raises
        ------
        KeyError
            Raises if `pandas.DataFrame` indexing is not properly set.
        """
        # Plugins write into this sub-selection; it is copied back into
        # ``diaObjectCat`` at the end of the method.
        diaObjectsToUpdate = diaObjectCat.loc[updatedDiaObjectIds, :]
        self.log.info("Calculating summary stats for %i DiaObjects",
                      len(diaObjectsToUpdate))

        updatingDiaSources = diaSourceCat.loc[updatedDiaObjectIds, :]
        # Level=0 here groups by diaObjectId.
        diaSourcesGB = updatingDiaSources.groupby(level=0)

        # First pass: plugins that are independent of the filter.
        for runlevel in sorted(self.executionDict):
            for plug in self.executionDict[runlevel].single:
                if plug.needsFilter:
                    continue
                for updatedDiaObjectId in updatedDiaObjectIds:

                    # Sub-select diaSources associated with this diaObject.
                    objDiaSources = updatingDiaSources.loc[updatedDiaObjectId]

                    with CCContext(plug, updatedDiaObjectId, self.log):
                        # We feed the catalog we need to update and the id
                        # so as to get a view into the catalog and not a copy.
                        # This updates the values in the catalog.
                        plug.calculate(diaObjects=diaObjectsToUpdate,
                                       diaObjectId=updatedDiaObjectId,
                                       diaSources=objDiaSources,
                                       filterDiaSources=None,
                                       filterName=None)
            for plug in self.executionDict[runlevel].multi:
                if plug.needsFilter:
                    continue
                with CCContext(plug, diaObjectsToUpdate, self.log):
                    plug.calculate(diaObjects=diaObjectsToUpdate,
                                   diaSources=diaSourcesGB,
                                   filterDiaSources=None,
                                   filterName=None)

        # Second pass: filter dependent plugins, once per requested filter.
        for filterName in filterNames:
            try:
                updatingFilterDiaSources = updatingDiaSources.loc[
                    (slice(None), filterName), :
                ]
            except KeyError:
                self.log.warning("No DiaSource data with filter=%s. "
                                 "Continuing...", filterName)
                continue
            # Level=0 here groups by diaObjectId.
            filterDiaSourcesGB = updatingFilterDiaSources.groupby(level=0)

            for runlevel in sorted(self.executionDict):
                for plug in self.executionDict[runlevel].single:
                    if not plug.needsFilter:
                        continue
                    for updatedDiaObjectId in updatedDiaObjectIds:

                        # Sub-select diaSources associated with this diaObject.
                        objDiaSources = updatingDiaSources.loc[updatedDiaObjectId]

                        # Sub-select on diaSources observed in the current filter.
                        try:
                            filterObjDiaSources = objDiaSources.loc[filterName]
                        except KeyError:
                            self.log.warning(
                                "DiaObjectId=%s has no "
                                "DiaSources for filter=%s. "
                                "Continuing...", updatedDiaObjectId, filterName)
                            # Skip this object: without this,
                            # ``filterObjDiaSources`` would be undefined or
                            # still hold the previous object's sources.
                            continue
                        with CCContext(plug, updatedDiaObjectId, self.log):
                            # We feed the catalog we need to update and the id
                            # so as to get a view into the catalog and not a copy.
                            # This updates the values in the catalog.
                            plug.calculate(diaObjects=diaObjectsToUpdate,
                                           diaObjectId=updatedDiaObjectId,
                                           diaSources=objDiaSources,
                                           filterDiaSources=filterObjDiaSources,
                                           filterName=filterName)
                for plug in self.executionDict[runlevel].multi:
                    if not plug.needsFilter:
                        continue
                    with CCContext(plug, diaObjectsToUpdate, self.log):
                        plug.calculate(diaObjects=diaObjectsToUpdate,
                                       diaSources=diaSourcesGB,
                                       filterDiaSources=filterDiaSourcesGB,
                                       filterName=filterName)
        # Need to store the newly updated diaObjects directly as editing
        # diaObjectsToUpdate does not update the values of diaObjectCat.
        diaObjectCat.loc[updatedDiaObjectIds, :] = diaObjectsToUpdate
        return lsst.pipe.base.Struct(
            diaObjectCat=diaObjectCat,
            updatedDiaObjects=diaObjectsToUpdate)

    def _initialize_dia_object(self, objId):
        """Create a new DiaObject with values required to be initialized by the
        Apdb.

        Parameters
        ----------
        objId : `int`
            ``diaObjectId`` value for the new DiaObject.

        Returns
        -------
        diaObject : `dict`
            Newly created DiaObject with keys:

            ``diaObjectId``
                Unique DiaObjectId (`int`).
            ``pmParallaxNdata``
                Number of data points used for parallax calculation (`int`).
            ``nearbyObj1``
                Id of a nearbyObject in the Object table (`int`).
            ``nearbyObj2``
                Id of a nearbyObject in the Object table (`int`).
            ``nearbyObj3``
                Id of a nearbyObject in the Object table (`int`).
            ``?PSFluxNdata``
                Number of data points used to calculate point source flux
                summary statistics in each bandpass (`int`). ``?`` stands for
                each of ``u``, ``g``, ``r``, ``i``, ``z`` and ``y``.
        """
        new_dia_object = {"diaObjectId": objId,
                          "pmParallaxNdata": 0,
                          "nearbyObj1": 0,
                          "nearbyObj2": 0,
                          "nearbyObj3": 0}
        # One zero-initialized counter per bandpass.
        new_dia_object.update(
            ("%sPSFluxNdata" % band, 0) for band in ("u", "g", "r", "i", "z", "y"))
        return new_dia_object

514 return new_dia_object