Coverage for python/lsst/ap/association/loadDiaCatalogs.py: 32%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

77 statements  

1# This file is part of ap_association. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Task for pre-loading DiaSources and DiaObjects within ap_pipe. 

23""" 

24import numpy as np 

25import pandas as pd 

26from sqlalchemy.exc import OperationalError, ProgrammingError 

27 

28import lsst.geom as geom 

29import lsst.pex.config as pexConfig 

30import lsst.pipe.base as pipeBase 

31import lsst.sphgeom as sphgeom 

32 

33__all__ = ("LoadDiaCatalogsTask", "LoadDiaCatalogsConfig") 

34 

35 

class LoadDiaCatalogsConfig(pexConfig.Config):
    """Config class for LoadDiaCatalogsTask.
    """
    # HTM level used to build the pixelization that covers the exposure.
    htmLevel = pexConfig.RangeField(
        dtype=int,
        doc="Level of the HTM pixelization.",
        default=20,
        min=1,
    )
    # Upper bound on the number of (min, max) HTM index ranges requested
    # when computing the envelope of the padded exposure region.
    htmMaxRanges = pexConfig.RangeField(
        dtype=int,
        doc="Maximum number of HTM (min, max) ranges to return.",
        default=128,
        min=2,
    )
    # Amount, in pixels, by which the exposure bounding box is grown before
    # it is converted into a sky region.
    pixelMargin = pexConfig.RangeField(
        doc="Padding to add to all 4 edges of the bounding box (pixels)",
        dtype=int,
        default=250,
        min=0,
    )
    # Selects which Apdb query is used to load DiaSources: by HTM pixel
    # ranges (True) or by the ids of the already loaded DiaObjects (False).
    loadDiaSourcesByPixelId = pexConfig.Field(
        doc="Load DiaSources by their HTM pixelId instead of by their "
            "associated diaObjectId",
        dtype=bool,
        default=True,
    )

63 

64 

class LoadDiaCatalogsTask(pipeBase.Task):
    """Retrieve DiaObjects and associated DiaSources from the Apdb given an
    input exposure.
    """
    ConfigClass = LoadDiaCatalogsConfig
    _DefaultName = "loadDiaCatalogs"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Pixelization used to turn the exposure footprint into HTM ranges.
        self.pixelator = sphgeom.HtmPixelization(self.config.htmLevel)

    @pipeBase.timeMethod
    def run(self, exposure, apdb):
        """Preload all DiaObjects, DiaSources and DiaForcedSources from the
        Apdb given the current exposure.

        Parameters
        ----------
        exposure : `lsst.afw.image.Exposure`
            An exposure with a bounding box.
        apdb : `lsst.dax.apdb.Apdb`
            AP database connection object.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaObjects`` : Complete set of DiaObjects covering the input
              exposure padded by ``pixelMargin``. DataFrame is indexed by
              the ``diaObjectId`` column. (`pandas.DataFrame`)
            - ``diaSources`` : Complete set of DiaSources covering the input
              exposure padded by ``pixelMargin``. DataFrame is indexed by
              ``diaObjectId``, ``filterName``, ``diaSourceId`` columns.
              (`pandas.DataFrame`)
            - ``diaForcedSources`` : DiaForcedSources loaded for the
              returned ``diaObjects``. DataFrame is indexed by the
              ``diaObjectId``, ``diaForcedSourceId`` columns.
              (`pandas.DataFrame`)

        Raises
        ------
        RuntimeError
            Raised if the DiaObject query fails with a database
            ``OperationalError`` or ``ProgrammingError``.
        """
        visitInfo = exposure.getInfo().getVisitInfo()
        pixelRanges = self._getPixelRanges(exposure)

        # This is the first database query, so a failure here most likely
        # means the database has not been set up; report that explicitly.
        try:
            diaObjects = self.loadDiaObjects(pixelRanges, apdb)
        except (OperationalError, ProgrammingError) as e:
            raise RuntimeError(
                "Database query failed to load DiaObjects; did you call "
                "make_apdb.py first? If you did, some other error occurred "
                "during database access of the DiaObject table.") from e

        dateTime = visitInfo.getDate().toPython()

        diaSources = self.loadDiaSources(diaObjects,
                                         pixelRanges,
                                         dateTime,
                                         apdb)

        diaForcedSources = self.loadDiaForcedSources(diaObjects,
                                                     dateTime,
                                                     apdb)

        return pipeBase.Struct(
            diaObjects=diaObjects,
            diaSources=diaSources,
            diaForcedSources=diaForcedSources)

    @pipeBase.timeMethod
    def loadDiaObjects(self, pixelRanges, apdb):
        """Load DiaObjects from the Apdb based on their HTM location.

        Parameters
        ----------
        pixelRanges : `list` of `tuples`
            Ranges of pixel values that cover region of interest.
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb that are within the area defined
            by ``pixelRanges``. Indexed by ``diaObjectId`` (the column is
            kept as well), with duplicated ids removed and ``None`` values
            replaced by NaN.
        """
        if len(pixelRanges) == 0:
            # If no area is specified return an empty DataFrame with the
            # column used for indexing later in AssociationTask.
            diaObjects = pd.DataFrame(columns=["diaObjectId"])
        else:
            diaObjects = apdb.getDiaObjects(pixelRanges, return_pandas=True)

        diaObjects.set_index("diaObjectId", drop=False, inplace=True)
        if diaObjects.index.has_duplicates:
            self.log.warn(
                "Duplicate DiaObjects loaded from the Apdb. This may cause "
                "downstream pipeline issues. Dropping duplicated rows")
            # Keep the first complete row for every index value. A boolean
            # mask is used instead of ``groupby(...).first()`` because the
            # latter takes the first non-NA value per column and can thereby
            # blend values coming from different duplicated rows.
            diaObjects = diaObjects[
                ~diaObjects.index.duplicated(keep="first")]

        return diaObjects.replace(to_replace=[None], value=np.nan)

    @pipeBase.timeMethod
    def loadDiaSources(self, diaObjects, pixelRanges, dateTime, apdb):
        """Load DiaSources from the Apdb based on their diaObjectId or
        pixelId location.

        Variable used to load sources is set in config.

        Parameters
        ----------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb that are within the area defined
            by ``pixelRanges``.
        pixelRanges : `list` of `tuples`
            Ranges of pixelIds that cover region of interest.
        dateTime : `datetime.datetime`
            Time of the current visit
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        DiaSources : `pandas.DataFrame`
            DiaSources loaded from the Apdb that are within the area defined
            by ``pixelRange`` and associated with ``diaObjects``. Indexed by
            ``diaObjectId``, ``filterName``, ``diaSourceId`` (the columns
            are kept as well), with duplicated index entries removed and
            ``None`` values replaced by NaN.
        """
        indexColumns = ["diaObjectId", "filterName", "diaSourceId"]

        if self.config.loadDiaSourcesByPixelId:
            if len(pixelRanges) == 0:
                # If no area is specified return an empty DataFrame with the
                # columns used for indexing later in AssociationTask.
                diaSources = pd.DataFrame(columns=indexColumns)
            else:
                diaSources = apdb.getDiaSourcesInRegion(pixelRanges,
                                                        dateTime,
                                                        return_pandas=True)
        else:
            if len(diaObjects) == 0:
                # If no diaObjects are available return an empty DataFrame
                # with the columns used for indexing later in
                # AssociationTask.
                diaSources = pd.DataFrame(columns=indexColumns)
            else:
                diaSources = apdb.getDiaSources(
                    diaObjects.loc[:, "diaObjectId"],
                    dateTime,
                    return_pandas=True)

        diaSources.set_index(indexColumns, drop=False, inplace=True)
        if diaSources.index.has_duplicates:
            self.log.warn(
                "Duplicate DiaSources loaded from the Apdb. This may cause "
                "downstream pipeline issues. Dropping duplicated rows")
            # Keep the first complete row for every index entry. Masking
            # preserves the MultiIndex, so no reset/re-index round trip is
            # needed, and rows are never blended across duplicates.
            diaSources = diaSources[
                ~diaSources.index.duplicated(keep="first")]

        return diaSources.replace(to_replace=[None], value=np.nan)

    @pipeBase.timeMethod
    def loadDiaForcedSources(self, diaObjects, dateTime, apdb):
        """Load DiaForcedSources from the Apdb for a set of DiaObjects.

        Parameters
        ----------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb.
        dateTime : `datetime.datetime`
            Time of the current visit
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaForcedSources : `pandas.DataFrame`
            DiaForcedSources loaded from the Apdb for the ids in the
            ``diaObjectId`` column of ``diaObjects``. Indexed by
            ``diaObjectId``, ``diaForcedSourceId`` (the columns are kept as
            well), with duplicated index entries removed and ``None`` values
            replaced by NaN.
        """
        indexColumns = ["diaObjectId", "diaForcedSourceId"]

        if len(diaObjects) == 0:
            # If no diaObjects are available return an empty DataFrame with
            # the columns used for indexing later in AssociationTask.
            diaForcedSources = pd.DataFrame(columns=indexColumns)
        else:
            diaForcedSources = apdb.getDiaForcedSources(
                diaObjects.loc[:, "diaObjectId"],
                dateTime,
                return_pandas=True)

        diaForcedSources.set_index(indexColumns, drop=False, inplace=True)
        if diaForcedSources.index.has_duplicates:
            self.log.warn(
                "Duplicate DiaForcedSources loaded from the Apdb. This may "
                "cause downstream pipeline issues. Dropping duplicated rows.")
            # Keep the first complete row for every index entry. Masking
            # preserves the MultiIndex, so no reset/re-index round trip is
            # needed, and rows are never blended across duplicates.
            diaForcedSources = diaForcedSources[
                ~diaForcedSources.index.duplicated(keep="first")]

        return diaForcedSources.replace(to_replace=[None], value=np.nan)

    @pipeBase.timeMethod
    def _getPixelRanges(self, exposure):
        """Calculate covering HTM pixels for the current exposure.

        Parameters
        ----------
        exposure : `lsst.afw.image.Exposure`
            Exposure object with calibrated WCS.

        Returns
        -------
        htmRanges : `list` of `tuples`
            A list of tuples containing `int` values.
        """
        # Pad the exposure footprint so objects just off the edges are
        # included as well.
        bbox = geom.Box2D(exposure.getBBox())
        bbox.grow(self.config.pixelMargin)
        wcs = exposure.getWcs()

        # Project the padded corners onto the sky to build the region.
        region = sphgeom.ConvexPolygon([wcs.pixelToSky(pp).getVector()
                                        for pp in bbox.getCorners()])

        indices = self.pixelator.envelope(region, self.config.htmMaxRanges)

        return indices.ranges()