Coverage for python/lsst/ap/association/loadDiaCatalogs.py: 39%

68 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-03 12:06 +0000

1# This file is part of ap_association. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Task for pre-loading DiaSources and DiaObjects within ap_pipe. 

23""" 

24import numpy as np 

25import pandas as pd 

26from sqlalchemy.exc import OperationalError, ProgrammingError 

27 

28import lsst.geom as geom 

29import lsst.pex.config as pexConfig 

30import lsst.pipe.base as pipeBase 

31import lsst.sphgeom as sphgeom 

32from lsst.utils.timer import timeMethod 

33 

34__all__ = ("LoadDiaCatalogsTask", "LoadDiaCatalogsConfig") 

35 

36 

class LoadDiaCatalogsConfig(pexConfig.Config):
    """Configuration for `LoadDiaCatalogsTask`.
    """
    # Number of pixels by which the exposure bounding box is grown on every
    # side before its corners are converted into the sky region that is used
    # to query the Apdb. Must be non-negative.
    pixelMargin = pexConfig.RangeField(
        doc="Padding to add to all edges of the bounding box (pixels)",
        dtype=int,
        default=250,
        min=0,
    )

46 

47 

class LoadDiaCatalogsTask(pipeBase.Task):
    """Retrieve DiaObjects and their associated DiaSources and
    DiaForcedSources from the Apdb given an input exposure.
    """
    ConfigClass = LoadDiaCatalogsConfig
    _DefaultName = "loadDiaCatalogs"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @timeMethod
    def run(self, exposure, apdb):
        """Preload all DiaObjects, DiaSources and DiaForcedSources from the
        Apdb given the current exposure.

        Parameters
        ----------
        exposure : `lsst.afw.image.Exposure`
            An exposure with a bounding box, a WCS and visit info.
        apdb : `lsst.dax.apdb.Apdb`
            AP database connection object.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaObjects`` : Complete set of DiaObjects covering the input
              exposure padded by ``pixelMargin``. DataFrame is indexed by
              the ``diaObjectId`` column. (`pandas.DataFrame`)
            - ``diaSources`` : Complete set of DiaSources covering the input
              exposure padded by ``pixelMargin``. DataFrame is indexed by
              ``diaObjectId``, ``band``, ``diaSourceId`` columns.
              (`pandas.DataFrame`)
            - ``diaForcedSources`` : DiaForcedSources loaded for the
              ``diaObjects``. DataFrame is indexed by ``diaObjectId``,
              ``diaForcedSourceId`` columns. (`pandas.DataFrame`)

        Raises
        ------
        RuntimeError
            Raised if the DiaObject query fails with a SQLAlchemy
            `OperationalError` or `ProgrammingError`.
        """
        region = self._getRegion(exposure)

        # This is the first database query, so database-level failures are
        # re-raised with a hint about the most likely cause (missing schema).
        try:
            diaObjects = self.loadDiaObjects(region, apdb)
        except (OperationalError, ProgrammingError) as e:
            raise RuntimeError(
                "Database query failed to load DiaObjects; did you call "
                "make_apdb.py first? If you did, some other error occurred "
                "during database access of the DiaObject table.") from e

        dateTime = exposure.visitInfo.date

        diaSources = self.loadDiaSources(diaObjects, region, dateTime, apdb)

        diaForcedSources = self.loadDiaForcedSources(diaObjects, region, dateTime, apdb)

        return pipeBase.Struct(
            diaObjects=diaObjects,
            diaSources=diaSources,
            diaForcedSources=diaForcedSources)

    @timeMethod
    def loadDiaObjects(self, region, apdb):
        """Load DiaObjects from the Apdb that fall within a region.

        Parameters
        ----------
        region : `sphgeom.Region`
            Region of interest. If `None`, no query is made and an empty
            catalog is returned.
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb for ``region``, indexed by
            ``diaObjectId`` (the column is also kept). Duplicated ids are
            dropped, keeping the first row, and `None` values are replaced
            with NaN.
        """
        if region is None:
            # If no area is specified return an empty DataFrame with the
            # column used for indexing later in AssociationTask.
            diaObjects = pd.DataFrame(columns=["diaObjectId"])
        else:
            diaObjects = apdb.getDiaObjects(region)

        diaObjects.set_index("diaObjectId", drop=False, inplace=True)
        if diaObjects.index.has_duplicates:
            self.log.warning(
                "Duplicate DiaObjects loaded from the Apdb. This may cause "
                "downstream pipeline issues. Dropping duplicated rows")
            # Keep the first complete row for each id. A boolean mask is used
            # rather than groupby().first(), which picks the first non-null
            # value per column and could therefore mix values from several
            # duplicated rows.
            diaObjects = diaObjects[~diaObjects.index.duplicated(keep="first")]

        return diaObjects.replace(to_replace=[None], value=np.nan)

    @timeMethod
    def loadDiaSources(self, diaObjects, region, dateTime, apdb):
        """Load DiaSources from the Apdb for a region and set of DiaObjects.

        Both the region and the ids of ``diaObjects`` are handed to
        ``apdb.getDiaSources``.

        Parameters
        ----------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb; must contain a ``diaObjectId``
            column.
        region : `sphgeom.Region`
            Region of interest. If `None`, no query is made and an empty
            catalog is returned.
        dateTime : `lsst.daf.base.DateTime`
            Time of the current visit
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaSources : `pandas.DataFrame`
            DiaSources loaded from the Apdb, indexed by ``diaObjectId``,
            ``band``, ``diaSourceId`` (the columns are also kept). Rows with
            a duplicated index are dropped, keeping the first row, and `None`
            values are replaced with NaN.
        """
        indexColumns = ["diaObjectId", "band", "diaSourceId"]

        diaSources = None
        if region is not None:
            diaSources = apdb.getDiaSources(region, diaObjects.loc[:, "diaObjectId"], dateTime)
        if diaSources is None:
            # Either no area was specified or the Apdb returned no catalog
            # (its interface allows `None` when DiaSource reading is disabled
            # in the Apdb configuration). Return an empty DataFrame with the
            # columns used for indexing later in AssociationTask.
            diaSources = pd.DataFrame(columns=indexColumns)

        diaSources.set_index(indexColumns, drop=False, inplace=True)
        if diaSources.index.has_duplicates:
            self.log.warning(
                "Duplicate DiaSources loaded from the Apdb. This may cause "
                "downstream pipeline issues. Dropping duplicated rows")
            # Keep the first complete row for each index entry; the mask
            # leaves the MultiIndex intact so no re-indexing is needed.
            diaSources = diaSources[~diaSources.index.duplicated(keep="first")]

        return diaSources.replace(to_replace=[None], value=np.nan)

    @timeMethod
    def loadDiaForcedSources(self, diaObjects, region, dateTime, apdb):
        """Load DiaForcedSources from the Apdb for a set of DiaObjects.

        Parameters
        ----------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb; must contain a ``diaObjectId``
            column. If empty, no query is made and an empty catalog is
            returned.
        region : `sphgeom.Region`
            Region of interest.
        dateTime : `lsst.daf.base.DateTime`
            Time of the current visit
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaForcedSources : `pandas.DataFrame`
            DiaForcedSources loaded from the Apdb, indexed by
            ``diaObjectId``, ``diaForcedSourceId`` (the columns are also
            kept). Rows with a duplicated index are dropped, keeping the
            first row, and `None` values are replaced with NaN.
        """
        indexColumns = ["diaObjectId", "diaForcedSourceId"]

        diaForcedSources = None
        if len(diaObjects) > 0:
            diaForcedSources = apdb.getDiaForcedSources(
                region,
                diaObjects.loc[:, "diaObjectId"],
                dateTime)
        if diaForcedSources is None:
            # Either no diaObjects are available or the Apdb returned no
            # catalog (its interface allows `None` when DiaForcedSource
            # reading is disabled in the Apdb configuration). Return an empty
            # DataFrame with the columns used for indexing later in
            # AssociationTask.
            diaForcedSources = pd.DataFrame(columns=indexColumns)

        diaForcedSources.set_index(indexColumns, drop=False, inplace=True)
        if diaForcedSources.index.has_duplicates:
            self.log.warning(
                "Duplicate DiaForcedSources loaded from the Apdb. This may "
                "cause downstream pipeline issues. Dropping duplicated rows.")
            # Keep the first complete row for each index entry; the mask
            # leaves the MultiIndex intact so no re-indexing is needed.
            diaForcedSources = diaForcedSources[~diaForcedSources.index.duplicated(keep="first")]

        return diaForcedSources.replace(to_replace=[None], value=np.nan)

    @timeMethod
    def _getRegion(self, exposure):
        """Calculate an enveloping region for an exposure.

        The exposure bounding box is grown by ``config.pixelMargin`` pixels
        and its corners are projected onto the sky with the exposure WCS.

        Parameters
        ----------
        exposure : `lsst.afw.image.Exposure`
            Exposure object with calibrated WCS.

        Returns
        -------
        region : `sphgeom.Region`
            Convex polygon built from the sky positions of the padded
            bounding box corners.
        """
        bbox = geom.Box2D(exposure.getBBox())
        bbox.grow(self.config.pixelMargin)
        wcs = exposure.getWcs()

        region = sphgeom.ConvexPolygon([wcs.pixelToSky(pp).getVector()
                                        for pp in bbox.getCorners()])

        return region