Coverage for python/lsst/ap/association/loadDiaCatalogs.py: 31%

69 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-23 04:02 -0700

1# This file is part of ap_association. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Task for pre-loading DiaSources and DiaObjects within ap_pipe. 

23""" 

24import numpy as np 

25import pandas as pd 

26from sqlalchemy.exc import OperationalError, ProgrammingError 

27 

28import lsst.geom as geom 

29import lsst.pex.config as pexConfig 

30import lsst.pipe.base as pipeBase 

31import lsst.sphgeom as sphgeom 

32from lsst.utils.timer import timeMethod 

33 

34__all__ = ("LoadDiaCatalogsTask", "LoadDiaCatalogsConfig") 

35 

36 

class LoadDiaCatalogsConfig(pexConfig.Config):
    """Configuration for `LoadDiaCatalogsTask`.
    """
    # Number of pixels by which the exposure bounding box is grown on every
    # side before it is converted into the sky region used for the Apdb
    # queries. Must be non-negative.
    pixelMargin = pexConfig.RangeField(
        doc="Padding to add to all edges of the bounding box (pixels)",
        dtype=int,
        default=250,
        min=0,
    )

46 

47 

class LoadDiaCatalogsTask(pipeBase.Task):
    """Retrieve DiaObjects and their associated DiaSources and
    DiaForcedSources from the Apdb given an input exposure.
    """
    ConfigClass = LoadDiaCatalogsConfig
    _DefaultName = "loadDiaCatalogs"

    def __init__(self, **kwargs):
        pipeBase.Task.__init__(self, **kwargs)

    @timeMethod
    def run(self, exposure, apdb):
        """Preload all DiaObjects, DiaSources and DiaForcedSources from the
        Apdb given the current exposure.

        Parameters
        ----------
        exposure : `lsst.afw.image.Exposure`
            An exposure with a bounding box, a WCS and visit info.
        apdb : `lsst.dax.apdb.Apdb`
            AP database connection object.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaObjects`` : DiaObjects covering the input exposure padded
              by ``pixelMargin``. DataFrame is indexed by the
              ``diaObjectId`` column. (`pandas.DataFrame`)
            - ``diaSources`` : DiaSources covering the input exposure padded
              by ``pixelMargin``. DataFrame is indexed by the
              ``diaObjectId``, ``filterName``, ``diaSourceId`` columns.
              (`pandas.DataFrame`)
            - ``diaForcedSources`` : DiaForcedSources loaded for
              ``diaObjects``. DataFrame is indexed by the ``diaObjectId``,
              ``diaForcedSourceId`` columns. (`pandas.DataFrame`)

        Raises
        ------
        RuntimeError
            Raised if loading the DiaObjects fails with a sqlalchemy
            ``OperationalError`` or ``ProgrammingError``.
        """
        visitInfo = exposure.getInfo().getVisitInfo()
        region = self._getRegion(exposure)

        # This is the first database query, so database-level failures are
        # re-raised here with a hint about the most likely cause.
        try:
            diaObjects = self.loadDiaObjects(region, apdb)
        except (OperationalError, ProgrammingError) as e:
            raise RuntimeError(
                "Database query failed to load DiaObjects; did you call "
                "make_apdb.py first? If you did, some other error occurred "
                "during database access of the DiaObject table.") from e

        dateTime = visitInfo.getDate()

        diaSources = self.loadDiaSources(diaObjects, region, dateTime, apdb)
        diaForcedSources = self.loadDiaForcedSources(diaObjects, region, dateTime, apdb)

        return pipeBase.Struct(
            diaObjects=diaObjects,
            diaSources=diaSources,
            diaForcedSources=diaForcedSources)

    @timeMethod
    def loadDiaObjects(self, region, apdb):
        """Load DiaObjects from the Apdb that fall within a region.

        Parameters
        ----------
        region : `sphgeom.Region` or `None`
            Region of interest. If `None`, no query is made and an empty
            catalog is returned.
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaObjects : `pandas.DataFrame`
            DiaObjects returned by ``apdb.getDiaObjects(region)``, indexed by
            ``diaObjectId`` (the column is also kept), with duplicated ids
            removed and `None` values replaced by NaN.
        """
        if region is None:
            # If no area is specified return an empty DataFrame with the
            # column used for indexing later in AssociationTask.
            diaObjects = pd.DataFrame(columns=["diaObjectId"])
        else:
            diaObjects = apdb.getDiaObjects(region)

        diaObjects.set_index("diaObjectId", drop=False, inplace=True)
        if diaObjects.index.has_duplicates:
            self.log.warning(
                "Duplicate DiaObjects loaded from the Apdb. This may cause "
                "downstream pipeline issues. Dropping duplicated rows")
            # Keep the complete first row for every id. A groupby().first()
            # would instead take the first non-null value per column, which
            # can combine values from different duplicate rows and re-sorts
            # the catalog.
            diaObjects = diaObjects[~diaObjects.index.duplicated(keep="first")]

        return diaObjects.replace(to_replace=[None], value=np.nan)

    @timeMethod
    def loadDiaSources(self, diaObjects, region, dateTime, apdb):
        """Load DiaSources from the Apdb for a region and set of DiaObjects.

        Both ``region`` and the ``diaObjectId`` column of ``diaObjects`` are
        passed to ``apdb.getDiaSources``.

        Parameters
        ----------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb; must contain a ``diaObjectId``
            column.
        region : `sphgeom.Region` or `None`
            Region of interest. If `None`, no query is made and an empty
            catalog is returned.
        dateTime : `lsst.daf.base.DateTime`
            Time of the current visit.
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaSources : `pandas.DataFrame`
            DiaSources returned by the Apdb, indexed by the ``diaObjectId``,
            ``filterName``, ``diaSourceId`` columns (the columns are also
            kept), with duplicated index entries removed and `None` values
            replaced by NaN.
        """
        indexColumns = ["diaObjectId", "filterName", "diaSourceId"]
        if region is None:
            # If no area is specified return an empty DataFrame with the
            # columns used for indexing later in AssociationTask.
            diaSources = pd.DataFrame(columns=indexColumns)
        else:
            diaSources = apdb.getDiaSources(region, diaObjects.loc[:, "diaObjectId"], dateTime)

        diaSources.set_index(indexColumns, drop=False, inplace=True)
        if diaSources.index.has_duplicates:
            self.log.warning(
                "Duplicate DiaSources loaded from the Apdb. This may cause "
                "downstream pipeline issues. Dropping duplicated rows")
            # Keep the complete first row for every index entry. Masking on
            # the index preserves the MultiIndex, so it does not need to be
            # rebuilt afterwards.
            diaSources = diaSources[~diaSources.index.duplicated(keep="first")]

        return diaSources.replace(to_replace=[None], value=np.nan)

    @timeMethod
    def loadDiaForcedSources(self, diaObjects, region, dateTime, apdb):
        """Load DiaForcedSources from the Apdb for a set of DiaObjects.

        Parameters
        ----------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb; must contain a ``diaObjectId``
            column. If empty, no query is made and an empty catalog is
            returned.
        region : `sphgeom.Region`
            Region of interest.
        dateTime : `lsst.daf.base.DateTime`
            Time of the current visit.
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaForcedSources : `pandas.DataFrame`
            DiaForcedSources returned by the Apdb, indexed by the
            ``diaObjectId``, ``diaForcedSourceId`` columns (the columns are
            also kept), with duplicated index entries removed and `None`
            values replaced by NaN.
        """
        indexColumns = ["diaObjectId", "diaForcedSourceId"]
        if len(diaObjects) == 0:
            # If no diaObjects are available return an empty DataFrame with
            # the columns used for indexing later in AssociationTask.
            diaForcedSources = pd.DataFrame(columns=indexColumns)
        else:
            diaForcedSources = apdb.getDiaForcedSources(
                region,
                diaObjects.loc[:, "diaObjectId"],
                dateTime)

        diaForcedSources.set_index(indexColumns, drop=False, inplace=True)
        if diaForcedSources.index.has_duplicates:
            self.log.warning(
                "Duplicate DiaForcedSources loaded from the Apdb. This may "
                "cause downstream pipeline issues. Dropping duplicated rows.")
            # Keep the complete first row for every index entry. Masking on
            # the index preserves the MultiIndex, so it does not need to be
            # rebuilt afterwards.
            diaForcedSources = diaForcedSources[~diaForcedSources.index.duplicated(keep="first")]

        return diaForcedSources.replace(to_replace=[None], value=np.nan)

    @timeMethod
    def _getRegion(self, exposure):
        """Calculate an enveloping region for an exposure.

        The exposure bounding box is grown by ``config.pixelMargin`` pixels
        and its corners are projected onto the sky with the exposure WCS.

        Parameters
        ----------
        exposure : `lsst.afw.image.Exposure`
            Exposure object with calibrated WCS.

        Returns
        -------
        region : `sphgeom.Region`
            Convex polygon built from the padded bounding-box corners.
        """
        bbox = geom.Box2D(exposure.getBBox())
        bbox.grow(self.config.pixelMargin)
        wcs = exposure.getWcs()

        region = sphgeom.ConvexPolygon([wcs.pixelToSky(pp).getVector()
                                        for pp in bbox.getCorners()])

        return region

265 

266 return region