Coverage for python/lsst/ap/association/loadDiaCatalogs.py: 38%

70 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-05-02 04:31 -0700

1# This file is part of ap_association. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Task for pre-loading DiaSources and DiaObjects within ap_pipe. 

23""" 

24import numpy as np 

25import pandas as pd 

26from sqlalchemy.exc import OperationalError, ProgrammingError 

27 

28import lsst.geom as geom 

29import lsst.pex.config as pexConfig 

30import lsst.pipe.base as pipeBase 

31import lsst.sphgeom as sphgeom 

32from lsst.utils.timer import timeMethod 

33 

34__all__ = ("LoadDiaCatalogsTask", "LoadDiaCatalogsConfig") 

35 

36 

class LoadDiaCatalogsConfig(pexConfig.Config):
    """Configuration for `LoadDiaCatalogsTask`.
    """
    # Non-negative integer number of pixels by which the exposure bounding
    # box is grown on every side before it is converted to a sky region.
    pixelMargin = pexConfig.RangeField(
        doc="Padding to add to all 4 edges of the bounding box (pixels)",
        dtype=int,
        default=250,
        min=0,
    )

46 

47 

class LoadDiaCatalogsTask(pipeBase.Task):
    """Retrieve DiaObjects and associated DiaSources from the Apdb given an
    input exposure.
    """
    ConfigClass = LoadDiaCatalogsConfig
    _DefaultName = "loadDiaCatalogs"

    def __init__(self, **kwargs):
        pipeBase.Task.__init__(self, **kwargs)

    @timeMethod
    def run(self, exposure, apdb, doLoadForcedSources=True):
        """Preload all DiaObjects and DiaSources from the Apdb given the
        current exposure.

        Parameters
        ----------
        exposure : `lsst.afw.image.Exposure`
            An exposure with a bounding box, a WCS and visit info.
        apdb : `lsst.dax.apdb.Apdb`
            AP database connection object.
        doLoadForcedSources : `bool`, optional
            Load forced DiaSource history from the APDB?
            This should only be turned off for debugging purposes.
            Added to allow disabling forced sources for performance
            reasons during the ops rehearsal.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaObjects`` : Complete set of DiaObjects covering the input
              exposure padded by ``pixelMargin``. DataFrame is indexed by
              the ``diaObjectId`` column. (`pandas.DataFrame`)
            - ``diaSources`` : Complete set of DiaSources covering the input
              exposure padded by ``pixelMargin``. DataFrame is indexed by
              ``diaObjectId``, ``band``, ``diaSourceId`` columns.
              (`pandas.DataFrame`)
            - ``diaForcedSources`` : Forced-source history returned by the
              Apdb for the loaded DiaObjects, indexed by ``diaObjectId``,
              ``diaForcedSourceId``. When ``doLoadForcedSources`` is
              `False` this is an empty, un-indexed DataFrame holding only
              those two columns. (`pandas.DataFrame`)

        Raises
        ------
        RuntimeError
            Raised if the Database query failed to load DiaObjects.
        """
        region = self._getRegion(exposure)

        # This is the first database query, so a failure here most likely
        # means the database was never initialised; say so explicitly.
        try:
            diaObjects = self.loadDiaObjects(region, apdb)
        except (OperationalError, ProgrammingError) as e:
            raise RuntimeError(
                "Database query failed to load DiaObjects; did you call "
                "make_apdb.py first? If you did, some other error occurred "
                "during database access of the DiaObject table.") from e

        dateTime = exposure.visitInfo.date

        diaSources = self.loadDiaSources(diaObjects, region, dateTime, apdb)

        if doLoadForcedSources:
            diaForcedSources = self.loadDiaForcedSources(diaObjects, region, dateTime, apdb)
        else:
            diaForcedSources = pd.DataFrame(columns=["diaObjectId", "diaForcedSourceId"])

        return pipeBase.Struct(
            diaObjects=diaObjects,
            diaSources=diaSources,
            diaForcedSources=diaForcedSources)

    @timeMethod
    def loadDiaObjects(self, region, apdb):
        """Load DiaObjects from the Apdb that fall within a region.

        Parameters
        ----------
        region : `sphgeom.Region`
            Region of interest. If `None`, no query is made and an empty
            catalog is returned.
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb that are within ``region``,
            indexed by ``diaObjectId`` (the column is also retained), with
            duplicated ids dropped and `None` values replaced by NaN.
        """
        if region is None:
            # If no area is specified return an empty DataFrame with the
            # column used for indexing later in AssociationTask.
            diaObjects = pd.DataFrame(columns=["diaObjectId"])
        else:
            diaObjects = apdb.getDiaObjects(region)

        return self._indexAndDeduplicate(
            diaObjects,
            "diaObjectId",
            "Duplicate DiaObjects loaded from the Apdb. This may cause "
            "downstream pipeline issues. Dropping duplicated rows")

    @timeMethod
    def loadDiaSources(self, diaObjects, region, dateTime, apdb):
        """Load DiaSources from the Apdb for a region and set of DiaObjects.

        Parameters
        ----------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb; only the ``diaObjectId``
            column is passed on to the database query.
        region : `sphgeom.Region`
            Region of interest. If `None`, no query is made and an empty
            catalog is returned.
        dateTime : `lsst.daf.base.DateTime`
            Time of the current visit
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaSources : `pandas.DataFrame`
            DiaSources returned by the Apdb, indexed by ``diaObjectId``,
            ``band``, ``diaSourceId`` (the columns are also retained), with
            duplicated index entries dropped and `None` values replaced by
            NaN.
        """
        if region is None:
            # If no area is specified return an empty DataFrame with the
            # columns used for indexing later in AssociationTask.
            diaSources = pd.DataFrame(columns=["diaObjectId",
                                               "band",
                                               "diaSourceId"])
        else:
            diaSources = apdb.getDiaSources(region, diaObjects.loc[:, "diaObjectId"], dateTime.toAstropy())

        return self._indexAndDeduplicate(
            diaSources,
            ["diaObjectId", "band", "diaSourceId"],
            "Duplicate DiaSources loaded from the Apdb. This may cause "
            "downstream pipeline issues. Dropping duplicated rows")

    @timeMethod
    def loadDiaForcedSources(self, diaObjects, region, dateTime, apdb):
        """Load DiaForcedSources from the Apdb for a set of DiaObjects.

        Parameters
        ----------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb. If empty, no query is made and
            an empty catalog is returned.
        region : `sphgeom.Region`
            Region of interest.
        dateTime : `lsst.daf.base.DateTime`
            Time of the current visit
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaForcedSources : `pandas.DataFrame`
            DiaForcedSources returned by the Apdb, indexed by
            ``diaObjectId``, ``diaForcedSourceId`` (the columns are also
            retained), with duplicated index entries dropped and `None`
            values replaced by NaN.
        """
        if len(diaObjects) == 0:
            # If no diaObjects are available return an empty DataFrame with
            # the columns used for indexing later in AssociationTask.
            diaForcedSources = pd.DataFrame(columns=["diaObjectId",
                                                     "diaForcedSourceId"])
        else:
            diaForcedSources = apdb.getDiaForcedSources(
                region,
                diaObjects.loc[:, "diaObjectId"],
                dateTime.toAstropy())

        return self._indexAndDeduplicate(
            diaForcedSources,
            ["diaObjectId", "diaForcedSourceId"],
            "Duplicate DiaForcedSources loaded from the Apdb. This may "
            "cause downstream pipeline issues. Dropping duplicated rows.")

    def _indexAndDeduplicate(self, catalog, indexColumns, duplicateWarning):
        """Index a catalog, drop duplicated index entries and normalise
        missing values.

        Parameters
        ----------
        catalog : `pandas.DataFrame`
            Catalog to process. Its index is set in place.
        indexColumns : `str` or `list` [`str`]
            Column name(s) to index by; the columns are kept in the frame.
        duplicateWarning : `str`
            Message logged at warning level if duplicates are found.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Indexed catalog holding the first row seen for each index
            value, with `None` values replaced by NaN.
        """
        catalog.set_index(indexColumns, drop=False, inplace=True)
        if catalog.index.has_duplicates:
            self.log.warning(duplicateWarning)
            # Keep the first row seen for each index value. A boolean mask
            # is used rather than ``groupby(...).first()`` because the
            # latter takes the first non-null value of each column
            # separately, which can stitch several duplicate rows into one
            # row that never existed, and it also re-sorts the catalog.
            catalog = catalog[~catalog.index.duplicated(keep="first")]

        return catalog.replace(to_replace=[None], value=np.nan)

    @timeMethod
    def _getRegion(self, exposure):
        """Calculate an enveloping region for an exposure.

        Parameters
        ----------
        exposure : `lsst.afw.image.Exposure`
            Exposure object with calibrated WCS.

        Returns
        -------
        region : `sphgeom.Region`
            Convex polygon built from the sky positions of the corners of
            the exposure bounding box grown by ``pixelMargin`` pixels.
        """
        bbox = geom.Box2D(exposure.getBBox())
        bbox.grow(self.config.pixelMargin)
        wcs = exposure.getWcs()

        region = sphgeom.ConvexPolygon([wcs.pixelToSky(pp).getVector()
                                        for pp in bbox.getCorners()])

        return region

273 

274 return region