Coverage for python/lsst/ap/association/loadDiaCatalogs.py: 38%
70 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-14 02:20 -0700
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-14 02:20 -0700
1# This file is part of ap_association.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Task for pre-loading DiaSources and DiaObjects within ap_pipe.
23"""
24import numpy as np
25import pandas as pd
26from sqlalchemy.exc import OperationalError, ProgrammingError
28import lsst.geom as geom
29import lsst.pex.config as pexConfig
30import lsst.pipe.base as pipeBase
31import lsst.sphgeom as sphgeom
32from lsst.utils.timer import timeMethod
34__all__ = ("LoadDiaCatalogsTask", "LoadDiaCatalogsConfig")
class LoadDiaCatalogsConfig(pexConfig.Config):
    """Configuration for `LoadDiaCatalogsTask`.
    """
    # Number of pixels by which the exposure bounding box is grown on every
    # side before it is converted into the sky region used to query the Apdb.
    pixelMargin = pexConfig.RangeField(
        doc="Padding to add to all 4 edges of the bounding box (pixels)",
        dtype=int,
        default=250,
        min=0,
    )
class LoadDiaCatalogsTask(pipeBase.Task):
    """Retrieve DiaObjects and their associated DiaSources and
    DiaForcedSources from the Apdb for the area covered by an exposure.
    """
    ConfigClass = LoadDiaCatalogsConfig
    _DefaultName = "loadDiaCatalogs"

    def __init__(self, **kwargs):
        pipeBase.Task.__init__(self, **kwargs)

    @timeMethod
    def run(self, exposure, apdb, doLoadForcedSources=True):
        """Preload all DiaObjects, DiaSources and (optionally)
        DiaForcedSources from the Apdb for the current exposure.

        Parameters
        ----------
        exposure : `lsst.afw.image.Exposure`
            An exposure with a bounding box, a WCS and visit info.
        apdb : `lsst.dax.apdb.Apdb`
            AP database connection object.
        doLoadForcedSources : `bool`, optional
            Load forced DiaSource history from the APDB?
            This should only be turned off for debugging purposes.
            Added to allow disabling forced sources for performance
            reasons during the ops rehearsal.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaObjects`` : Complete set of DiaObjects covering the input
              exposure padded by ``pixelMargin``. DataFrame is indexed by
              the ``diaObjectId`` column. (`pandas.DataFrame`)
            - ``diaSources`` : Complete set of DiaSources covering the input
              exposure padded by ``pixelMargin``. DataFrame is indexed by
              ``diaObjectId``, ``band``, ``diaSourceId`` columns.
              (`pandas.DataFrame`)
            - ``diaForcedSources`` : Complete set of forced photometered fluxes
              on the past 12 months of difference images at DiaObject
              locations. DataFrame is indexed by ``diaObjectId``,
              ``diaForcedSourceId`` columns; it is empty when
              ``doLoadForcedSources`` is `False`. (`pandas.DataFrame`)

        Raises
        ------
        RuntimeError
            Raised if the Database query failed to load DiaObjects.
        """
        region = self._getRegion(exposure)

        # This is the first database query, so it is the one that fails if
        # the database has not been set up.
        try:
            diaObjects = self.loadDiaObjects(region, apdb)
        except (OperationalError, ProgrammingError) as e:
            raise RuntimeError(
                "Database query failed to load DiaObjects; did you call "
                "make_apdb.py first? If you did, some other error occurred "
                "during database access of the DiaObject table.") from e

        dateTime = exposure.visitInfo.date

        diaSources = self.loadDiaSources(diaObjects, region, dateTime, apdb)

        if doLoadForcedSources:
            diaForcedSources = self.loadDiaForcedSources(diaObjects, region, dateTime, apdb)
        else:
            # Hand back the same empty, indexed catalog that
            # loadDiaForcedSources produces when there is nothing to load, so
            # callers see one shape regardless of why the catalog is empty.
            diaForcedSources = pd.DataFrame(columns=["diaObjectId", "diaForcedSourceId"])
            diaForcedSources.set_index(["diaObjectId", "diaForcedSourceId"],
                                       drop=False,
                                       inplace=True)

        return pipeBase.Struct(
            diaObjects=diaObjects,
            diaSources=diaSources,
            diaForcedSources=diaForcedSources)

    @timeMethod
    def loadDiaObjects(self, region, apdb):
        """Load DiaObjects from the Apdb that lie within a sky region.

        Parameters
        ----------
        region : `sphgeom.Region` or `None`
            Region of interest. If `None`, no query is made and an empty
            catalog is returned.
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb that are within ``region``,
            indexed by ``diaObjectId`` (the column is also retained) with
            `None` values replaced by NaN.
        """
        if region is None:
            # If no area is specified return an empty DataFrame with the
            # column used for indexing later in AssociationTask.
            diaObjects = pd.DataFrame(columns=["diaObjectId"])
        else:
            diaObjects = apdb.getDiaObjects(region)

        diaObjects.set_index("diaObjectId", drop=False, inplace=True)
        if diaObjects.index.has_duplicates:
            self.log.warning(
                "Duplicate DiaObjects loaded from the Apdb. This may cause "
                "downstream pipeline issues. Dropping duplicated rows")
            # Keep the first row for each index value, whole. A
            # groupby(...).first() would instead take the first non-null
            # value per column and could mix values from different rows.
            diaObjects = diaObjects[~diaObjects.index.duplicated(keep="first")]

        return diaObjects.replace(to_replace=[None], value=np.nan)

    @timeMethod
    def loadDiaSources(self, diaObjects, region, dateTime, apdb):
        """Load DiaSources from the Apdb for a sky region and a set of
        DiaObjects.

        Parameters
        ----------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb; the ``diaObjectId`` column is
            passed to the query.
        region : `sphgeom.Region` or `None`
            Region of interest. If `None`, no query is made and an empty
            catalog is returned.
        dateTime : `lsst.daf.base.DateTime`
            Time of the current visit
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaSources : `pandas.DataFrame`
            DiaSources returned by the Apdb query, indexed by
            ``diaObjectId``, ``band``, ``diaSourceId`` (the columns are also
            retained) with `None` values replaced by NaN.
        """
        indexColumns = ["diaObjectId", "band", "diaSourceId"]
        if region is None:
            # If no area is specified return an empty DataFrame with the
            # columns used for indexing later in AssociationTask.
            diaSources = pd.DataFrame(columns=indexColumns)
        else:
            diaSources = apdb.getDiaSources(
                region,
                diaObjects.loc[:, "diaObjectId"],
                dateTime.toAstropy())

        diaSources.set_index(indexColumns, drop=False, inplace=True)
        if diaSources.index.has_duplicates:
            self.log.warning(
                "Duplicate DiaSources loaded from the Apdb. This may cause "
                "downstream pipeline issues. Dropping duplicated rows")
            # Keep the first row for each index value, whole. Masking on the
            # index preserves the MultiIndex, so no reset is needed.
            diaSources = diaSources[~diaSources.index.duplicated(keep="first")]

        return diaSources.replace(to_replace=[None], value=np.nan)

    @timeMethod
    def loadDiaForcedSources(self, diaObjects, region, dateTime, apdb):
        """Load DiaForcedSources from the Apdb for a sky region and a set of
        DiaObjects.

        Parameters
        ----------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb. If empty, no query is made and
            an empty catalog is returned.
        region : `sphgeom.Region`
            Region of interest.
        dateTime : `lsst.daf.base.DateTime`
            Time of the current visit
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaForcedSources : `pandas.DataFrame`
            DiaForcedSources returned by the Apdb query, indexed by
            ``diaObjectId``, ``diaForcedSourceId`` (the columns are also
            retained) with `None` values replaced by NaN.
        """
        indexColumns = ["diaObjectId", "diaForcedSourceId"]
        if len(diaObjects) == 0:
            # If no diaObjects are available return an empty DataFrame with
            # the columns used for indexing later in AssociationTask.
            diaForcedSources = pd.DataFrame(columns=indexColumns)
        else:
            diaForcedSources = apdb.getDiaForcedSources(
                region,
                diaObjects.loc[:, "diaObjectId"],
                dateTime.toAstropy())

        diaForcedSources.set_index(indexColumns, drop=False, inplace=True)
        if diaForcedSources.index.has_duplicates:
            self.log.warning(
                "Duplicate DiaForcedSources loaded from the Apdb. This may "
                "cause downstream pipeline issues. Dropping duplicated rows.")
            # Keep the first row for each index value, whole. Masking on the
            # index preserves the MultiIndex, so no reset is needed.
            diaForcedSources = diaForcedSources[~diaForcedSources.index.duplicated(keep="first")]

        return diaForcedSources.replace(to_replace=[None], value=np.nan)

    @timeMethod
    def _getRegion(self, exposure):
        """Calculate an enveloping region for an exposure.

        The exposure bounding box is grown by ``config.pixelMargin`` pixels
        and its corners are projected onto the sky with the exposure WCS.

        Parameters
        ----------
        exposure : `lsst.afw.image.Exposure`
            Exposure object with calibrated WCS.

        Returns
        -------
        region : `sphgeom.Region`
            Convex polygon enveloping the padded exposure.
        """
        bbox = geom.Box2D(exposure.getBBox())
        bbox.grow(self.config.pixelMargin)
        wcs = exposure.getWcs()

        return sphgeom.ConvexPolygon([wcs.pixelToSky(pp).getVector()
                                      for pp in bbox.getCorners()])