Coverage for python/lsst/ap/association/loadDiaCatalogs.py: 29%
77 statements
« prev ^ index » next coverage.py v7.2.1, created at 2023-03-12 10:45 +0000
« prev ^ index » next coverage.py v7.2.1, created at 2023-03-12 10:45 +0000
1# This file is part of ap_association.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Task for pre-loading DiaSources and DiaObjects within ap_pipe.
23"""
24import numpy as np
25import pandas as pd
26from sqlalchemy.exc import OperationalError, ProgrammingError
28import lsst.geom as geom
29import lsst.pex.config as pexConfig
30import lsst.pipe.base as pipeBase
31import lsst.sphgeom as sphgeom
33__all__ = ("LoadDiaCatalogsTask", "LoadDiaCatalogsConfig")
class LoadDiaCatalogsConfig(pexConfig.Config):
    """Configuration for `LoadDiaCatalogsTask`.

    Controls the HTM pixelization used to compute the sky coverage of an
    exposure, the padding applied to the exposure bounding box before that
    coverage is computed, and the way DiaSources are selected from the Apdb.
    """
    # Passed to ``sphgeom.HtmPixelization`` when the task is constructed.
    htmLevel = pexConfig.RangeField(
        dtype=int,
        doc="Level of the HTM pixelization.",
        default=20,
        min=1,
    )
    # Upper bound handed to ``HtmPixelization.envelope`` when covering the
    # padded exposure region.
    htmMaxRanges = pexConfig.RangeField(
        dtype=int,
        doc="Maximum number of HTM (min, max) ranges to return.",
        default=128,
        min=2,
    )
    # Amount by which the exposure bounding box is grown before its corners
    # are projected onto the sky.
    pixelMargin = pexConfig.RangeField(
        doc="Padding to add to all 4 edges of the bounding box (pixels)",
        dtype=int,
        default=250,
        min=0,
    )
    # True: DiaSources are queried by region; False: DiaSources are queried
    # by the ids of the DiaObjects that were already loaded.
    loadDiaSourcesByPixelId = pexConfig.Field(
        doc="Load DiaSources by their HTM pixelId instead of by their "
            "associated diaObjectId",
        dtype=bool,
        default=True,
    )
class LoadDiaCatalogsTask(pipeBase.Task):
    """Retrieve DiaObjects and their associated DiaSources and
    DiaForcedSources from the Apdb given an input exposure.
    """
    ConfigClass = LoadDiaCatalogsConfig
    _DefaultName = "loadDiaCatalogs"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Built once here and reused by ``_getPixelRanges`` on every call.
        self.pixelator = sphgeom.HtmPixelization(self.config.htmLevel)

    @pipeBase.timeMethod
    def run(self, exposure, apdb):
        """Preload all DiaObjects, DiaSources and DiaForcedSources from the
        Apdb given the current exposure.

        Parameters
        ----------
        exposure : `lsst.afw.image.Exposure`
            An exposure with a bounding box.
        apdb : `lsst.dax.apdb.Apdb`
            AP database connection object.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaObjects`` : Complete set of DiaObjects covering the input
              exposure padded by ``pixelMargin``. DataFrame is indexed by
              the ``diaObjectId`` column. (`pandas.DataFrame`)
            - ``diaSources`` : DiaSources either covering the input
              exposure padded by ``pixelMargin`` or belonging to the loaded
              DiaObjects, depending on ``loadDiaSourcesByPixelId``.
              DataFrame is indexed by ``diaObjectId``, ``filterName``,
              ``diaSourceId`` columns. (`pandas.DataFrame`)
            - ``diaForcedSources`` : DiaForcedSources belonging to the
              loaded DiaObjects. DataFrame is indexed by ``diaObjectId``,
              ``diaForcedSourceId`` columns. (`pandas.DataFrame`)

        Raises
        ------
        RuntimeError
            Raised if loading the DiaObjects fails with a SQLAlchemy
            ``OperationalError`` or ``ProgrammingError``.
        """
        visitInfo = exposure.getInfo().getVisitInfo()
        pixelRanges = self._getPixelRanges(exposure)

        # This is the first database query, so database-level failures are
        # re-raised here with a hint about the most likely cause.
        try:
            diaObjects = self.loadDiaObjects(pixelRanges, apdb)
        except (OperationalError, ProgrammingError) as e:
            raise RuntimeError(
                "Database query failed to load DiaObjects; did you call "
                "make_apdb.py first? If you did, some other error occurred "
                "during database access of the DiaObject table.") from e

        dateTime = visitInfo.getDate().toPython()

        diaSources = self.loadDiaSources(diaObjects,
                                         pixelRanges,
                                         dateTime,
                                         apdb)

        diaForcedSources = self.loadDiaForcedSources(diaObjects,
                                                     dateTime,
                                                     apdb)

        return pipeBase.Struct(
            diaObjects=diaObjects,
            diaSources=diaSources,
            diaForcedSources=diaForcedSources)

    @pipeBase.timeMethod
    def loadDiaObjects(self, pixelRanges, apdb):
        """Load DiaObjects from the Apdb based on their HTM location.

        Parameters
        ----------
        pixelRanges : `list` of `tuples`
            Ranges of pixel values that cover region of interest.
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb that are within the area defined
            by ``pixelRanges``, indexed by ``diaObjectId`` with ``None``
            values replaced by NaN.
        """
        if len(pixelRanges) == 0:
            # If no area is specified return an empty DataFrame with the
            # column used for indexing later in AssociationTask.
            diaObjects = pd.DataFrame(columns=["diaObjectId"])
        else:
            diaObjects = apdb.getDiaObjects(pixelRanges, return_pandas=True)

        # Keep ``diaObjectId`` as a regular column as well as the index.
        diaObjects.set_index("diaObjectId", drop=False, inplace=True)
        if diaObjects.index.has_duplicates:
            self.log.warning(
                "Duplicate DiaObjects loaded from the Apdb. This may cause "
                "downstream pipeline issues. Dropping duplicated rows")
            # Drop duplicates via index and keep the first appearance.
            # NOTE(review): ``first()`` takes the first non-null value of
            # each column within a group rather than the first whole row;
            # confirm that is the intended behaviour.
            diaObjects = diaObjects.groupby(diaObjects.index).first()

        # Represent missing values uniformly as NaN.
        return diaObjects.replace(to_replace=[None], value=np.nan)

    @pipeBase.timeMethod
    def loadDiaSources(self, diaObjects, pixelRanges, dateTime, apdb):
        """Load DiaSources from the Apdb based on their diaObjectId or
        pixelId location.

        Variable used to load sources is set in config
        (``loadDiaSourcesByPixelId``).

        Parameters
        ----------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb that are within the area defined
            by ``pixelRanges``.
        pixelRanges : `list` of `tuples`
            Ranges of pixelIds that cover region of interest.
        dateTime : `datetime.datetime`
            Time of the current visit
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaSources : `pandas.DataFrame`
            DiaSources loaded from the Apdb that are within the area defined
            by ``pixelRanges`` or associated with ``diaObjects``, indexed by
            ``diaObjectId``, ``filterName``, ``diaSourceId`` with ``None``
            values replaced by NaN.
        """
        indexColumns = ["diaObjectId", "filterName", "diaSourceId"]

        if self.config.loadDiaSourcesByPixelId:
            if len(pixelRanges) == 0:
                # If no area is specified return an empty DataFrame with
                # the columns used for indexing later in AssociationTask.
                diaSources = pd.DataFrame(columns=indexColumns)
            else:
                diaSources = apdb.getDiaSourcesInRegion(pixelRanges,
                                                        dateTime,
                                                        return_pandas=True)
        else:
            if len(diaObjects) == 0:
                # If no diaObjects are available return an empty DataFrame
                # with the columns used for indexing later in
                # AssociationTask.
                diaSources = pd.DataFrame(columns=indexColumns)
            else:
                diaSources = apdb.getDiaSources(
                    diaObjects.loc[:, "diaObjectId"],
                    dateTime,
                    return_pandas=True)

        # Keep the key columns as regular columns as well as the index.
        diaSources.set_index(indexColumns, drop=False, inplace=True)
        if diaSources.index.has_duplicates:
            self.log.warning(
                "Duplicate DiaSources loaded from the Apdb. This may cause "
                "downstream pipeline issues. Dropping duplicated rows")
            # Drop duplicates via index and keep the first appearance. Reset
            # due to the index shape being slightly different than expected,
            # then rebuild the index from the retained key columns.
            # NOTE(review): ``first()`` takes the first non-null value of
            # each column within a group rather than the first whole row;
            # confirm that is the intended behaviour.
            diaSources = diaSources.groupby(diaSources.index).first().reset_index(drop=True)
            diaSources.set_index(indexColumns, drop=False, inplace=True)

        # Represent missing values uniformly as NaN.
        return diaSources.replace(to_replace=[None], value=np.nan)

    @pipeBase.timeMethod
    def loadDiaForcedSources(self, diaObjects, dateTime, apdb):
        """Load DiaForcedSources from the Apdb based on the ids of the
        already loaded DiaObjects.

        Parameters
        ----------
        diaObjects : `pandas.DataFrame`
            DiaObjects loaded from the Apdb.
        dateTime : `datetime.datetime`
            Time of the current visit
        apdb : `lsst.dax.apdb.Apdb`
            Database connection object to load from.

        Returns
        -------
        diaForcedSources : `pandas.DataFrame`
            DiaForcedSources loaded from the Apdb for the ``diaObjectId``
            values in ``diaObjects``, indexed by ``diaObjectId``,
            ``diaForcedSourceId`` with ``None`` values replaced by NaN.
        """
        indexColumns = ["diaObjectId", "diaForcedSourceId"]

        if len(diaObjects) == 0:
            # If no diaObjects are available return an empty DataFrame with
            # the columns used for indexing later in AssociationTask.
            diaForcedSources = pd.DataFrame(columns=indexColumns)
        else:
            diaForcedSources = apdb.getDiaForcedSources(
                diaObjects.loc[:, "diaObjectId"],
                dateTime,
                return_pandas=True)

        # Keep the key columns as regular columns as well as the index.
        diaForcedSources.set_index(indexColumns, drop=False, inplace=True)
        if diaForcedSources.index.has_duplicates:
            self.log.warning(
                "Duplicate DiaForcedSources loaded from the Apdb. This may "
                "cause downstream pipeline issues. Dropping duplicated rows.")
            # Drop duplicates via index and keep the first appearance. Reset
            # due to the index shape being slightly different than expected,
            # then rebuild the index from the retained key columns.
            # NOTE(review): ``first()`` takes the first non-null value of
            # each column within a group rather than the first whole row;
            # confirm that is the intended behaviour.
            diaForcedSources = diaForcedSources.groupby(diaForcedSources.index).first()
            diaForcedSources.reset_index(drop=True, inplace=True)
            diaForcedSources.set_index(indexColumns, drop=False, inplace=True)

        # Represent missing values uniformly as NaN.
        return diaForcedSources.replace(to_replace=[None], value=np.nan)

    @pipeBase.timeMethod
    def _getPixelRanges(self, exposure):
        """Calculate covering HTM pixels for the current exposure.

        The exposure bounding box is grown by ``pixelMargin`` on every side,
        its corners are converted to sky coordinates with the exposure WCS,
        and the resulting polygon is covered with at most ``htmMaxRanges``
        HTM ranges.

        Parameters
        ----------
        exposure : `lsst.afw.image.Exposure`
            Exposure object with calibrated WCS.

        Returns
        -------
        htmRanges : `list` of `tuples`
            A list of tuples containing `int` values.
        """
        bbox = geom.Box2D(exposure.getBBox())
        bbox.grow(self.config.pixelMargin)
        wcs = exposure.getWcs()

        region = sphgeom.ConvexPolygon([wcs.pixelToSky(pp).getVector()
                                        for pp in bbox.getCorners()])

        indices = self.pixelator.envelope(region, self.config.htmMaxRanges)

        return indices.ranges()