Coverage for python/lsst/pipe/tasks/simpleAssociation.py : 20%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of pipe_tasks.
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21#
23"""Simple association algorithm for DRP.
24Adapted from http://github.com/LSSTDESC/dia_pipe
25"""
27import numpy as np
28import pandas as pd
30import lsst.afw.table as afwTable
31import lsst.geom as geom
32import lsst.pex.config as pexConfig
33import lsst.pipe.base as pipeBase
34from lsst.obs.base import ExposureIdInfo
36from .associationUtils import query_disc, eq2xyz, toIndex
39class SimpleAssociationConfig(pexConfig.Config):
40 """Configuration parameters for the SimpleAssociationTask
41 """
42 tolerance = pexConfig.Field(
43 dtype=float,
44 doc='maximum distance to match sources together in arcsec',
45 default=0.5
46 )
47 nside = pexConfig.Field(
48 dtype=int,
49 doc='Healpix nside value used for indexing',
50 default=2**18,
51 )
54class SimpleAssociationTask(pipeBase.Task):
55 """Construct DiaObjects from a DataFrame of DIASources by spatially
56 associating the sources.
58 Represents a simple, brute force algorithm, 2-way matching of DiaSources
59 into. DiaObjects. Algorithm picks the nearest, first match within the
60 matching radius of a DiaObject to associate a source to for simplicity.
61 """
62 ConfigClass = SimpleAssociationConfig
63 _DefaultName = "simpleAssociation"
65 def run(self, diaSources, tractPatchId, skymapBits):
66 """Associate DiaSources into a collection of DiaObjects using a
67 brute force matching algorithm.
69 Reproducible is for the same input data is assured by ordering the
70 DiaSource data by ccdVisit ordering.
72 Parameters
73 ----------
74 diaSources : `pandas.DataFrame`
75 DiaSources grouped by CcdVisitId to spatially associate into
76 DiaObjects.
77 tractPatchId : `int`
78 Unique identifier for the tract patch.
79 skymapBits : `int`
80 Maximum number of bits used the ``tractPatchId`` integer
81 identifier.
83 Returns
84 -------
85 results : `lsst.pipe.base.Struct`
86 Results struct with attributes:
88 ``assocDiaSources``
89 Table of DiaSources with updated values for the DiaObjects
90 they are spatially associated to (`pandas.DataFrame`).
91 ``diaObjects``
92 Table of DiaObjects from matching DiaSources
93 (`pandas.DataFrame`).
95 """
96 # Sort by ccdVisit and diaSourceId to get a reproducible ordering for
97 # the association.
98 diaSources.reset_index(inplace=True)
99 diaSources.set_index(["ccdVisitId", "diaSourceId"], inplace=True)
101 # Empty lists to store matching and location data.
102 diaObjectCat = []
103 diaObjectCoords = []
104 healPixIndices = []
106 # Create Id factory and catalog for creating DiaObjectIds.
107 exposureIdInfo = ExposureIdInfo(tractPatchId, skymapBits)
108 idFactory = exposureIdInfo.makeSourceIdFactory()
109 idCat = afwTable.SourceCatalog(
110 afwTable.SourceTable.make(afwTable.SourceTable.makeMinimalSchema(),
111 idFactory))
113 for ccdVisit in diaSources.index.levels[0]:
114 # For the first ccdVisit, just copy the DiaSource info into the
115 # diaObject data to create the first set of Objects.
116 ccdVisitSources = diaSources.loc[ccdVisit]
117 if len(diaObjectCat) == 0:
118 for diaSourceId, diaSrc in ccdVisitSources.iterrows():
119 self.addNewDiaObject(diaSrc,
120 diaSources,
121 ccdVisit,
122 diaSourceId,
123 diaObjectCat,
124 idCat,
125 diaObjectCoords,
126 healPixIndices)
127 continue
128 # Temp list to store DiaObjects already used for this ccdVisit.
129 usedMatchIndicies = []
130 # Run over subsequent data.
131 for diaSourceId, diaSrc in ccdVisitSources.iterrows():
132 # Find matches.
133 matchResult = self.findMatches(diaSrc["ra"],
134 diaSrc["decl"],
135 2*self.config.tolerance,
136 healPixIndices,
137 diaObjectCat)
138 dists = matchResult.dists
139 matches = matchResult.matches
140 # Create a new DiaObject if no match found.
141 if dists is None:
142 self.addNewDiaObject(diaSrc,
143 diaSources,
144 ccdVisit,
145 diaSourceId,
146 diaObjectCat,
147 idCat,
148 diaObjectCoords,
149 healPixIndices)
150 continue
151 # If matched, update catalogs and arrays.
152 if np.min(dists) < np.deg2rad(self.config.tolerance/3600):
153 matchDistArg = np.argmin(dists)
154 matchIndex = matches[matchDistArg]
155 # Test to see if the DiaObject has been used.
156 if np.isin([matchIndex], usedMatchIndicies).sum() < 1:
157 self.updateCatalogs(matchIndex,
158 diaSrc,
159 diaSources,
160 ccdVisit,
161 diaSourceId,
162 diaObjectCat,
163 diaObjectCoords,
164 healPixIndices)
165 usedMatchIndicies.append(matchIndex)
166 # If the matched DiaObject has already been used, create a
167 # new DiaObject for this DiaSource.
168 else:
169 self.addNewDiaObject(diaSrc,
170 diaSources,
171 ccdVisit,
172 diaSourceId,
173 diaObjectCat,
174 idCat,
175 diaObjectCoords,
176 healPixIndices)
177 # Create new DiaObject if no match found within the matching
178 # tolerance.
179 else:
180 self.addNewDiaObject(diaSrc,
181 diaSources,
182 ccdVisit,
183 diaSourceId,
184 diaObjectCat,
185 idCat,
186 diaObjectCoords,
187 healPixIndices)
189 # Drop indices before returning associated diaSource catalog.
190 diaSources.reset_index(inplace=True)
191 diaSources.set_index("diaSourceId", inplace=True, verify_integrity=True)
193 diaObjects = pd.DataFrame(data=diaObjectCat)
194 diaSources.reset_index(inplace=True)
195 if "diaObjectId" in diaObjects.columns:
196 diaObjects.set_index("diaObjectId", inplace=True, verify_integrity=True)
198 return pipeBase.Struct(
199 assocDiaSources=diaSources,
200 diaObjects=diaObjects)
202 def addNewDiaObject(self,
203 diaSrc,
204 diaSources,
205 ccdVisit,
206 diaSourceId,
207 diaObjCat,
208 idCat,
209 diaObjCoords,
210 healPixIndices):
211 """Create a new DiaObject and append its data.
213 Parameters
214 ----------
215 diaSrc : `pandas.Series`
216 Full unassociated DiaSource to create a DiaObject from.
217 diaSources : `pandas.DataFrame`
218 DiaSource catalog to update information in. The catalog is
219 modified in place.
220 ccdVisit : `int`
221 Unique identifier of the ccdVisit where ``diaSrc`` was observed.
222 diaSourceId : `int`
223 Unique identifier of the DiaSource.
224 diaObjectCat : `list` of `dict`s
225 Catalog of diaObjects to append the new object o.
226 idCat : `lsst.afw.table.SourceCatalog`
227 Catalog with the IdFactory used to generate unique DiaObject
228 identifiers.
229 diaObjectCoords : `list` of `list`s of `lsst.geom.SpherePoint`s
230 Set of coordinates of DiaSource locations that make up the
231 DiaObject average coordinate.
232 healPixIndices : `list` of `int`s
233 HealPix indices representing the locations of each currently
234 existing DiaObject.
235 """
236 hpIndex = toIndex(self.config.nside,
237 diaSrc["ra"],
238 diaSrc["decl"])
239 healPixIndices.append(hpIndex)
241 sphPoint = geom.SpherePoint(diaSrc["ra"],
242 diaSrc["decl"],
243 geom.degrees)
244 diaObjCoords.append([sphPoint])
246 diaObjId = idCat.addNew().get("id")
247 diaObjCat.append(self.createDiaObject(diaObjId,
248 diaSrc["ra"],
249 diaSrc["decl"]))
250 diaSources.loc[(ccdVisit, diaSourceId), "diaObjectId"] = diaObjId
252 def updateCatalogs(self,
253 matchIndex,
254 diaSrc,
255 diaSources,
256 ccdVisit,
257 diaSourceId,
258 diaObjCat,
259 diaObjCoords,
260 healPixIndices):
261 """Update DiaObject and DiaSource values after an association.
263 Parameters
264 ----------
265 matchIndex : `int`
266 Array index location of the DiaObject that ``diaSrc`` was
267 associated to.
268 diaSrc : `pandas.Series`
269 Full unassociated DiaSource to create a DiaObject from.
270 diaSources : `pandas.DataFrame`
271 DiaSource catalog to update information in. The catalog is
272 modified in place.
273 ccdVisit : `int`
274 Unique identifier of the ccdVisit where ``diaSrc`` was observed.
275 diaSourceId : `int`
276 Unique identifier of the DiaSource.
277 diaObjectCat : `list` of `dict`s
278 Catalog of diaObjects to append the new object o.
279 diaObjectCoords : `list` of `list`s of `lsst.geom.SpherePoint`s
280 Set of coordinates of DiaSource locations that make up the
281 DiaObject average coordinate.
282 healPixIndices : `list` of `int`s
283 HealPix indices representing the locations of each currently
284 existing DiaObject.
285 """
286 # Update location and healPix index.
287 sphPoint = geom.SpherePoint(diaSrc["ra"],
288 diaSrc["decl"],
289 geom.degrees)
290 diaObjCoords[matchIndex].append(sphPoint)
291 aveCoord = geom.averageSpherePoint(diaObjCoords[matchIndex])
292 diaObjCat[matchIndex]["ra"] = aveCoord.getRa().asDegrees()
293 diaObjCat[matchIndex]["decl"] = aveCoord.getDec().asDegrees()
294 nSources = diaObjCat[matchIndex]["nDiaSources"]
295 diaObjCat[matchIndex]["nDiaSources"] = nSources + 1
296 healPixIndices[matchIndex] = toIndex(self.config.nside,
297 diaObjCat[matchIndex]["ra"],
298 diaObjCat[matchIndex]["decl"])
299 # Update DiaObject Id that this source is now associated to.
300 diaSources.loc[(ccdVisit, diaSourceId), "diaObjectId"] = \
301 diaObjCat[matchIndex]["diaObjectId"]
303 def findMatches(self, src_ra, src_dec, tol, hpIndices, diaObjs):
304 """Search healPixels around DiaSource locations for DiaObjects.
306 Parameters
307 ----------
308 src_ra : `float`
309 DiaSource RA location.
310 src_dec : `float`
311 DiaSource Dec location.
312 tol : `float`
313 Size of annulus to convert to covering healPixels and search for
314 DiaObjects.
315 hpIndices : `list` of `int`s
316 List of heal pix indices containing the DiaObjects in ``diaObjs``.
317 diaObjs : `list` of `dict`s
318 Catalog diaObjects to with full location information for comparing
319 to DiaSources.
321 Returns
322 -------
323 results : `lsst.pipe.base.Struct`
324 Results struct containing
326 ``dists``
327 Array of distances between the current DiaSource diaObjects.
328 (`numpy.ndarray` or `None`)
329 ``matches``
330 Array of array indices of diaObjects this DiaSource matches to.
331 (`numpy.ndarray` or `None`)
332 """
333 match_indices = query_disc(self.config.nside,
334 src_ra,
335 src_dec,
336 np.deg2rad(tol/3600.))
337 matchIndices = np.argwhere(np.isin(hpIndices, match_indices)).flatten()
339 if len(matchIndices) < 1:
340 return pipeBase.Struct(dists=None, matches=None)
342 dists = np.array(
343 [np.sqrt(np.sum((eq2xyz(src_ra, src_dec)
344 - eq2xyz(diaObjs[match]["ra"],
345 diaObjs[match]["decl"]))**2))
346 for match in matchIndices])
347 return pipeBase.Struct(
348 dists=dists,
349 matches=matchIndices)
351 def createDiaObject(self, objId, ra, decl):
352 """Create a simple empty DiaObject with location and id information.
354 Parameters
355 ----------
356 objId : `int`
357 Unique ID for this new DiaObject.
358 ra : `float`
359 RA location of this DiaObject.
360 decl : `float`
361 Dec location of this DiaObject
363 Returns
364 -------
365 DiaObject : `dict`
366 Dictionary of values representing a DiaObject.
367 """
368 new_dia_object = {"diaObjectId": objId,
369 "ra": ra,
370 "decl": decl,
371 "nDiaSources": 1}
372 return new_dia_object