Coverage for python/lsst/ap/association/transformDiaSourceCatalog.py : 24%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ap_association
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Public API of this module: the pipeline-task triple (connections, config,
# task) plus the standalone flag-unpacking helper.
__all__ = ("TransformDiaSourceCatalogConnections",
           "TransformDiaSourceCatalogConfig",
           "TransformDiaSourceCatalogTask",
           "UnpackApdbFlags")
27import numpy as np
28import os
29import yaml
31from lsst.daf.base import DateTime
32import lsst.pex.config as pexConfig
33import lsst.pipe.base as pipeBase
34import lsst.pipe.base.connectionTypes as connTypes
35from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask
36from lsst.pipe.tasks.parquetTable import ParquetTable
class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    diaSourceTable = connTypes.Output(
        # Was the placeholder ".", which surfaces verbatim in pipeline docs.
        doc="Catalog of DiaSources with calibrated values and standardized "
            "columns, ready for use by downstream Apdb tasks.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
class TransformDiaSourceCatalogConfig(pipeBase.PipelineTaskConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Configuration for TransformDiaSourceCatalogTask.

    Paths below use ``${AP_ASSOCIATION_DIR}``, expanded at run time via
    `os.path.expandvars` (see ``_create_bit_pack_mappings``).
    """
    flagMap = pexConfig.Field(
        dtype=str,
        doc="Yaml file specifying SciencePipelines flag fields to bit packs.",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "association-flag-map.yaml"),
    )
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying Science DataModel functors to use '
            'when copying columns and computing calibrated values.',
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "DiaSource.yaml")
    )
    doRemoveSkySources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Input DiaSource catalog contains SkySources that should be "
            "removed before storing the output DiaSource catalog."
    )
class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Apply Science DataModel-ification on the DiaSource afw table.

    This task calibrates and renames columns in the DiaSource catalog
    to ready the catalog for insertion into the Apdb.

    This is a Gen3 Butler only task. It will not run in Gen2.
    """

    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner
    # Needed to create a valid TransformCatalogBaseTask, but unused
    inputDataset = "deepDiff_diaSrc"
    outputDataset = "deepDiff_diaSrcTable"

    def __init__(self, initInputs, **kwargs):
        """Initialize the task, loading functors and validating the flag map.

        Parameters
        ----------
        initInputs : `dict`
            Must contain the ``diaSourceSchema`` init-input (declared in the
            connections class); its ``.schema`` is used to validate flag
            columns.
        **kwargs
            Forwarded to `TransformCatalogBaseTask`.
        """
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

    def _create_bit_pack_mappings(self):
        """Setup all flag bit packings.

        Loads the yaml flag map named by ``config.flagMap``, keeps the
        column specification of the ``DiaSource`` table, and verifies every
        referenced flag column exists in the input schema.

        Raises
        ------
        KeyError
            If a flag named in the map is absent from the input schema.
        """
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(self.config.flagMap)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schemas.
        # Output schemas are flexible, however if names are not specified in
        # the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            for bit in bitList:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError:
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name'])

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """Unpack butler inputs, derive per-visit identifiers, and run.

        ``ccdVisitId`` is the packed visit+detector id; ``band`` is taken
        from the quantum dataId.
        """
        inputs = butlerQC.get(inputRefs)
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdVisitId"] = expId
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            funcs=None):
        """Convert input catalog to ParquetTable/Pandas and run functors.

        Additionally, add new columns for stripping information from the
        exposure and into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of sources detected on the difference image.
        diffIm : `lsst.afw.image.Exposure`
            Difference image the sources were detected on; only its
            visitInfo date is read here (for ``midPointTai``).
        band : `str`
            Band/filter name of the observation; stored as ``filterName``.
        ccdVisitId : `int`
            Packed visit+detector identifier, stored per row.
        funcs : optional
            NOTE(review): this argument is currently ignored; the functors
            loaded in ``__init__`` (``self.funcs``) are always used.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated values
              and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
        """
        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        if self.config.doRemoveSkySources:
            # Drop sky sources here; computeBBoxSizes skips the same rows so
            # the column lengths stay consistent.
            diaSourceDf = diaSourceDf[~diaSourceDf["sky_source"]]
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        # Placeholders; real values are assigned downstream (association).
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["pixelId"] = 0
        self.bitPackFlags(diaSourceDf)

        df = self.transform(band,
                            ParquetTable(dataFrame=diaSourceDf),
                            self.funcs,
                            dataId=None).df
        # The Ra/DecColumn functors preserve the coord_ra/dec original columns.
        # Since we don't need these and keeping them causes a DB insert crash
        # we drop them from the DataFrame before returning output catalog.
        return pipeBase.Struct(
            diaSourceTable=df.drop(columns=["coord_ra", "coord_dec"]),
        )

    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the detection
        footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `list` of `float`
            Array of bbox sizes.
        """
        outputBBoxSizes = []
        for record in inputCatalog:
            if self.config.doRemoveSkySources:
                # Keep the output aligned with the rows retained in ``run``.
                if record["sky_source"]:
                    continue
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the footprint
            # bounding box. This is the largest footprint we should need to cover
            # the complete DiaSource assuming the centroid is within the bounding
            # box.
            maxSize = 2 * np.max([footprintBBox.getWidth(),
                                  footprintBBox.getHeight()])
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            if bboxSize > maxSize:
                bboxSize = maxSize
            outputBBoxSizes.append(bboxSize)

        return outputBBoxSizes

    def bitPackFlags(self, df):
        """Pack requested flag columns in inputRecord into single columns in
        outputRecord.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into.
        """
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in bitList:
                # Hard type the bit arrays.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value
class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Attributes
    ----------
    flag_map_file : `str`
        Absolute or relative path to a yaml file specifying mappings of flags
        to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """

    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

        # Map each packed column name to the ordered (flag name, bool) pairs
        # used to build the structured output dtype in ``unpack``.
        self.output_flag_columns = {}

        for column in self.bit_pack_columns:
            names = []
            for bit in column["bitList"]:
                names.append((bit["name"], bool))
            self.output_flag_columns[column["columnName"]] = names

    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of unsigned
        ints.

        Parameters
        ----------
        input_flag_values : array-like of type uint
            Array of integer flags to unpack.
        flag_name : `str`
            Apdb column name of integer flags to unpack. Names of packed int
            flags are given by the flag_map_file.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Numpy named tuple of booleans.
        """
        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values), dtype=bit_names_types)

        # Use the explicit bit number from the flag map rather than the list
        # index: the packing side (bitPackFlags) uses ``bit['bit']``, and the
        # yaml map may assign non-contiguous or out-of-order bit positions.
        bit_numbers = {}
        for column in self.bit_pack_columns:
            if column["columnName"] == flag_name:
                bit_numbers = {bit["name"]: bit["bit"]
                               for bit in column["bitList"]}
                break

        for bit_name, _ in bit_names_types:
            masked_bits = np.bitwise_and(input_flag_values,
                                         2**bit_numbers[bit_name])
            output_flags[bit_name] = masked_bits != 0

        return output_flags