# python/lsst/ap/association/transformDiaSourceCatalog.py
# This file is part of ap_association
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

__all__ = ("TransformDiaSourceCatalogConnections",
           "TransformDiaSourceCatalogConfig",
           "TransformDiaSourceCatalogTask",
           "UnpackApdbFlags")

import numpy as np
import os
import yaml

from lsst.daf.base import DateTime
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as connTypes
from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask
from lsst.pipe.tasks.parquetTable import ParquetTable


class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    diaSourceTable = connTypes.Output(
        doc="Catalog of DiaSources, transformed and standardized for "
            "insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
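
# With the default templates above (coaddName="deep", fakesType=""), the
# dataset names resolve to e.g. "deepDiff_diaSrc" for the input catalog and
# "deepDiff_diaSrcTable" for the output table.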


class TransformDiaSourceCatalogConfig(pipeBase.PipelineTaskConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Config for TransformDiaSourceCatalogTask.
    """
    flagMap = pexConfig.Field(
        dtype=str,
        doc="YAML file specifying how Science Pipelines flag fields are "
            "packed into bits.",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "association-flag-map.yaml"),
    )
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying Science DataModel functors to use '
            'when copying columns and computing calibrated values.',
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "DiaSource.yaml")
    )
    doRemoveSkySources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Input DiaSource catalog contains SkySources that should be "
            "removed before storing the output DiaSource catalog."
    )
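
# A minimal configuration sketch (illustrative only; overrides like these
# would normally live in a pipeline or obs-package config file, not here):
#
#     config = TransformDiaSourceCatalogConfig()
#     config.doRemoveSkySources = True  # drop sky sources before output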


class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Apply Science DataModel-ification on the DiaSource afw table.

    This task calibrates and renames columns in the DiaSource catalog
    to ready the catalog for insertion into the Apdb.

    This is a Gen3 Butler only task. It will not run in Gen2.
    """

    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs, **kwargs):
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

    def _create_bit_pack_mappings(self):
        """Setup all flag bit packings.
        """
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(self.config.flagMap)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schemas.
        # Output schemas are flexible, however if names are not specified in
        # the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            for bit in bitList:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError:
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name'])

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdVisitId"] = expId
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            funcs=None):
        """Convert input catalog to ParquetTable/Pandas and run functors.

        Additionally, add new columns for stripping information from the
        exposure and into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of DiaSources measured on the difference image.
        diffIm : `lsst.afw.image.Exposure`
            Difference image on which the DiaSources were detected.
        band : `str`
            Filter band of the observation.
        ccdVisitId : `int`
            Unique identifier of the visit/detector pair, packed from the
            data id.
        funcs : `lsst.pipe.tasks.functors.CompositeFunctor`, optional
            Functors to apply to the catalog's columns.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated values
              and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
        """
        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        if self.config.doRemoveSkySources:
            diaSourceDf = diaSourceDf[~diaSourceDf["sky_source"]]
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["pixelId"] = 0
        self.bitPackFlags(diaSourceDf)

        df = self.transform(band,
                            ParquetTable(dataFrame=diaSourceDf),
                            self.funcs,
                            dataId=None).df
        # The Ra/DecColumn functors preserve the original coord_ra/coord_dec
        # columns. We don't need them, and keeping them causes a crash on
        # database insert, so drop them from the DataFrame before returning
        # the output catalog.
        return pipeBase.Struct(
            diaSourceTable=df.drop(columns=["coord_ra", "coord_dec"]),
        )

    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the detection
        footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `list` of `int`
            Array of bbox sizes.
        """
        outputBBoxSizes = []
        for record in inputCatalog:
            if self.config.doRemoveSkySources:
                if record["sky_source"]:
                    continue
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the
            # footprint bounding box. This is the largest footprint we
            # should need to cover the complete DiaSource, assuming the
            # centroid is within the bounding box.
            maxSize = 2 * np.max([footprintBBox.getWidth(),
                                  footprintBBox.getHeight()])
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            if bboxSize > maxSize:
                bboxSize = maxSize
            outputBBoxSizes.append(bboxSize)

        return outputBBoxSizes
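
    # A worked example of the computation above (hypothetical numbers): for
    # an integer footprint bbox spanning x in [10, 20] and y in [12, 18]
    # (width 11, height 7) with centroid (12.0, 13.0), the largest
    # centroid-to-edge distance is |20 - 12| = 8, giving
    # bboxSize = ceil(2 * 8) = 16; that is under the cap
    # maxSize = 2 * max(11, 7) = 22, so 16 is kept.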

    def bitPackFlags(self, df):
        """Pack requested flag columns in ``df`` into single integer columns,
        modifying ``df`` in place.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into.
        """
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in bitList:
                # Hard type the bit arrays.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value
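
    # A minimal sketch of the packing arithmetic above, assuming pandas is
    # imported as ``pd`` and using hypothetical flag names "a" (bit 0) and
    # "c" (bit 2):
    #
    #     demo = pd.DataFrame({"a": [True, False], "c": [True, True]})
    #     packed = (demo["a"]*2**0 + demo["c"]*2**2).to_numpy().astype(np.uint64)
    #     # packed == array([5, 4], dtype=uint64)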


class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Parameters
    ----------
    flag_map_file : `str`
        Absolute or relative path to a YAML file specifying mappings of flags
        to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """

    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

        self.output_flag_columns = {}

        for column in self.bit_pack_columns:
            names = []
            for bit in column["bitList"]:
                names.append((bit["name"], bool))
            self.output_flag_columns[column["columnName"]] = names

    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of unsigned
        ints.

        Parameters
        ----------
        input_flag_values : array-like of type `uint`
            Array of integer flags to unpack.
        flag_name : `str`
            Apdb column name of integer flags to unpack. Names of packed int
            flags are given by the flag_map_file.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Structured array of booleans, with one field per unpacked flag
            bit.
        """
        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values), dtype=bit_names_types)

        for bit_idx, (bit_name, dtypes) in enumerate(bit_names_types):
            masked_bits = np.bitwise_and(input_flag_values, 2**bit_idx)
            output_flags[bit_name] = masked_bits

        return output_flags
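

if __name__ == "__main__":
    # Minimal, self-contained round-trip sketch of the pack/unpack arithmetic
    # used above. The flag names and packed values are hypothetical; this
    # avoids needing a flag-map file or an afw schema.
    packed = np.array([0b101, 0b010], dtype=np.uint64)
    names = [("a", bool), ("b", bool), ("c", bool)]
    unpacked = np.zeros(len(packed), dtype=names)
    for bit_idx, (bit_name, _) in enumerate(names):
        # Nonzero masked bits become True in the boolean output field.
        unpacked[bit_name] = np.bitwise_and(packed, 2**bit_idx)
    # First row (0b101) sets flags "a" and "c"; second row (0b010) sets "b".
    print(unpacked)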