Coverage for python/lsst/ap/association/transformDiaSourceCatalog.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ap_association
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Public names exported by this module.
__all__ = (
    "TransformDiaSourceCatalogConnections",
    "TransformDiaSourceCatalogConfig",
    "TransformDiaSourceCatalogTask",
    "UnpackApdbFlags",
)
27import numpy as np
28import os
29import yaml
31from lsst.daf.base import DateTime
32import lsst.pex.config as pexConfig
33import lsst.pipe.base as pipeBase
34import lsst.pipe.base.connectionTypes as connTypes
35from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask
36from lsst.pipe.tasks.parquetTable import ParquetTable
37from lsst.utils import getPackageDir
class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    diaSourceTable = connTypes.Output(
        # Was a placeholder ("."); describe the dataset this task produces.
        doc="Catalog of DiaSources with calibrated values and renamed "
            "columns, ready for insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
class TransformDiaSourceCatalogConfig(pipeBase.PipelineTaskConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Config for TransformDiaSourceCatalogTask.
    """
    flagMap = pexConfig.Field(
        dtype=str,
        doc="Yaml file specifying SciencePipelines flag fields to bit packs.",
        default=os.path.join(getPackageDir("ap_association"),
                             "data",
                             "association-flag-map.yaml"),
    )
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying Science DataModel functors to use '
            'when copying columns and computing calibrated values.',
        default=os.path.join(getPackageDir("ap_association"),
                             "data",
                             "DiaSource.yaml")
    )
class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Apply Science DataModel-ification on the DiaSource afw table.

    This task calibrates and renames columns in the DiaSource catalog
    to ready the catalog for insertion into the Apdb.

    This is a Gen3 Butler only task. It will not run in Gen2.
    """

    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs, **kwargs):
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

    def _create_bit_pack_mappings(self):
        """Setup all flag bit packings.

        Reads the flag map yaml file named in ``self.config.flagMap`` and
        stores the 'DiaSource' table's column/bit definitions in
        ``self.bit_pack_columns``.

        Raises
        ------
        KeyError
            Raised if a flag named in the flag map is not present in the
            input DiaSource schema.
        """
        self.bit_pack_columns = []
        with open(self.config.flagMap) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schemas.
        # Output schemas are flexible, however if names are not specified in
        # the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            for bit in bitList:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError as e:
                    # Chain the original exception for easier debugging.
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name']) from e

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Pack the visit/detector dataId into a single integer id for the
        # output table and record the band the data was taken in.
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdVisitId"] = expId
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            funcs=None):
        """Convert input catalog to ParquetTable/Pandas and run functors.

        Additionally, add new columns for stripping information from the
        exposure and into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of sources detected on the difference image.
        diffIm : `lsst.afw.image.Exposure`
            Difference image the sources were detected on; only its
            VisitInfo date is read here (for ``midPointTai``).
        band : `str`
            Band/filter name the data was taken in.
        ccdVisitId : `int`
            Unique id of the ccd/visit the catalog was produced from.
        funcs : functors, optional
            Currently unused; ``self.funcs`` is applied instead.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated values
              and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
        """
        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        # diaObjectId/pixelId are filled downstream (after association);
        # initialize them so the output schema is complete.
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["pixelId"] = 0
        self.bitPackFlags(diaSourceDf)

        df = self.transform(band,
                            ParquetTable(dataFrame=diaSourceDf),
                            self.funcs,
                            dataId=None).df
        # The Ra/DecColumn functors preserve the coord_ra/dec original columns.
        # Since we don't need these and keeping them causes a DB insert crash
        # we drop them from the DataFrame before returning output catalog.
        return pipeBase.Struct(
            diaSourceTable=df.drop(columns=["coord_ra", "coord_dec"]),
        )

    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the detection
        footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `numpy.ndarray`, (N,)
            Array of bbox sizes.
        """
        outputBBoxSizes = np.empty(len(inputCatalog), dtype=int)
        for idx, record in enumerate(inputCatalog):
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the footprint
            # bounding box. This is the largest footprint we should need to
            # cover the complete DiaSource assuming the centroid is within the
            # bounding box.
            maxSize = 2 * max(footprintBBox.getWidth(),
                              footprintBBox.getHeight())
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            # Twice the largest centroid-to-edge distance, rounded up, gives a
            # square centered on the centroid that covers the footprint.
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            if bboxSize > maxSize:
                bboxSize = maxSize
            outputBBoxSizes[idx] = bboxSize

        return outputBBoxSizes

    def bitPackFlags(self, df):
        """Pack requested flag columns in inputRecord into single columns in
        outputRecord.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into.
        """
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in bitList:
                # Hard type the bit arrays.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value
class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Attributes
    ----------
    flag_map_file : `str`
        Absolute or relative path to a yaml file specifying mappings of flags
        to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """

    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

        # Map packed-column name -> list of (flag name, bool) pairs, usable
        # directly as a numpy structured dtype in unpack().
        self.output_flag_columns = {}

        for column in self.bit_pack_columns:
            names = []
            for bit in column["bitList"]:
                names.append((bit["name"], bool))
            self.output_flag_columns[column["columnName"]] = names

    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of unsigned
        ints.

        Parameters
        ----------
        input_flag_values : array-like of type uint
            Array of integer flags to unpack.
        flag_name : `str`
            Apdb column name of integer flags to unpack. Names of packed int
            flags are given by the flag_map_file.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Numpy named tuple of booleans.
        """
        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values), dtype=bit_names_types)

        # BUG FIX: the original used the positional index of each flag in the
        # dtype list as the bit number (2**bit_idx), which is only correct
        # when the flag map's bits are contiguous and start at zero. Use the
        # actual bit number from the flag map, matching how bitPackFlags
        # packs the values (bit['bit']).
        bit_list = next(column["bitList"] for column in self.bit_pack_columns
                        if column["columnName"] == flag_name)
        for bit in bit_list:
            masked_bits = np.bitwise_and(input_flag_values,
                                         np.uint64(2 ** bit["bit"]))
            output_flags[bit["name"]] = masked_bits != 0

        return output_flags