Coverage for python/lsst/ap/association/transformDiaSourceCatalog.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ap_association
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Public API of this module: the PipelineTask (with its connections and
# config) that flattens afw DiaSource catalogs into Apdb-ready DataFrames,
# plus the helper for unpacking the bit-packed flag columns again.
__all__ = ("TransformDiaSourceCatalogConnections",
           "TransformDiaSourceCatalogConfig",
           "TransformDiaSourceCatalogTask",
           "UnpackApdbFlags")
27import numpy as np
28import os
29import yaml
31from lsst.daf.base import DateTime
32import lsst.pex.config as pexConfig
33import lsst.pipe.base as pipeBase
34import lsst.pipe.base.connectionTypes as connTypes
35from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask
36from lsst.pipe.tasks.parquetTable import ParquetTable
class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    diaSourceTable = connTypes.Output(
        # Was a placeholder "."; describe what run() actually produces.
        doc="Catalog of DiaSources with calibrated values and renamed "
            "columns, ready for insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
class TransformDiaSourceCatalogConfig(pipeBase.PipelineTaskConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Config for TransformDiaSourceCatalogTask.

    Both path defaults contain ``${AP_ASSOCIATION_DIR}``; the flag-map path
    is expanded with `os.path.expandvars` when the task reads it.
    """
    flagMap = pexConfig.Field(
        dtype=str,
        doc="Yaml file specifying SciencePipelines flag fields to bit packs.",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "association-flag-map.yaml"),
    )
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying Science DataModel functors to use '
            'when copying columns and computing calibrated values.',
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "DiaSource.yaml")
    )
class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Apply Science DataModel-ification on the DiaSource afw table.

    This task calibrates and renames columns in the DiaSource catalog
    to ready the catalog for insertion into the Apdb.

    This is a Gen3 Butler only task. It will not run in Gen2.
    """

    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs, **kwargs):
        """Cache the input DiaSource schema and flag bit-pack definitions.

        Parameters
        ----------
        initInputs : `dict`
            Must contain a ``diaSourceSchema`` entry whose ``.schema``
            attribute is the schema of the input DiaSource catalog.
        **kwargs
            Forwarded to `TransformCatalogBaseTask`.
        """
        super().__init__(**kwargs)
        # Functors come from the YAML file named by config.functorFile.
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

    def _create_bit_pack_mappings(self):
        """Setup all flag bit packings.

        Loads the YAML flag map named by ``config.flagMap`` and stores the
        column definitions for the ``DiaSource`` table in
        ``self.bit_pack_columns`` (empty list if no such table is found).

        Raises
        ------
        KeyError
            Raised if a flag named in the map is absent from the input
            DiaSource schema.
        """
        self.bit_pack_columns = []
        # The configured path may contain env vars (${AP_ASSOCIATION_DIR});
        # expand them before opening.
        flag_map_file = os.path.expandvars(self.config.flagMap)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schemas.
        # Output schemas are flexible, however if names are not specified in
        # the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            for bit in bitList:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError:
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name'])

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """Fetch inputs, derive per-quantum IDs, and run the task.

        ``ccdVisitId`` is the packed visit+detector integer for this
        quantum; ``band`` is taken from the quantum dataId.
        """
        inputs = butlerQC.get(inputRefs)
        # expBits (the max bit width of the packed ID) is unused here.
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdVisitId"] = expId
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            funcs=None):
        """Convert input catalog to ParquetTable/Pandas and run functors.

        Additionally, add new columns for stripping information from the
        exposure and into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of DiaSources detected on the difference image.
        diffIm : `lsst.afw.image.Exposure`
            Difference image the sources were detected on; only its
            VisitInfo date is read here (for ``midPointTai``).
        band : `str`
            Band of this observation, stored in ``filterName``.
        ccdVisitId : `int`
            Packed visit+detector ID, stored in ``ccdVisitId``.
        funcs : optional
            NOTE(review): this parameter is accepted but not used — the
            transform below always uses ``self.funcs``. Confirm intent.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated values
              and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
        """
        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        # Placeholder values: association (diaObjectId) and spatial indexing
        # (pixelId) are filled in by downstream tasks.
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["pixelId"] = 0
        self.bitPackFlags(diaSourceDf)

        df = self.transform(band,
                            ParquetTable(dataFrame=diaSourceDf),
                            self.funcs,
                            dataId=None).df
        # The Ra/DecColumn functors preserve the coord_ra/dec original columns.
        # Since we don't need these and keeping them causes a DB insert crash
        # we drop them from the DataFrame before returning output catalog.
        return pipeBase.Struct(
            diaSourceTable=df.drop(columns=["coord_ra", "coord_dec"]),
        )

    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the detection
        footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `numpy.ndarray`, (N,)
            Array of bbox sizes.
        """
        outputBBoxSizes = np.empty(len(inputCatalog), dtype=int)
        for idx, record in enumerate(inputCatalog):
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the footprint
            # bounding box. This is the largest footprint we should need to
            # cover the complete DiaSource assuming the centroid is within
            # the bounding box.
            maxSize = 2 * np.max([footprintBBox.getWidth(),
                                  footprintBBox.getHeight()])
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            # Size needed to cover the footprint when centered on the
            # centroid: twice the largest |centroid - bbox edge| distance,
            # capped at maxSize above.
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            if bboxSize > maxSize:
                bboxSize = maxSize
            outputBBoxSizes[idx] = bboxSize

        return outputBBoxSizes

    def bitPackFlags(self, df):
        """Pack requested flag columns in inputRecord into single columns in
        outputRecord.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into. Modified in
            place: one new integer column is added per packed flag column.
        """
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in bitList:
                # Hard type the bit arrays. Each flag contributes
                # 2**bit['bit'] where its boolean column is True.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value
class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Attributes
    ----------
    flag_map_file : `str`
        Absolute or relative path to a yaml file specifying mappings of flags
        to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """

    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

        # columnName -> [(flagName, bool), ...]: the dtype spec of the
        # structured array returned by unpack().
        self.output_flag_columns = {}
        # columnName -> {flagName: bit position}. The 'bit' values from the
        # flag map are authoritative; list order is not guaranteed to match
        # bit order, so they must not be inferred from enumeration.
        self.flag_bits = {}

        for column in self.bit_pack_columns:
            names = []
            bits = {}
            for bit in column["bitList"]:
                names.append((bit["name"], bool))
                bits[bit["name"]] = bit["bit"]
            self.output_flag_columns[column["columnName"]] = names
            self.flag_bits[column["columnName"]] = bits

    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of unsigned
        ints.

        Parameters
        ----------
        input_flag_values : array-like of type uint
            Array of integer flags to unpack.
        flag_name : `str`
            Apdb column name of integer flags to unpack. Names of packed int
            flags are given by the flag_map_file.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Structured array with one boolean field per flag name.
        """
        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values), dtype=bit_names_types)

        for bit_name, _ in bit_names_types:
            # Use the bit position declared in the flag map rather than the
            # enumeration index of the flag in the list: this mirrors how
            # TransformDiaSourceCatalogTask.bitPackFlags packs the values
            # and stays correct for out-of-order or sparse bit assignments.
            masked_bits = np.bitwise_and(input_flag_values,
                                         2**self.flag_bits[flag_name][bit_name])
            output_flags[bit_name] = masked_bits != 0

        return output_flags