Coverage for python/lsst/ap/association/transformDiaSourceCatalog.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ap_association
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Public API of this module: the butler connections, config, and task classes.
__all__ = ("TransformDiaSourceCatalogConnections",
           "TransformDiaSourceCatalogConfig",
           "TransformDiaSourceCatalogTask")
26import numpy as np
27import os
28import yaml
30from lsst.daf.base import DateTime
31import lsst.pex.config as pexConfig
32import lsst.pipe.base as pipeBase
33import lsst.pipe.base.connectionTypes as connTypes
34from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask
35from lsst.pipe.tasks.parquetTable import ParquetTable
36from lsst.utils import getPackageDir
class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    diaSourceTable = connTypes.Output(
        # Was an uninformative "." placeholder; describe the actual dataset.
        doc="Catalog of DiaSources with calibrated values and renamed "
            "columns, standardized for Apdb insertion.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
class TransformDiaSourceCatalogConfig(pipeBase.PipelineTaskConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Config for TransformDiaSourceCatalogTask.
    """
    flagMap = pexConfig.Field(
        dtype=str,
        doc="Yaml file specifying SciencePipelines flag fields to bit packs.",
        default=os.path.join(getPackageDir("ap_association"),
                             "data",
                             "association-flag-map.yaml"),
    )
    functorFile = pexConfig.Field(
        dtype=str,
        doc='Path to YAML file specifying Science DataModel functors to use '
            'when copying columns and computing calibrated values.',
        default=os.path.join(getPackageDir("ap_association"),
                             "data",
                             "DiaSource.yaml")
    )
class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Apply Science DataModel-ification on the DiaSource afw table.

    This task calibrates and renames columns in the DiaSource catalog
    to ready the catalog for insertion into the Apdb.

    This is a Gen3 Butler only task. It will not run in Gen2.
    """

    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs, **kwargs):
        """Initialize the task, loading functors and flag bit mappings.

        Parameters
        ----------
        initInputs : `dict`
            Butler init inputs; must contain ``diaSourceSchema`` whose
            ``schema`` attribute is the input DiaSource schema.
        **kwargs
            Additional keyword arguments passed to the base task.
        """
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

    def _create_bit_pack_mappings(self):
        """Setup all flag bit packings from the ``flagMap`` yaml file.

        Raises
        ------
        KeyError
            Raised if a flag requested in the flag map is not present in
            the input DiaSource schema.
        """
        self.bit_pack_columns = []
        with open(self.config.flagMap) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schemas.
        # Output schemas are flexible, however if names are not specified in
        # the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            for bit in outputFlag['bitList']:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError as err:
                    # Chain the original exception so the underlying schema
                    # lookup failure is not lost.
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name']) from err

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """Fetch inputs from the butler, augment them with the packed
        visit-detector id and band, run the task, and persist the outputs.
        """
        inputs = butlerQC.get(inputRefs)
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdVisitId"] = expId
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            funcs=None):
        """Convert input catalog to ParquetTable/Pandas and run functors.

        Additionally, add new columns for stripping information from the
        exposure and into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of DiaSources detected on ``diffIm``.
        diffIm : exposure-like
            Difference image the sources were detected on. Only its
            VisitInfo observation date is read here.
        band : `str`
            Band/filter name of the observation.
        ccdVisitId : `int`
            Unique id of the ccd-visit the sources were detected on.
        funcs : functor collection, optional
            Functors to apply during the transform. Defaults to the
            functors loaded from ``config.functorFile`` at init time.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated
              values and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
        """
        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        # Placeholder ids; association fills these in downstream.
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["pixelId"] = 0
        self.bitPackFlags(diaSourceDf)

        # The ``funcs`` argument was previously accepted but silently
        # ignored; honor it while preserving the old default behavior.
        if funcs is None:
            funcs = self.funcs
        df = self.transform(band,
                            ParquetTable(dataFrame=diaSourceDf),
                            funcs,
                            dataId=None).df

        return pipeBase.Struct(
            diaSourceTable=df,
        )

    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the detection
        footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `numpy.ndarray`, (N,)
            Array of bbox sizes.
        """
        outputBBoxSizes = np.empty(len(inputCatalog), dtype=int)
        for idx, record in enumerate(inputCatalog):
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the footprint
            # bounding box. This is the largest footprint we should need to
            # cover the complete DiaSource assuming the centroid is within
            # the bounding box.
            maxSize = 2 * np.max([footprintBBox.getWidth(),
                                  footprintBBox.getHeight()])
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            # Clamp to the footprint-derived upper bound.
            outputBBoxSizes[idx] = min(bboxSize, maxSize)

        return outputBBoxSizes

    def bitPackFlags(self, df):
        """Pack requested flag columns in inputRecord into single columns in
        outputRecord.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into.
        """
        for outputFlag in self.bit_pack_columns:
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in outputFlag['bitList']:
                # Hard type the bit arrays.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value
class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Attributes
    ----------
    flag_map_file : `str`
        Absolute or relative path to a yaml file specifiying mappings of flags
        to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """

    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

        self.output_flag_columns = {}
        # Parallel mapping of the *declared* bit position of each flag, so
        # that unpacking does not assume the yaml lists bits contiguously
        # from 0. ``output_flag_columns`` keeps its original
        # {columnName: [(name, bool), ...]} structure for compatibility.
        self._bit_positions = {}

        for column in self.bit_pack_columns:
            names = []
            positions = []
            for bit in column["bitList"]:
                names.append((bit["name"], bool))
                positions.append(bit["bit"])
            self.output_flag_columns[column["columnName"]] = names
            self._bit_positions[column["columnName"]] = positions

    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of unsigned
        ints.

        Parameters
        ----------
        input_flag_values : array-like of type uint
            Array of integer flags to unpack.
        flag_name : `str`
            Apdb column name of integer flags to unpack. Names of packed int
            flags are given by the flag_map_file.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Numpy named tuple of booleans.
        """
        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values), dtype=bit_names_types)

        # BUG FIX: previously the enumerate() index was used as the bit
        # position (2**bit_idx), which silently mis-decodes flags whenever
        # the yaml bit numbers are non-contiguous or do not start at 0.
        # Use the bit positions declared in the flag map instead, falling
        # back to the old contiguous assumption if they are unavailable.
        bit_positions = getattr(self, "_bit_positions", {}).get(
            flag_name, range(len(bit_names_types)))
        for (bit_name, _), bit_number in zip(bit_names_types, bit_positions):
            masked_bits = np.bitwise_and(input_flag_values, 2**bit_number)
            output_flags[bit_name] = masked_bits

        return output_flags