# This file is part of ap_association
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

__all__ = ("TransformDiaSourceCatalogConnections",
           "TransformDiaSourceCatalogConfig",
           "TransformDiaSourceCatalogTask",
           "UnpackApdbFlags")

import numpy as np
import os
import yaml

from lsst.daf.base import DateTime
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as connTypes
from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask
from lsst.pipe.tasks.parquetTable import ParquetTable


class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    diaSourceTable = connTypes.Output(
        doc="Transformed and calibrated DiaSource catalog, ready for "
            "insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )


class TransformDiaSourceCatalogConfig(pipeBase.PipelineTaskConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Config for TransformDiaSourceCatalogTask.
    """
    flagMap = pexConfig.Field(
        dtype=str,
        doc="YAML file specifying which Science Pipelines flag fields to "
            "pack into bits.",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "association-flag-map.yaml"),
    )
    functorFile = pexConfig.Field(
        dtype=str,
        doc="Path to YAML file specifying Science DataModel functors to use "
            "when copying columns and computing calibrated values.",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "DiaSource.yaml")
    )
    doRemoveSkySources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Input DiaSource catalog contains SkySources that should be "
            "removed before storing the output DiaSource catalog."
    )


class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Apply Science DataModel-ification on the DiaSource afw table.

    This task calibrates and renames columns in the DiaSource catalog
    to ready the catalog for insertion into the Apdb.

    This is a Gen3 Butler only task. It will not run in Gen2.
    """

    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner
    # Needed to create a valid TransformCatalogBaseTask, but unused.
    inputDataset = "deepDiff_diaSrc"
    outputDataset = "deepDiff_diaSrcTable"

    def __init__(self, initInputs, **kwargs):
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

    def _create_bit_pack_mappings(self):
        """Setup all flag bit packings.
        """
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(self.config.flagMap)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schemas.
        # Output schemas are flexible; however, if names are not specified in
        # the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            for bit in bitList:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError:
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name'])

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdVisitId"] = expId
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            funcs=None):
        """Convert input catalog to ParquetTable/Pandas and run functors.

        Additionally, add new columns stripping information from the
        exposure into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of DiaSources produced during image differencing.
        diffIm : `lsst.afw.image.ExposureF`
            Difference image on which the DiaSources were detected.
        band : `str`
            Band the DiaSources were observed in.
        ccdVisitId : `int`
            Packed id of the visit and detector the DiaSources were
            detected in.
        funcs : optional
            Functors to apply to the catalog's columns. Currently unused;
            the functors loaded at construction (``self.funcs``) are
            applied instead.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated
              values and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
        """
        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        if self.config.doRemoveSkySources:
            diaSourceDf = diaSourceDf[~diaSourceDf["sky_source"]]
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["pixelId"] = 0
        self.bitPackFlags(diaSourceDf)

        df = self.transform(band,
                            ParquetTable(dataFrame=diaSourceDf),
                            self.funcs,
                            dataId=None).df

        return pipeBase.Struct(
            diaSourceTable=df,
        )
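
    # Hedged usage sketch for ``run``; the input objects named below
    # (diaSrcSchemaCat, diaSourceCat, diffIm, ccdVisitId) are assumptions
    # standing in for butler-loaded data:
    #
    #     task = TransformDiaSourceCatalogTask(
    #         initInputs={"diaSourceSchema": diaSrcSchemaCat})
    #     result = task.run(diaSourceCat, diffIm, band="g",
    #                       ccdVisitId=ccdVisitId)
    #     result.diaSourceTable  # transformed `pandas.DataFrame`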

    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the
        detection footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `list` of `int`
            Array of bbox sizes.
        """
        outputBBoxSizes = []
        for record in inputCatalog:
            if self.config.doRemoveSkySources:
                if record["sky_source"]:
                    continue
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the
            # footprint bounding box. This is the largest footprint we
            # should need to cover the complete DiaSource, assuming the
            # centroid is within the bounding box.
            maxSize = 2 * np.max([footprintBBox.getWidth(),
                                  footprintBBox.getHeight()])
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            if bboxSize > maxSize:
                bboxSize = maxSize
            outputBBoxSizes.append(bboxSize)

        return outputBBoxSizes
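
    # Worked example of the size computation above (comment only; the
    # numbers are illustrative and assume afw's inclusive integer boxes):
    # a footprint bbox spanning x in [10, 20] and y in [15, 19] with
    # centroid (12, 17) has 8 = |20 - 12| as its largest offset, giving
    # bboxSize = ceil(2 * 8) = 16, well under the cap
    # maxSize = 2 * max(11, 5) = 22.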

    def bitPackFlags(self, df):
        """Pack requested flag columns in inputRecord into single columns in
        outputRecord.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into.
        """
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in bitList:
                # Hard type the bit arrays.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value
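
    # Hedged example of the packing arithmetic above, with two hypothetical
    # flag columns assigned bits 0 and 1 (assumes ``import pandas as pd``):
    #
    #     df = pd.DataFrame({"flagA": [True, False], "flagB": [True, True]})
    #     packed = (df["flagA"]*2**0
    #               + df["flagB"]*2**1).to_numpy().astype(np.uint64)
    #     # packed == array([3, 2], dtype=uint64)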


class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Attributes
    ----------
    flag_map_file : `str`
        Absolute or relative path to a YAML file specifying mappings of flags
        to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """

    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(flag_map_file)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

        self.output_flag_columns = {}

        for column in self.bit_pack_columns:
            names = []
            for bit in column["bitList"]:
                names.append((bit["name"], bool))
            self.output_flag_columns[column["columnName"]] = names

    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of unsigned
        ints.

        Parameters
        ----------
        input_flag_values : array-like of `int`
            Array of unsigned integer flags to unpack.
        flag_name : `str`
            Apdb column name of integer flags to unpack. Names of packed int
            flags are given by the flag_map_file.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Structured array of booleans, one field per unpacked flag.
        """
        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values), dtype=bit_names_types)

        for bit_idx, (bit_name, dtypes) in enumerate(bit_names_types):
            masked_bits = np.bitwise_and(input_flag_values, 2**bit_idx)
            output_flags[bit_name] = masked_bits

        return output_flags
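

# Hedged usage sketch for UnpackApdbFlags; the packed column name "flags",
# the flag name shown, and the ``diaSources`` table are assumptions:
#
#     unpacker = UnpackApdbFlags(
#         os.path.join("${AP_ASSOCIATION_DIR}", "data",
#                      "association-flag-map.yaml"),
#         "DiaSource")
#     flags = unpacker.unpack(diaSources["flags"], "flags")
#     flags["base_PixelFlags_flag"]  # boolean array for one unpacked flag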