Coverage for python/lsst/ap/association/transformDiaSourceCatalog.py : 23%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ap_association
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22__all__ = ("TransformDiaSourceCatalogConnections",
23 "TransformDiaSourceCatalogConfig",
24 "TransformDiaSourceCatalogTask",
25 "UnpackApdbFlags")
27import numpy as np
28import os
29import yaml
31from lsst.daf.base import DateTime
32import lsst.pex.config as pexConfig
33import lsst.pipe.base as pipeBase
34import lsst.pipe.base.connectionTypes as connTypes
35from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask, TransformCatalogBaseConfig
36from lsst.pipe.tasks.parquetTable import ParquetTable
37from lsst.pipe.tasks.functors import Column
class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask.
    """
    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_diaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    diaSourceTable = connTypes.Output(
        # Replaced the placeholder doc string (".") with a real description.
        doc="Catalog of DiaSources with standardized and calibrated columns.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
class TransformDiaSourceCatalogConfig(TransformCatalogBaseConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Config for TransformDiaSourceCatalogTask.
    """
    flagMap = pexConfig.Field(
        dtype=str,
        doc="Yaml file specifying SciencePipelines flag fields to bit packs.",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "association-flag-map.yaml"),
    )
    flagRenameMap = pexConfig.Field(
        dtype=str,
        # Fixed doubled word ("specifying specifying") in the help text.
        doc="Yaml file specifying rules to rename flag names",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "flag-rename-rules.yaml"),
    )
    doRemoveSkySources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Input DiaSource catalog contains SkySources that should be "
            "removed before storing the output DiaSource catalog."
    )
    doPackFlags = pexConfig.Field(
        dtype=bool,
        default=True,
        doc="Do pack the flags into one integer column named 'flags'."
            "If False, instead produce one boolean column per flag."
    )

    def setDefaults(self):
        # Point the base-class functor file at this package's DiaSource
        # column-transform definitions.
        super().setDefaults()
        self.functorFile = os.path.join("${AP_ASSOCIATION_DIR}",
                                        "data",
                                        "DiaSource.yaml")
class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Apply Science DataModel-ification on the DiaSource afw table.

    This task calibrates and renames columns in the DiaSource catalog
    to ready the catalog for insertion into the Apdb.

    This is a Gen3 Butler only task. It will not run in Gen2.
    """

    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner
    # Needed to create a valid TransformCatalogBaseTask, but unused
    inputDataset = "deepDiff_diaSrc"
    outputDataset = "deepDiff_diaSrcTable"

    def __init__(self, initInputs, **kwargs):
        """Initialize the task, caching the functors, the input DiaSource
        schema, and the flag bit-pack (or rename) mappings.

        Parameters
        ----------
        initInputs : `dict`
            Must contain a ``diaSourceSchema`` entry whose ``.schema`` is the
            input DiaSource catalog schema (see the task connections).
        **kwargs
            Forwarded to `TransformCatalogBaseTask`.
        """
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

        if not self.config.doPackFlags:
            # get the flag rename rules
            with open(os.path.expandvars(self.config.flagRenameMap)) as yaml_stream:
                self.rename_rules = list(yaml.safe_load_all(yaml_stream))

    def _create_bit_pack_mappings(self):
        """Setup all flag bit packings.

        Reads the yaml file named by ``config.flagMap`` and stores the
        column/bit definitions for the ``DiaSource`` table in
        ``self.bit_pack_columns``.

        Raises
        ------
        KeyError
            If a flag named in the map is absent from the input schema.
        """
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(self.config.flagMap)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schemas.
        # Output schemas are flexible, however if names are not specified in
        # the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            for bit in bitList:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError:
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name'])

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        # Pack visit+detector into a single integer id for the output table;
        # expBits (the max bit width) is currently unused.
        inputs = butlerQC.get(inputRefs)
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdVisitId"] = expId
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            funcs=None):
        """Convert input catalog to ParquetTable/Pandas and run functors.

        Additionally, add new columns for stripping information from the
        exposure and into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of DiaSources produced during image differencing.
        diffIm : `lsst.afw.image.ExposureF`
            Difference image the sources were detected on; only its visit
            date is read here (stored as ``midPointTai``, MJD).
        band : `str`
            Band of the observation; stored in the ``filterName`` column.
        ccdVisitId : `int`
            Packed visit+detector id stored with every row.
        funcs : optional
            NOTE(review): accepted but ignored — ``self.funcs`` is always
            passed to ``transform`` below; confirm whether this parameter
            should be honored.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated values
              and renamed columns.
              (`lsst.pipe.tasks.ParquetTable` or `pandas.DataFrame`)
        """
        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        if self.config.doRemoveSkySources:
            # Drop rows flagged as sky sources before any derived columns
            # are computed (computeBBoxSizes skips the same rows).
            diaSourceDf = diaSourceDf[~diaSourceDf["sky_source"]]
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["filterName"] = band
        diaSourceDf["midPointTai"] = diffIm.getInfo().getVisitInfo().getDate().get(system=DateTime.MJD)
        # Placeholder associations; filled in by later association tasks.
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["ssObjectId"] = 0
        if self.config.doPackFlags:
            # either bitpack the flags
            self.bitPackFlags(diaSourceDf)
        else:
            # or add the individual flag functors
            self.addUnpackedFlagFunctors()
            # and remove the packed flag functor
            if 'flags' in self.funcs.funcDict:
                del self.funcs.funcDict['flags']

        df = self.transform(band,
                            ParquetTable(dataFrame=diaSourceDf),
                            self.funcs,
                            dataId=None).df

        return pipeBase.Struct(
            diaSourceTable=df,
        )

    def addUnpackedFlagFunctors(self):
        """Add Column functor for each of the flags

        to the internal functor dictionary
        """
        # Only the first table entry ('DiaSource', selected in
        # _create_bit_pack_mappings) contributes flags here.
        for flag in self.bit_pack_columns[0]['bitList']:
            flagName = flag['name']
            targetName = self.funcs.renameCol(flagName, self.rename_rules[0]['flag_rename_rules'])
            self.funcs.update({targetName: Column(flagName)})

    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the detection
        footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `list` of `float`
            Array of bbox sizes.
        """
        outputBBoxSizes = []
        for record in inputCatalog:
            if self.config.doRemoveSkySources:
                # Keep this list aligned with the sky-source filtering in run.
                if record["sky_source"]:
                    continue
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the footprint
            # bounding box. This is the largest footprint we should need to cover
            # the complete DiaSource assuming the centroid is within the bounding
            # box.
            maxSize = 2 * np.max([footprintBBox.getWidth(),
                                  footprintBBox.getHeight()])
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            # Twice the largest centroid-to-edge distance, capped at maxSize.
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            if bboxSize > maxSize:
                bboxSize = maxSize
            outputBBoxSizes.append(bboxSize)

        return outputBBoxSizes

    def bitPackFlags(self, df):
        """Pack requested flag columns in inputRecord into single columns in
        outputRecord.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into.
        """
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in bitList:
                # Hard type the bit arrays.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value
class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Attributes
    ----------
    flag_map_file : `str`
        Absolute or relative path to a yaml file specifying mappings of flags
        to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """

    def __init__(self, flag_map_file, table_name):
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(flag_map_file)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == table_name:
                    self.bit_pack_columns = table['columns']
                    break

        # Map each packed column name to the (flagName, bool) dtype spec used
        # to build the structured output array in unpack.
        self.output_flag_columns = {}

        for column in self.bit_pack_columns:
            names = []
            for bit in column["bitList"]:
                names.append((bit["name"], bool))
            self.output_flag_columns[column["columnName"]] = names

    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of unsigned
        ints.

        Parameters
        ----------
        input_flag_values : array-like of type uint
            Array of integer flags to unpack.
        flag_name : `str`
            Apdb column name of integer flags to unpack. Names of packed int
            flags are given by the flag_map_file.

        Returns
        -------
        output_flags : `numpy.ndarray`
            Numpy named tuple of booleans.
        """
        bit_names_types = self.output_flag_columns[flag_name]
        output_flags = np.zeros(len(input_flag_values), dtype=bit_names_types)

        # Use the explicit bit positions recorded in the flag map, not the
        # position of each flag in the list: the packer
        # (TransformDiaSourceCatalogTask.bitPackFlags) packs with each
        # entry's ``bit`` value, so unpacking by enumerate index is wrong
        # whenever bit numbers are not consecutive list positions.
        bit_list = next(column["bitList"]
                        for column in self.bit_pack_columns
                        if column["columnName"] == flag_name)
        for bit in bit_list:
            masked_bits = np.bitwise_and(input_flag_values, 2**bit["bit"])
            output_flags[bit["name"]] = masked_bits != 0

        return output_flags