Coverage for python/lsst/ap/association/transformDiaSourceCatalog.py: 21%
167 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-24 11:22 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-24 11:22 +0000
1# This file is part of ap_association
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Public API of this module.
__all__ = ("TransformDiaSourceCatalogConnections",
           "TransformDiaSourceCatalogConfig",
           "TransformDiaSourceCatalogTask",
           "UnpackApdbFlags")
27import numpy as np
28import os
29import yaml
30import pandas as pd
32from lsst.daf.base import DateTime
33import lsst.pex.config as pexConfig
34import lsst.pipe.base as pipeBase
35import lsst.pipe.base.connectionTypes as connTypes
36from lsst.meas.base import DetectorVisitIdGeneratorConfig
37from lsst.pipe.tasks.postprocess import TransformCatalogBaseTask, TransformCatalogBaseConfig
38from lsst.pipe.tasks.functors import Column
39from lsst.utils.timer import timeMethod
class TransformDiaSourceCatalogConnections(pipeBase.PipelineTaskConnections,
                                           dimensions=("instrument", "visit", "detector"),
                                           defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for TransformDiaSourceCatalogTask."""

    diaSourceSchema = connTypes.InitInput(
        doc="Schema for DIASource catalog output by ImageDifference.",
        storageClass="SourceCatalog",
        name="{fakesType}{coaddName}Diff_diaSrc_schema",
    )
    diaSourceCat = connTypes.Input(
        doc="Catalog of DiaSources produced during image differencing.",
        name="{fakesType}{coaddName}Diff_candidateDiaSrc",
        storageClass="SourceCatalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    reliability = connTypes.Input(
        doc="Reliability (e.g. real/bogus) classification of diaSourceCat sources (optional).",
        name="{fakesType}{coaddName}RealBogusSources",
        storageClass="Catalog",
        dimensions=("instrument", "visit", "detector"),
    )
    diaSourceTable = connTypes.Output(
        doc="Catalog of DiaSources with standardized and calibrated columns.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)
        # The reliability input is only read when the config requests it;
        # dropping the connection avoids requiring the dataset otherwise.
        if not self.config.doIncludeReliability:
            self.inputs.remove("reliability")
class TransformDiaSourceCatalogConfig(TransformCatalogBaseConfig,
                                      pipelineConnections=TransformDiaSourceCatalogConnections):
    """Config for TransformDiaSourceCatalogTask."""

    flagMap = pexConfig.Field(
        dtype=str,
        doc="Yaml file specifying SciencePipelines flag fields to bit packs.",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "association-flag-map.yaml"),
    )
    flagRenameMap = pexConfig.Field(
        dtype=str,
        doc="Yaml file specifying rules to rename flag names",
        default=os.path.join("${AP_ASSOCIATION_DIR}",
                             "data",
                             "flag-rename-rules.yaml"),
    )
    doRemoveSkySources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Input DiaSource catalog contains SkySources that should be "
            "removed before storing the output DiaSource catalog."
    )
    doPackFlags = pexConfig.Field(
        dtype=bool,
        default=True,
        doc="Do pack the flags into one integer column named 'flags'."
            "If False, instead produce one boolean column per flag."
    )
    doIncludeReliability = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Include the reliability (e.g. real/bogus) classifications in the output."
    )
    idGenerator = DetectorVisitIdGeneratorConfig.make_field()

    def setDefaults(self):
        super().setDefaults()
        # Default functor definitions shipped with this package.
        self.functorFile = os.path.join("${AP_ASSOCIATION_DIR}",
                                        "data",
                                        "DiaSource.yaml")
class TransformDiaSourceCatalogTask(TransformCatalogBaseTask):
    """Transform a DiaSource catalog by calibrating and renaming columns to
    produce a table ready to insert into the Apdb.

    Parameters
    ----------
    initInputs : `dict`
        Must contain ``diaSourceSchema`` as the schema for the input catalog.
    """
    ConfigClass = TransformDiaSourceCatalogConfig
    _DefaultName = "transformDiaSourceCatalog"
    # Needed to create a valid TransformCatalogBaseTask, but unused
    inputDataset = "deepDiff_diaSrc"
    outputDataset = "deepDiff_diaSrcTable"

    def __init__(self, initInputs, **kwargs):
        super().__init__(**kwargs)
        self.funcs = self.getFunctors()
        self.inputSchema = initInputs['diaSourceSchema'].schema
        self._create_bit_pack_mappings()

        if not self.config.doPackFlags:
            # get the flag rename rules, used to name the per-flag boolean
            # columns when flags are not bit-packed
            with open(os.path.expandvars(self.config.flagRenameMap)) as yaml_stream:
                self.rename_rules = list(yaml.safe_load_all(yaml_stream))

    def _create_bit_pack_mappings(self):
        """Setup all flag bit packings.

        Loads the flag map yaml file and stores the ``DiaSource`` bit
        definitions in ``self.bit_pack_columns`` (empty list if no
        ``DiaSource`` table is found in the file).

        Raises
        ------
        KeyError
            Raised if a flag named in the flag map is missing from the input
            DiaSource schema.
        """
        self.bit_pack_columns = []
        flag_map_file = os.path.expandvars(self.config.flagMap)
        with open(flag_map_file) as yaml_stream:
            table_list = list(yaml.safe_load_all(yaml_stream))
            for table in table_list:
                if table['tableName'] == 'DiaSource':
                    self.bit_pack_columns = table['columns']
                    break

        # Test that all flags requested are present in the input schemas.
        # Output schemas are flexible, however if names are not specified in
        # the Apdb schema, flag columns will not be persisted.
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            for bit in bitList:
                try:
                    self.inputSchema.find(bit['name'])
                except KeyError:
                    raise KeyError(
                        "Requested column %s not found in input DiaSource "
                        "schema. Please check that the requested input "
                        "column exists." % bit['name'])

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        # Augment the butler inputs with identifiers derived from the
        # quantum dataId before delegating to run().
        inputs = butlerQC.get(inputRefs)
        idGenerator = self.config.idGenerator.apply(butlerQC.quantum.dataId)
        inputs["ccdVisitId"] = idGenerator.catalog_id
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @timeMethod
    def run(self,
            diaSourceCat,
            diffIm,
            band,
            ccdVisitId,
            reliability=None):
        """Convert the input catalog to a pandas DataFrame and run functors.

        Additionally, add new columns for stripping information from the
        exposure and into the DiaSource catalog.

        Parameters
        ----------
        diaSourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of sources measured on the difference image.
        diffIm : `lsst.afw.image.Exposure`
            Result of subtracting template and science images.
        band : `str`
            Filter band of the science image.
        ccdVisitId : `int`
            Identifier for this detector+visit.
        reliability : `lsst.afw.table.SourceCatalog`, optional
            Reliability (e.g. real/bogus) scores, row-matched to
            ``diaSourceCat``. Only read when
            ``config.doIncludeReliability`` is set.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaSourceTable`` : Catalog of DiaSources with calibrated values
              and renamed columns. (`pandas.DataFrame`)
        """
        self.log.info(
            "Transforming/standardizing the DiaSource table ccdVisitId: %i",
            ccdVisitId)

        diaSourceDf = diaSourceCat.asAstropy().to_pandas()
        if self.config.doRemoveSkySources:
            # Filter both views so the per-row columns computed below
            # (snr, bboxSize) stay aligned with the DataFrame rows.
            diaSourceDf = diaSourceDf[~diaSourceDf["sky_source"]]
            diaSourceCat = diaSourceCat[~diaSourceCat["sky_source"]]

        diaSourceDf["time_processed"] = DateTime.now().toPython()
        diaSourceDf["snr"] = getSignificance(diaSourceCat)
        diaSourceDf["bboxSize"] = self.computeBBoxSizes(diaSourceCat)
        diaSourceDf["ccdVisitId"] = ccdVisitId
        diaSourceDf["band"] = band
        diaSourceDf["midpointMjdTai"] = diffIm.visitInfo.date.get(system=DateTime.MJD)
        # Set to 0 here; presumably filled in during later association —
        # confirm against downstream consumers.
        diaSourceDf["diaObjectId"] = 0
        diaSourceDf["ssObjectId"] = 0

        if self.config.doIncludeReliability:
            reliabilityDf = reliability.asAstropy().to_pandas()
            # Left-merge the scores on the "id" column to match them with
            # diaSources, but this will silently fill with NaNs for any ids
            # that don't match.
            diaSourceDf = pd.merge(diaSourceDf, reliabilityDf,
                                   how="left", on="id", validate="1:1")
            diaSourceDf = diaSourceDf.rename(columns={"score": "reliability"})
            if np.sum(diaSourceDf["reliability"].isna()) == len(diaSourceDf):
                # Every score failed to match: almost certainly a mismatch
                # between the two catalogs rather than genuinely missing data.
                self.log.warning("Reliability identifiers did not match diaSourceIds")
        else:
            diaSourceDf["reliability"] = np.float32(np.nan)

        if self.config.doPackFlags:
            # either bitpack the flags
            self.bitPackFlags(diaSourceDf)
        else:
            # or add the individual flag functors
            self.addUnpackedFlagFunctors()
            # and remove the packed flag functor
            if 'flags' in self.funcs.funcDict:
                del self.funcs.funcDict['flags']

        df = self.transform(band,
                            diaSourceDf,
                            self.funcs,
                            dataId=None).df

        return pipeBase.Struct(
            diaSourceTable=df,
        )

    def addUnpackedFlagFunctors(self):
        """Add Column functor for each of the flags to the internal functor
        dictionary.

        Flag names are renamed according to the rules loaded from
        ``config.flagRenameMap`` in ``__init__``.
        """
        for flag in self.bit_pack_columns[0]['bitList']:
            flagName = flag['name']
            targetName = self.funcs.renameCol(flagName, self.rename_rules[0]['flag_rename_rules'])
            self.funcs.update({targetName: Column(flagName)})

    def computeBBoxSizes(self, inputCatalog):
        """Compute the size of a square bbox that fully contains the detection
        footprint.

        Parameters
        ----------
        inputCatalog : `lsst.afw.table.SourceCatalog`
            Catalog containing detected footprints.

        Returns
        -------
        outputBBoxSizes : `np.ndarray`, (N,)
            Array of bbox sizes.
        """
        # Schema validation requires that this field is int.
        outputBBoxSizes = np.empty(len(inputCatalog), dtype=int)
        for i, record in enumerate(inputCatalog):
            footprintBBox = record.getFootprint().getBBox()
            # Compute twice the size of the largest dimension of the footprint
            # bounding box. This is the largest footprint we should need to cover
            # the complete DiaSource assuming the centroid is within the bounding
            # box.
            maxSize = 2 * np.max([footprintBBox.getWidth(),
                                  footprintBBox.getHeight()])
            recX = record.getCentroid().x
            recY = record.getCentroid().y
            # Side of the smallest square, centered on the centroid, that
            # contains the footprint bbox; capped at maxSize below.
            bboxSize = int(
                np.ceil(2 * np.max(np.fabs([footprintBBox.maxX - recX,
                                            footprintBBox.minX - recX,
                                            footprintBBox.maxY - recY,
                                            footprintBBox.minY - recY]))))
            if bboxSize > maxSize:
                bboxSize = maxSize
            outputBBoxSizes[i] = bboxSize

        return outputBBoxSizes

    def bitPackFlags(self, df):
        """Pack requested boolean flag columns into single integer columns,
        in place, one output column per entry in ``self.bit_pack_columns``.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to read bits from and pack them into.
        """
        for outputFlag in self.bit_pack_columns:
            bitList = outputFlag['bitList']
            value = np.zeros(len(df), dtype=np.uint64)
            for bit in bitList:
                # Hard type the bit arrays.
                value += (df[bit['name']]*2**bit['bit']).to_numpy().astype(np.uint64)
            df[outputFlag['columnName']] = value
class UnpackApdbFlags:
    """Class for unpacking bits from integer flag fields stored in the Apdb.

    Attributes
    ----------
    flag_map_file : `str`
        Absolute or relative path to a yaml file specifiying mappings of flags
        to integer bits.
    table_name : `str`
        Name of the Apdb table the integer bit data are coming from.
    """

    def __init__(self, flag_map_file, table_name):
        with open(os.path.expandvars(flag_map_file)) as stream:
            tables = list(yaml.safe_load_all(stream))

        # Keep the raw column definitions for the requested table; an empty
        # list if the table is not present in the file.
        self.bit_pack_columns = []
        for table in tables:
            if table['tableName'] == table_name:
                self.bit_pack_columns = table['columns']
                break

        # Index the same data as {packed column name: {flag name: bit}}.
        self.output_flag_columns = {
            column["columnName"]: {bit["name"]: bit["bit"] for bit in column["bitList"]}
            for column in self.bit_pack_columns
        }

    def unpack(self, input_flag_values, flag_name):
        """Determine individual boolean flags from an input array of unsigned
        ints.

        Parameters
        ----------
        input_flag_values : array-like of type uint
            Array of integer packed bit flags to unpack.
        flag_name : `str`
            Apdb column name from the loaded file, e.g. "flags".

        Returns
        -------
        output_flags : `numpy.ndarray`
            Numpy structured array of booleans, one column per flag in the
            loaded file.
        """
        bit_map = self.output_flag_columns[flag_name]
        unpacked = np.zeros(len(input_flag_values),
                            dtype=[(flag, bool) for flag in bit_map])
        for flag, bit in bit_map.items():
            # Any nonzero masked value becomes True in the boolean column.
            unpacked[flag] = np.bitwise_and(input_flag_values, 2**bit)
        return unpacked

    def flagExists(self, flagName, columnName='flags'):
        """Check if named flag is in the bitpacked flag set.

        Parameters:
        ----------
        flagName : `str`
            Flag name to search for.
        columnName : `str`, optional
            Name of bitpacked flag column to search in.

        Returns
        -------
        flagExists : `bool`
            `True` if `flagName` is present in `columnName`.

        Raises
        ------
        ValueError
            Raised if `columnName` is not defined.
        """
        if columnName not in self.output_flag_columns:
            raise ValueError(f'column {columnName} not in flag map: {self.output_flag_columns}')

        return flagName in self.output_flag_columns[columnName]

    def makeFlagBitMask(self, flagNames, columnName='flags'):
        """Return a bitmask corresponding to the supplied flag names.

        Parameters:
        ----------
        flagNames : `list` [`str`]
            Flag names to include in the bitmask.
        columnName : `str`, optional
            Name of bitpacked flag column.

        Returns
        -------
        bitmask : `np.unit64`
            Bitmask corresponding to the supplied flag names given the loaded
            configuration.

        Raises
        ------
        ValueError
            Raised if a flag in `flagName` is not included in `columnName`.
        """
        # Validate every requested flag before building the mask.
        for flag in flagNames:
            if not self.flagExists(flag, columnName=columnName):
                raise ValueError(f"flag '{flag}' not included in '{columnName}' flag column")

        bitmask = np.uint64(0)
        for column in self.bit_pack_columns:
            if column['columnName'] != columnName:
                continue
            for bit in column['bitList']:
                if bit['name'] in flagNames:
                    bitmask += np.uint64(2**bit['bit'])

        return bitmask
def getSignificance(catalog):
    """Return the significance value of the first peak in each source
    footprint, or NaN for peaks without a significance field.

    Parameters
    ----------
    catalog : `lsst.afw.table.SourceCatalog`
        Catalog to process.

    Returns
    -------
    significance : `np.ndarray`, (N,)
        Signficance of the first peak in each source footprint.
    """
    def _firstPeakSignificance(record):
        # NaN when the peak catalog carries no significance field.
        peaks = record.getFootprint().peaks
        if "significance" in peaks.schema:
            return peaks[0]["significance"]
        return np.nan

    return np.fromiter((_firstPeakSignificance(record) for record in catalog),
                       dtype=float, count=len(catalog))