Coverage for python/lsst/ap/association/diaPipe.py: 23%
195 statements
#
# LSST Data Management System
# Copyright 2008-2016 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#
23"""PipelineTask for associating DiaSources with previous DiaObjects.
25Additionally performs forced photometry on the calibrated and difference
26images at the updated locations of DiaObjects.
28Currently loads directly from the Apdb rather than pre-loading.
29"""
31__all__ = ("DiaPipelineConfig",
32 "DiaPipelineTask",
33 "DiaPipelineConnections")
import warnings

import lsst.dax.apdb as daxApdb
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as connTypes
import numpy as np
import pandas as pd
from lsst.ap.association import (
    AssociationTask,
    DiaForcedSourceTask,
    LoadDiaCatalogsTask,
    PackageAlertsTask)
from lsst.ap.association.ssoAssociation import SolarSystemAssociationTask
from lsst.daf.base import DateTime
from lsst.meas.base import DetectorVisitIdGeneratorConfig, \
    DiaObjectCalculationTask
from lsst.utils.timer import timeMethod


class DiaPipelineConnections(
        pipeBase.PipelineTaskConnections,
        dimensions=("instrument", "visit", "detector"),
        defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for DiaPipelineTask.
    """
    diaSourceTable = connTypes.Input(
        doc="Catalog of calibrated DiaSources.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    solarSystemObjectTable = connTypes.Input(
        doc="Catalog of Solar System objects expected to be observable in "
            "this detectorVisit.",
        name="visitSsObjects",
        storageClass="DataFrame",
        dimensions=("instrument", "visit"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    exposure = connTypes.Input(
        doc="Calibrated exposure differenced with a template image during "
            "image differencing.",
        name="{fakesType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    template = connTypes.Input(
        doc="Warped template used to create `subtractedExposure`. Not PSF "
            "matched.",
        dimensions=("instrument", "visit", "detector"),
        storageClass="ExposureF",
        name="{fakesType}{coaddName}Diff_templateExp",
    )
    apdbMarker = connTypes.Output(
        doc="Marker dataset storing the configuration of the Apdb for each "
            "visit/detector. Used to signal the completion of the pipeline.",
        name="apdb_marker",
        storageClass="Config",
        dimensions=("instrument", "visit", "detector"),
    )
    associatedDiaSources = connTypes.Output(
        doc="Optional output storing the DiaSource catalog after matching, "
            "calibration, and standardization for insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_assocDiaSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    diaForcedSources = connTypes.Output(
        doc="Optional output storing the forced sources computed at the diaObject positions.",
        name="{fakesType}{coaddName}Diff_diaForcedSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    diaObjects = connTypes.Output(
        doc="Optional output storing the updated diaObjects associated to these sources.",
        name="{fakesType}{coaddName}Diff_diaObject",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)

        if not config.doWriteAssociatedSources:
            self.outputs.remove("associatedDiaSources")
            self.outputs.remove("diaForcedSources")
            self.outputs.remove("diaObjects")
        elif not config.doRunForcedMeasurement:
            self.outputs.remove("diaForcedSources")
        if not config.doSolarSystemAssociation:
            self.inputs.remove("solarSystemObjectTable")
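
    # Illustrative sketch (not part of the original source): these optional
    # connections are dropped when the corresponding config flags are turned
    # off, e.g. a hypothetical pipeline override such as
    #     config.doWriteAssociatedSources = False
    #     config.doSolarSystemAssociation = False
    # removes the three optional outputs and the solarSystemObjectTable input
    # from the generated quantum graph.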

    def adjustQuantum(self, inputs, outputs, label, dataId):
        """Override to make adjustments to `lsst.daf.butler.DatasetRef` objects
        in the `lsst.daf.butler.core.Quantum` during the graph generation stage
        of the activator.

        This implementation checks to make sure that the filters in the dataset
        are compatible with AP processing as set by the Apdb/DPDD schema.

        Parameters
        ----------
        inputs : `dict`
            Dictionary whose keys are an input (regular or prerequisite)
            connection name and whose values are a tuple of the connection
            instance and a collection of associated `DatasetRef` objects.
            The exact type of the nested collections is unspecified; it can be
            assumed to be multi-pass iterable and support `len` and ``in``, but
            it should not be mutated in place. In contrast, the outer
            dictionaries are guaranteed to be temporary copies that are true
            `dict` instances, and hence may be modified and even returned; this
            is especially useful for delegating to `super` (see notes below).
        outputs : `dict`
            Dict of output datasets, with the same structure as ``inputs``.
        label : `str`
            Label for this task in the pipeline (should be used in all
            diagnostic messages).
        dataId : `lsst.daf.butler.DataCoordinate`
            Data ID for this quantum in the pipeline (should be used in all
            diagnostic messages).

        Returns
        -------
        adjusted_inputs : `dict`
            Dict of the same form as ``inputs`` with updated containers of
            input `DatasetRef` objects. Connections that are not changed
            should not be returned at all. Datasets may only be removed, not
            added. Nested collections may be of any multi-pass iterable type,
            and the order of iteration will set the order of iteration within
            `PipelineTask.runQuantum`.
        adjusted_outputs : `dict`
            Dict of updated output datasets, with the same structure and
            interpretation as ``adjusted_inputs``.

        Raises
        ------
        ScalarError
            Raised if any `Input` or `PrerequisiteInput` connection has
            ``multiple`` set to `False`, but multiple datasets are found.
        NoWorkFound
            Raised to indicate that this quantum should not be run; not enough
            datasets were found for a regular `Input` connection, and the
            quantum should be pruned or skipped.
        FileNotFoundError
            Raised to cause QuantumGraph generation to fail (with the message
            included in this exception); not enough datasets were found for a
            `PrerequisiteInput` connection.
        """
        _, refs = inputs["diffIm"]
        for ref in refs:
            if ref.dataId["band"] not in self.config.validBands:
                raise ValueError(
                    f"Requested '{ref.dataId['band']}' not in "
                    "DiaPipelineConfig.validBands. To process bands not in "
                    "the standard Rubin set (ugrizy) you must add the band to "
                    "the validBands list in DiaPipelineConfig and add the "
                    "appropriate columns to the Apdb schema.")
        return super().adjustQuantum(inputs, outputs, label, dataId)


class DiaPipelineConfig(pipeBase.PipelineTaskConfig,
                        pipelineConnections=DiaPipelineConnections):
    """Config for DiaPipelineTask.
    """
    coaddName = pexConfig.Field(
        doc="coadd name: typically one of deep, goodSeeing, or dcr",
        dtype=str,
        default="deep",
    )
    apdb = pexConfig.ConfigurableField(  # TODO: remove on DM-43419
        target=daxApdb.ApdbSql,
        doc="Database connection for storing associated DiaSources and "
            "DiaObjects. Must already be initialized.",
        deprecated="This field has been replaced by ``apdb_config_url``; set "
                   "``doConfigureApdb=False`` to use it. Will be removed after v28.",
    )
    apdb_config_url = pexConfig.Field(
        dtype=str,
        default=None,
        optional=False,
        doc="A config file specifying the APDB and its connection parameters, "
            "typically written by the apdb-cli command-line utility. "
            "The database must already be initialized.",
    )
    validBands = pexConfig.ListField(
        dtype=str,
        default=["u", "g", "r", "i", "z", "y"],
        doc="List of bands that are valid for AP processing. To process a "
            "band not on this list, the appropriate band specific columns "
            "must be added to the Apdb schema in dax_apdb.",
    )
    diaCatalogLoader = pexConfig.ConfigurableField(
        target=LoadDiaCatalogsTask,
        doc="Task to load DiaObjects and DiaSources from the Apdb.",
    )
    associator = pexConfig.ConfigurableField(
        target=AssociationTask,
        doc="Task used to associate DiaSources with DiaObjects.",
    )
    doSolarSystemAssociation = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Process SolarSystem objects through the pipeline.",
    )
    solarSystemAssociator = pexConfig.ConfigurableField(
        target=SolarSystemAssociationTask,
        doc="Task used to associate DiaSources with SolarSystemObjects.",
    )
    diaCalculation = pexConfig.ConfigurableField(
        target=DiaObjectCalculationTask,
        doc="Task to compute summary statistics for DiaObjects.",
    )
    doLoadForcedSources = pexConfig.Field(
        dtype=bool,
        default=True,
        deprecated="Added to allow disabling forced sources for performance "
                   "reasons during the ops rehearsal. "
                   "It is expected to be removed.",
        doc="Load forced DiaSource history from the APDB? "
            "This should only be turned off for debugging purposes.",
    )
    doRunForcedMeasurement = pexConfig.Field(
        dtype=bool,
        default=True,
        deprecated="Added to allow disabling forced sources for performance "
                   "reasons during the ops rehearsal. "
                   "It is expected to be removed.",
        doc="Run forced measurement on all of the diaObjects? "
            "This should only be turned off for debugging purposes.",
    )
    diaForcedSource = pexConfig.ConfigurableField(
        target=DiaForcedSourceTask,
        doc="Task used to force-photometer DiaObject locations in direct and "
            "difference images.",
    )
    alertPackager = pexConfig.ConfigurableField(
        target=PackageAlertsTask,
        doc="Subtask for packaging Ap data into alerts.",
    )
    doPackageAlerts = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Package Dia-data into serialized alerts for distribution and "
            "write them to disk.",
    )
    doWriteAssociatedSources = pexConfig.Field(
        dtype=bool,
        default=True,
        doc="Write out associated DiaSources, DiaForcedSources, and DiaObjects, "
            "formatted following the Science Data Model.",
    )
    imagePixelMargin = pexConfig.RangeField(
        dtype=int,
        default=10,
        min=0,
        doc="Pad the image by this many pixels before removing off-image "
            "diaObjects for association.",
    )
    idGenerator = DetectorVisitIdGeneratorConfig.make_field()
    doConfigureApdb = pexConfig.Field(  # TODO: remove on DM-43419
        dtype=bool,
        default=True,
        doc="Use the deprecated ``apdb`` sub-config to set up the APDB, "
            "instead of the new config (``apdb_config_url``). This field is "
            "provided for backward-compatibility ONLY and will be removed "
            "without notice after v28.",
    )

    def setDefaults(self):
        self.apdb.dia_object_index = "baseline"
        self.apdb.dia_object_columns = []
        self.diaCalculation.plugins = ["ap_meanPosition",
                                       "ap_nDiaSources",
                                       "ap_meanFlux",
                                       "ap_percentileFlux",
                                       "ap_sigmaFlux",
                                       "ap_chi2Flux",
                                       "ap_madFlux",
                                       "ap_skewFlux",
                                       "ap_minMaxFlux",
                                       "ap_maxSlopeFlux",
                                       "ap_meanErrFlux",
                                       "ap_linearFit",
                                       "ap_stetsonJ",
                                       "ap_meanTotFlux",
                                       "ap_sigmaTotFlux"]

    # TODO: remove on DM-43419
    def validate(self):
        # Sidestep Config.validate to avoid validating uninitialized fields we're not using.
        skip = {"apdb_config_url"} if self.doConfigureApdb else {"apdb"}
        for name, field in self._fields.items():
            if name not in skip:
                field.validate(self)

        # It's possible to use apdb without setting it, bypassing the deprecation warning.
        if self.doConfigureApdb:
            warnings.warn("Config field DiaPipelineConfig.apdb is deprecated: "
                          # Workaround for DM-44051
                          "This field has been replaced by ``apdb_config_url``; set "
                          "``doConfigureApdb=False`` to use it. Will be removed after v28.",
                          FutureWarning)
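
    # Illustrative sketch (not part of the original source): the non-deprecated
    # way to point this task at an existing APDB is through ``apdb_config_url``,
    # e.g. a hypothetical config override of the form
    #     config.doConfigureApdb = False
    #     config.apdb_config_url = "/path/to/apdb-config.yaml"  # hypothetical path
    # where the referenced file is typically written by the apdb-cli
    # command-line utility and the database must already be initialized.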


class DiaPipelineTask(pipeBase.PipelineTask):
    """Task for loading, associating and storing Difference Image Analysis
    (DIA) Objects and Sources.
    """
    ConfigClass = DiaPipelineConfig
    _DefaultName = "diaPipe"

    def __init__(self, initInputs=None, **kwargs):
        super().__init__(**kwargs)
        if self.config.doConfigureApdb:
            self.apdb = self.config.apdb.apply()
        else:
            self.apdb = daxApdb.Apdb.from_uri(self.config.apdb_config_url)
        self.makeSubtask("diaCatalogLoader")
        self.makeSubtask("associator")
        self.makeSubtask("diaCalculation")
        if self.config.doRunForcedMeasurement:
            self.makeSubtask("diaForcedSource")
        if self.config.doPackageAlerts:
            self.makeSubtask("alertPackager")
        if self.config.doSolarSystemAssociation:
            self.makeSubtask("solarSystemAssociator")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs["idGenerator"] = self.config.idGenerator.apply(butlerQC.quantum.dataId)
        inputs["band"] = butlerQC.quantum.dataId["band"]
        if not self.config.doSolarSystemAssociation:
            inputs["solarSystemObjectTable"] = None

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @timeMethod
    def run(self,
            diaSourceTable,
            solarSystemObjectTable,
            diffIm,
            exposure,
            template,
            band,
            idGenerator):
        """Process DiaSources and DiaObjects.

        Load previous DiaObjects and their DiaSource history. Calibrate the
        values in the diaSourceCat. Associate new DiaSources with previous
        DiaObjects. Run forced photometry at the updated DiaObject locations.
        Store the results in the Alert Production Database (Apdb).

        Parameters
        ----------
        diaSourceTable : `pandas.DataFrame`
            Newly detected DiaSources.
        solarSystemObjectTable : `pandas.DataFrame` or `None`
            Catalog of Solar System objects expected to be observable in this
            detectorVisit; `None` if solar system association is disabled.
        diffIm : `lsst.afw.image.ExposureF`
            Difference image exposure in which the sources in ``diaSourceCat``
            were detected.
        exposure : `lsst.afw.image.ExposureF`
            Calibrated exposure differenced with a template to create
            ``diffIm``.
        template : `lsst.afw.image.ExposureF`
            Template exposure used to create ``diffIm``.
        band : `str`
            The band in which the new DiaSources were detected.
        idGenerator : `lsst.meas.base.IdGenerator`
            Object that generates source IDs and random number generator seeds.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``apdbMarker`` : Marker dataset to store in the Butler indicating
              that this ccdVisit has completed successfully.
              (`lsst.dax.apdb.ApdbConfig`)
            - ``associatedDiaSources`` : Catalog of newly associated
              DiaSources. (`pandas.DataFrame`)
        """
        # Load the DiaObjects and DiaSource history.
        loaderResult = self.diaCatalogLoader.run(diffIm, self.apdb,
                                                 doLoadForcedSources=self.config.doLoadForcedSources)
        if len(loaderResult.diaObjects) > 0:
            diaObjects = self.purgeDiaObjects(diffIm.getBBox(), diffIm.getWcs(), loaderResult.diaObjects,
                                              buffer=self.config.imagePixelMargin)
        else:
            diaObjects = loaderResult.diaObjects

        # Associate new DiaSources with existing DiaObjects.
        assocResults = self.associator.run(diaSourceTable, diaObjects)

        if self.config.doSolarSystemAssociation:
            ssoAssocResult = self.solarSystemAssociator.run(
                assocResults.unAssocDiaSources,
                solarSystemObjectTable,
                diffIm)
            createResults = self.createNewDiaObjects(
                ssoAssocResult.unAssocDiaSources)
            toAssociate = []
            if len(assocResults.matchedDiaSources) > 0:
                toAssociate.append(assocResults.matchedDiaSources)
            if len(ssoAssocResult.ssoAssocDiaSources) > 0:
                toAssociate.append(ssoAssocResult.ssoAssocDiaSources)
            toAssociate.append(createResults.diaSources)
            associatedDiaSources = pd.concat(toAssociate)
            nTotalSsObjects = ssoAssocResult.nTotalSsObjects
            nAssociatedSsObjects = ssoAssocResult.nAssociatedSsObjects
        else:
            createResults = self.createNewDiaObjects(
                assocResults.unAssocDiaSources)
            toAssociate = []
            if len(assocResults.matchedDiaSources) > 0:
                toAssociate.append(assocResults.matchedDiaSources)
            toAssociate.append(createResults.diaSources)
            associatedDiaSources = pd.concat(toAssociate)
            nTotalSsObjects = 0
            nAssociatedSsObjects = 0

        # Record summary statistics of the association step (including the
        # DiaObjects newly created from unassociated diaSources above) in the
        # task metadata.
        self._add_association_meta_data(assocResults.nUpdatedDiaObjects,
                                        assocResults.nUnassociatedDiaObjects,
                                        createResults.nNewDiaObjects,
                                        nTotalSsObjects,
                                        nAssociatedSsObjects)
        # Index the DiaSource catalog for this visit after all associations
        # have been made.
        updatedDiaObjectIds = associatedDiaSources["diaObjectId"][
            associatedDiaSources["diaObjectId"] != 0].to_numpy()
        associatedDiaSources.set_index(["diaObjectId",
                                        "band",
                                        "diaSourceId"],
                                       drop=False,
                                       inplace=True)

        # Append new DiaObjects and DiaSources to their previous history.
        diaObjects = pd.concat(
            [diaObjects,
             createResults.newDiaObjects.set_index("diaObjectId", drop=False)],
            sort=True)
        if self.testDataFrameIndex(diaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects created after association. This is "
                "likely due to re-running data with an already populated "
                "Apdb. If this was not the case then there was an unexpected "
                "failure in Association while matching and creating new "
                "DiaObjects and should be reported. Exiting.")

        if len(loaderResult.diaSources) > 0:
            # We need to coerce the types of loaderResult.diaSources
            # to be the same as associatedDiaSources, thanks to pandas
            # datetime issues (DM-41100). And we may as well coerce
            # all the columns to ensure consistency for future compatibility.
            for name, dtype in associatedDiaSources.dtypes.items():
                if name in loaderResult.diaSources.columns and loaderResult.diaSources[name].dtype != dtype:
                    self.log.debug(
                        "Coercing loaderResult.diaSources column %s from %s to %s",
                        name,
                        str(loaderResult.diaSources[name].dtype),
                        str(dtype),
                    )
                    loaderResult.diaSources[name] = loaderResult.diaSources[name].astype(dtype)

            mergedDiaSourceHistory = pd.concat(
                [loaderResult.diaSources, associatedDiaSources],
                sort=True)
        else:
            mergedDiaSourceHistory = pd.concat([associatedDiaSources], sort=True)

        # Test for DiaSource duplication first. If duplicates are found,
        # this likely means this is duplicate data being processed and sent
        # to the Apdb.
        if self.testDataFrameIndex(mergedDiaSourceHistory):
            raise RuntimeError(
                "Duplicate DiaSources found after association and merging "
                "with history. This is likely due to re-running data with an "
                "already populated Apdb. If this was not the case then there "
                "was an unexpected failure in Association while matching "
                "sources to objects, and should be reported. Exiting.")

        # Compute DiaObject Summary statistics from their full DiaSource
        # history.
        diaCalResult = self.diaCalculation.run(
            diaObjects,
            mergedDiaSourceHistory,
            updatedDiaObjectIds,
            [band])
        # Test for duplication in the updated DiaObjects.
        if self.testDataFrameIndex(diaCalResult.diaObjectCat):
            raise RuntimeError(
                "Duplicate DiaObjects (loaded + updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")
        if self.testDataFrameIndex(diaCalResult.updatedDiaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects (updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")

        if self.config.doRunForcedMeasurement:
            # Force photometer on the Difference and Calibrated exposures using
            # the new and updated DiaObject locations.
            diaForcedSources = self.diaForcedSource.run(
                diaCalResult.diaObjectCat,
                diaCalResult.updatedDiaObjects.loc[:, "diaObjectId"].to_numpy(),
                exposure,
                diffIm,
                idGenerator=idGenerator)
        else:
            # alertPackager needs correct columns
            diaForcedSources = pd.DataFrame(columns=[
                "diaForcedSourceId", "diaObjectID", "ccdVisitID", "psfFlux", "psfFluxErr",
                "x", "y", "midpointMjdTai", "band",
            ])

        # Store DiaSources, updated DiaObjects, and DiaForcedSources in the
        # Apdb.
        self.apdb.store(
            DateTime.now().toAstropy(),
            diaCalResult.updatedDiaObjects,
            associatedDiaSources,
            diaForcedSources)

        if self.config.doPackageAlerts:
            if len(loaderResult.diaForcedSources) > 1:
                # We need to coerce the types of loaderResult.diaForcedSources
                # to be the same as associatedDiaSources, thanks to pandas
                # datetime issues (DM-41100). And we may as well coerce
                # all the columns to ensure consistency for future compatibility.
                for name, dtype in diaForcedSources.dtypes.items():
                    if name in loaderResult.diaForcedSources.columns and \
                            loaderResult.diaForcedSources[name].dtype != dtype:
                        self.log.debug(
                            "Coercing loaderResult.diaForcedSources column %s from %s to %s",
                            name,
                            str(loaderResult.diaForcedSources[name].dtype),
                            str(dtype),
                        )
                        loaderResult.diaForcedSources[name] = (
                            loaderResult.diaForcedSources[name].astype(dtype)
                        )
                diaForcedSources = pd.concat(
                    [diaForcedSources, loaderResult.diaForcedSources],
                    sort=True)
                if self.testDataFrameIndex(diaForcedSources):
                    self.log.warning(
                        "Duplicate DiaForcedSources created after merge with "
                        "history and new sources. This may cause downstream "
                        "problems. Dropping duplicates.")
                    # Drop duplicates via index and keep the first appearance.
                    # Reset due to the index shape being slightly different
                    # than expected.
                    diaForcedSources = diaForcedSources.groupby(
                        diaForcedSources.index).first()
                    diaForcedSources.reset_index(drop=True, inplace=True)
                    diaForcedSources.set_index(
                        ["diaObjectId", "diaForcedSourceId"],
                        drop=False,
                        inplace=True)
            self.alertPackager.run(associatedDiaSources,
                                   diaCalResult.diaObjectCat,
                                   loaderResult.diaSources,
                                   diaForcedSources,
                                   diffIm,
                                   exposure,
                                   template,
                                   doRunForcedMeasurement=self.config.doRunForcedMeasurement,
                                   )

        # For historical reasons, apdbMarker is a Config even if it's not meant to be read.
        # A default Config is the cheapest way to satisfy the storage class.
        marker = self.config.apdb.value if self.config.doConfigureApdb else pexConfig.Config()
        return pipeBase.Struct(apdbMarker=marker,
                               associatedDiaSources=associatedDiaSources,
                               diaForcedSources=diaForcedSources,
                               diaObjects=diaObjects,
                               )
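
    # Illustrative sketch (not part of the original source): callers typically
    # read the Struct returned by run() by attribute, e.g.
    #     results = task.run(diaSourceTable, None, diffIm, exposure, template,
    #                        band, idGenerator)  # hypothetical call
    #     results.associatedDiaSources  # DiaSources stored in the Apdb
    #     results.apdbMarker            # marker Config signalling completion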

    def createNewDiaObjects(self, unAssocDiaSources):
        """Loop through the set of DiaSources and create new DiaObjects
        for unassociated DiaSources.

        Parameters
        ----------
        unAssocDiaSources : `pandas.DataFrame`
            Set of DiaSources to create new DiaObjects from.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct containing:

            - ``diaSources`` : DiaSource catalog with updated DiaObject ids.
              (`pandas.DataFrame`)
            - ``newDiaObjects`` : Newly created DiaObjects from the
              unassociated DiaSources. (`pandas.DataFrame`)
            - ``nNewDiaObjects`` : Number of newly created diaObjects. (`int`)
        """
        if len(unAssocDiaSources) == 0:
            tmpObj = self._initialize_dia_object(0)
            newDiaObjects = pd.DataFrame(data=[],
                                         columns=tmpObj.keys())
        else:
            newDiaObjects = unAssocDiaSources["diaSourceId"].apply(
                self._initialize_dia_object)
            unAssocDiaSources["diaObjectId"] = unAssocDiaSources["diaSourceId"]
        return pipeBase.Struct(diaSources=unAssocDiaSources,
                               newDiaObjects=newDiaObjects,
                               nNewDiaObjects=len(newDiaObjects))

    def _initialize_dia_object(self, objId):
        """Create a new DiaObject with values required to be initialized by the
        Apdb.

        Parameters
        ----------
        objId : `int`
            ``diaObjectId`` value for the new DiaObject.

        Returns
        -------
        diaObject : `pandas.Series`
            Newly created DiaObject with keys:

            ``diaObjectId``
                Unique DiaObjectId (`int`).
            ``pmParallaxNdata``
                Number of data points used for parallax calculation (`int`).
            ``nearbyObj1``
                Id of a nearby Object in the Object table (`int`).
            ``nearbyObj2``
                Id of a nearby Object in the Object table (`int`).
            ``nearbyObj3``
                Id of a nearby Object in the Object table (`int`).
            ``?_psfFluxNdata``
                Number of data points used to calculate point source flux
                summary statistics in each bandpass (`int`).
        """
        new_dia_object = {"diaObjectId": objId,
                          "pmParallaxNdata": 0,
                          "nearbyObj1": 0,
                          "nearbyObj2": 0,
                          "nearbyObj3": 0}
        for f in ["u", "g", "r", "i", "z", "y"]:
            new_dia_object["%s_psfFluxNdata" % f] = 0
        return pd.Series(data=new_dia_object)
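
    # Illustrative sketch (not part of the original source): for example,
    # _initialize_dia_object(42) returns a pandas Series with entries
    #     diaObjectId=42, pmParallaxNdata=0, nearbyObj1..nearbyObj3=0,
    #     u_psfFluxNdata=0, ..., y_psfFluxNdata=0,
    # i.e. the columns the Apdb expects to be initialized for a new DiaObject.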

    def testDataFrameIndex(self, df):
        """Test the sorted DataFrame index for duplicates.

        Wrapped as a separate function to allow for mocking of this task in
        unit testing. The default mock return value for this test is `True`.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to test.

        Returns
        -------
        `bool`
            True if the DataFrame index contains duplicate entries.
        """
        return df.index.has_duplicates
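
    # Illustrative sketch (not part of the original source): the duplicate test
    # uses the standard pandas Index.has_duplicates property, e.g.
    #     pd.DataFrame({"a": [1, 2]}, index=[10, 10]).index.has_duplicates  # True
    #     pd.DataFrame({"a": [1, 2]}, index=[10, 11]).index.has_duplicates  # False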

    def _add_association_meta_data(self,
                                   nUpdatedDiaObjects,
                                   nUnassociatedDiaObjects,
                                   nNewDiaObjects,
                                   nTotalSsObjects,
                                   nAssociatedSsObjects):
        """Store summaries of the association step in the task metadata.

        Parameters
        ----------
        nUpdatedDiaObjects : `int`
            Number of previous DiaObjects associated and updated in this
            ccdVisit.
        nUnassociatedDiaObjects : `int`
            Number of previous DiaObjects that were not associated or updated
            in this ccdVisit.
        nNewDiaObjects : `int`
            Number of newly created DiaObjects for this ccdVisit.
        nTotalSsObjects : `int`
            Number of SolarSystemObjects within the observable detector
            area.
        nAssociatedSsObjects : `int`
            Number of successfully associated SolarSystemObjects.
        """
        self.metadata.add('numUpdatedDiaObjects', nUpdatedDiaObjects)
        self.metadata.add('numUnassociatedDiaObjects', nUnassociatedDiaObjects)
        self.metadata.add('numNewDiaObjects', nNewDiaObjects)
        self.metadata.add('numTotalSolarSystemObjects', nTotalSsObjects)
        self.metadata.add('numAssociatedSsObjects', nAssociatedSsObjects)

    def purgeDiaObjects(self, bbox, wcs, diaObjCat, buffer=0):
        """Drop diaObjects that are outside the exposure bounding box.

        Parameters
        ----------
        bbox : `lsst.geom.Box2I`
            Bounding box of the exposure.
        wcs : `lsst.afw.geom.SkyWcs`
            Coordinate system definition (wcs) for the exposure.
        diaObjCat : `pandas.DataFrame`
            DiaObjects loaded from the Apdb.
        buffer : `int`, optional
            Width, in pixels, to pad the exposure bounding box.

        Returns
        -------
        diaObjCat : `pandas.DataFrame`
            DiaObjects loaded from the Apdb, restricted to the exposure
            bounding box.
        """
        try:
            bbox.grow(buffer)
            raVals = diaObjCat.ra.to_numpy()
            decVals = diaObjCat.dec.to_numpy()
            xVals, yVals = wcs.skyToPixelArray(raVals, decVals, degrees=True)
            selector = bbox.contains(xVals, yVals)
            nPurged = np.sum(~selector)
            if nPurged > 0:
                diaObjCat = diaObjCat[selector].copy()
                self.log.info(f"Dropped {nPurged} diaObjects that were outside the bbox "
                              f"leaving {len(diaObjCat)} in the catalog")
        except Exception as e:
            self.log.warning("Error attempting to check diaObject history: %s", e)
        return diaObjCat
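
    # Illustrative sketch (not part of the original source): run() invokes this
    # method on the catalog loaded from the Apdb, e.g.
    #     diaObjects = self.purgeDiaObjects(diffIm.getBBox(), diffIm.getWcs(),
    #                                       loaderResult.diaObjects,
    #                                       buffer=self.config.imagePixelMargin)
    # Note that bbox.grow(buffer) enlarges the box in place before the
    # vectorized bbox.contains(xVals, yVals) check selects the on-image objects.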