lsst.pipe.tasks g38dd528b55+8e7d1b62c5
postprocess.py
1# This file is part of pipe_tasks
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21
22import functools
23import pandas as pd
24from collections import defaultdict
25import numpy as np
26import numbers
27import os
28
29import lsst.geom
30import lsst.pex.config as pexConfig
31import lsst.pipe.base as pipeBase
32import lsst.daf.base as dafBase
33from lsst.pipe.base import connectionTypes
34import lsst.afw.table as afwTable
35from lsst.meas.base import SingleFrameMeasurementTask
36from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
37from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer
38from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate
39
40from .parquetTable import ParquetTable
41from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
42from .functors import CompositeFunctor, Column
43
44
45def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
46 """Flattens a dataframe with multilevel column index
47 """
48 newDf = pd.DataFrame()
49 # band is the level 0 index
50 dfBands = df.columns.unique(level=0).values
51 for band in dfBands:
52 subdf = df[band]
53 columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
54 newColumns = {c: columnFormat.format(band, c)
55 for c in subdf.columns if c not in noDupCols}
56 cols = list(newColumns.keys())
57 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
58
59 # Band must be present in the input and output or else column is all NaN:
60 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
61 # Get the unexploded columns from any present band's partition
62 noDupDf = df[presentBands[0]][noDupCols]
63 newDf = pd.concat([noDupDf, newDf], axis=1)
64 return newDf
65
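# Editor's illustrative sketch (not part of the original module): how
# flattenFilters turns a band-keyed MultiIndex DataFrame into flat,
# name-munged columns. The toy column name 'PsFlux' is hypothetical.
#
#     import pandas as pd
#
#     df = pd.DataFrame({('g', 'coord_ra'): [1.0], ('g', 'coord_dec'): [2.0],
#                        ('g', 'PsFlux'): [10.0],
#                        ('r', 'coord_ra'): [1.0], ('r', 'coord_dec'): [2.0],
#                        ('r', 'PsFlux'): [12.0]})
#     flat = flattenFilters(df)                  # columns: coord_ra, coord_dec, g_PsFlux, r_PsFlux
#     flat = flattenFilters(df, camelCase=True)  # columns: coord_ra, coord_dec, gPsFlux, rPsFlux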
66
67class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
68 defaultTemplates={"coaddName": "deep"},
69 dimensions=("tract", "patch", "skymap")):
70 inputCatalogMeas = connectionTypes.Input(
71 doc="Catalog of source measurements on the deepCoadd.",
72 dimensions=("tract", "patch", "band", "skymap"),
73 storageClass="SourceCatalog",
74 name="{coaddName}Coadd_meas",
75 multiple=True
76 )
77 inputCatalogForcedSrc = connectionTypes.Input(
78 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
79 dimensions=("tract", "patch", "band", "skymap"),
80 storageClass="SourceCatalog",
81 name="{coaddName}Coadd_forced_src",
82 multiple=True
83 )
84 inputCatalogRef = connectionTypes.Input(
85 doc="Catalog marking the primary detection (which band provides a good shape and position)"
86 "for each detection in deepCoadd_mergeDet.",
87 dimensions=("tract", "patch", "skymap"),
88 storageClass="SourceCatalog",
89 name="{coaddName}Coadd_ref"
90 )
91 outputCatalog = connectionTypes.Output(
92 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
93 "stored as a DataFrame with a multi-level column index per-patch.",
94 dimensions=("tract", "patch", "skymap"),
95 storageClass="DataFrame",
96 name="{coaddName}Coadd_obj"
97 )
98
99
100class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
101 pipelineConnections=WriteObjectTableConnections):
102 engine = pexConfig.Field(
103 dtype=str,
104 default="pyarrow",
105 doc="Parquet engine for writing (pyarrow or fastparquet)"
106 )
107 coaddName = pexConfig.Field(
108 dtype=str,
109 default="deep",
110 doc="Name of coadd"
111 )
112
113
114class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
115 """Write filter-merged source tables to parquet
116 """
117 _DefaultName = "writeObjectTable"
118 ConfigClass = WriteObjectTableConfig
119 RunnerClass = MergeSourcesRunner
120
121 # Names of table datasets to be merged
122 inputDatasets = ('forced_src', 'meas', 'ref')
123
124 # Tag of output dataset written by `MergeSourcesTask.write`
125 outputDataset = 'obj'
126
127 def __init__(self, butler=None, schema=None, **kwargs):
128 # It is a shame that this class can't use the default init for CmdLineTask
129 # But to do so would require its own special task runner, which is many
130 # more lines of specialization, so this is how it is for now
131 super().__init__(**kwargs)
132
133 def runDataRef(self, patchRefList):
134 """!
135 @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in
136 subclasses that inherit from MergeSourcesTask.
137 @param[in] patchRefList list of data references for each filter
138 """
139 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
140 dataId = patchRefList[0].dataId
141 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
142 self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
143
144 def runQuantum(self, butlerQC, inputRefs, outputRefs):
145 inputs = butlerQC.get(inputRefs)
146
147 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
148 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
149 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
150 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
151
152 catalogs = {}
153 for band in measDict.keys():
154 catalogs[band] = {'meas': measDict[band]['meas'],
155 'forced_src': forcedSourceDict[band]['forced_src'],
156 'ref': inputs['inputCatalogRef']}
157 dataId = butlerQC.quantum.dataId
158 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
159 outputs = pipeBase.Struct(outputCatalog=df)
160 butlerQC.put(outputs, outputRefs)
161
162 @classmethod
163 def _makeArgumentParser(cls):
164 """Create a suitable ArgumentParser.
165
166 We will use the ArgumentParser to get a list of data
167 references for patches; the RunnerClass will sort them into lists
168 of data references for the same patch.
169
170 References first of self.inputDatasets, rather than
171 self.inputDataset
172 """
173 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
174
175 def readCatalog(self, patchRef):
176 """Read input catalogs
177
178 Read all the input datasets given by the 'inputDatasets'
179 attribute.
180
181 Parameters
182 ----------
183 patchRef : `lsst.daf.persistence.ButlerDataRef`
184 Data reference for patch
185
186 Returns
187 -------
188 Tuple consisting of band name and a dict of catalogs, keyed by
189 dataset name
190 """
191 band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
192 catalogDict = {}
193 for dataset in self.inputDatasets:
194 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
195 self.log.info("Read %d sources from %s for band %s: %s",
196 len(catalog), dataset, band, patchRef.dataId)
197 catalogDict[dataset] = catalog
198 return band, catalogDict
199
200 def run(self, catalogs, tract, patch):
201 """Merge multiple catalogs.
202
203 Parameters
204 ----------
205 catalogs : `dict`
206 Mapping from filter names to dict of catalogs.
207 tract : `int`
208 tractId to use for the tractId column.
209 patch : `str`
210 patchId to use for the patchId column.
211
212 Returns
213 -------
214 catalog : `pandas.DataFrame`
215 Merged dataframe
216 """
217
218 dfs = []
219 for filt, tableDict in catalogs.items():
220 for dataset, table in tableDict.items():
221 # Convert afwTable to pandas DataFrame
222 df = table.asAstropy().to_pandas().set_index('id', drop=True)
223
224 # Sort columns by name, to ensure matching schema among patches
225 df = df.reindex(sorted(df.columns), axis=1)
226 df['tractId'] = tract
227 df['patchId'] = patch
228
229 # Make columns a 3-level MultiIndex
230 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
231 names=('dataset', 'band', 'column'))
232 dfs.append(df)
233
234 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
235 return catalog
236
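# Editor's illustrative sketch (not part of the original module): the
# functools.reduce/join in run() above aligns the per-(dataset, band) frames
# on their shared 'id' index and yields one wide table with a three-level
# (dataset, band, column) column index. The toy frames below are hypothetical.
#
#     import functools
#     import pandas as pd
#
#     meas_g = pd.DataFrame({('meas', 'g', 'flux'): [1.0, 2.0]}, index=[10, 11])
#     ref_g = pd.DataFrame({('ref', 'g', 'ra'): [0.1, 0.2]}, index=[10, 11])
#     merged = functools.reduce(lambda d1, d2: d1.join(d2), [meas_g, ref_g])
#     # merged has columns ('meas', 'g', 'flux') and ('ref', 'g', 'ra')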
237 def write(self, patchRef, catalog):
238 """Write the output.
239
240 Parameters
241 ----------
242 catalog : `ParquetTable`
243 Catalog to write
244 patchRef : `lsst.daf.persistence.ButlerDataRef`
245 Data reference for patch
246 """
247 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
248 # since the filter isn't actually part of the data ID for the dataset we're saving,
249 # it's confusing to see it in the log message, even if the butler simply ignores it.
250 mergeDataId = patchRef.dataId.copy()
251 del mergeDataId["filter"]
252 self.log.info("Wrote merged catalog: %s", mergeDataId)
253
254 def writeMetadata(self, dataRefList):
255 """No metadata to write, and not sure how to write it for a list of dataRefs.
256 """
257 pass
258
259
260class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
261 defaultTemplates={"catalogType": ""},
262 dimensions=("instrument", "visit", "detector")):
263
264 catalog = connectionTypes.Input(
265 doc="Input full-depth catalog of sources produced by CalibrateTask",
266 name="{catalogType}src",
267 storageClass="SourceCatalog",
268 dimensions=("instrument", "visit", "detector")
269 )
270 outputCatalog = connectionTypes.Output(
271 doc="Catalog of sources, `src` in Parquet format. The 'id' column is "
272 "replaced with an index; all other columns are unchanged.",
273 name="{catalogType}source",
274 storageClass="DataFrame",
275 dimensions=("instrument", "visit", "detector")
276 )
277
278
279class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
280 pipelineConnections=WriteSourceTableConnections):
281 doApplyExternalPhotoCalib = pexConfig.Field(
282 dtype=bool,
283 default=False,
284 doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if "
285 "generating Source Tables from older src tables which do not already have local calib columns")
286 )
287 doApplyExternalSkyWcs = pexConfig.Field(
288 dtype=bool,
289 default=False,
290 doc=("Add local WCS columns from the calexp.wcs? Should only set True if "
291 "generating Source Tables from older src tables which do not already have local calib columns")
292 )
293
294
295class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
296 """Write source table to parquet
297 """
298 _DefaultName = "writeSourceTable"
299 ConfigClass = WriteSourceTableConfig
300
301 def runDataRef(self, dataRef):
302 src = dataRef.get('src')
303 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
304 src = self.addCalibColumns(src, dataRef)
305
306 ccdVisitId = dataRef.get('ccdExposureId')
307 result = self.run(src, ccdVisitId=ccdVisitId)
308 dataRef.put(result.table, 'source')
309
310 def runQuantum(self, butlerQC, inputRefs, outputRefs):
311 inputs = butlerQC.get(inputRefs)
312 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
313 result = self.run(**inputs).table
314 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
315 butlerQC.put(outputs, outputRefs)
316
317 def run(self, catalog, ccdVisitId=None):
318 """Convert `src` catalog to parquet
319
320 Parameters
321 ----------
322 catalog: `afwTable.SourceCatalog`
323 catalog to be converted
324 ccdVisitId: `int`
325 ccdVisitId to be added as a column
326
327 Returns
328 -------
329 result : `lsst.pipe.base.Struct`
330 ``table``
331 `ParquetTable` version of the input catalog
332 """
333 self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
334 df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
335 df['ccdVisitId'] = ccdVisitId
336 return pipeBase.Struct(table=ParquetTable(dataFrame=df))
337
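# Editor's illustrative sketch (not part of the original module): the
# afw-to-pandas conversion used in run() above, applied to a minimal
# SourceCatalog. The ccdVisitId value is hypothetical.
#
#     import lsst.afw.table as afwTable
#
#     schema = afwTable.SourceTable.makeMinimalSchema()
#     cat = afwTable.SourceCatalog(schema)
#     cat.addNew()
#     df = cat.asAstropy().to_pandas().set_index('id', drop=True)
#     df['ccdVisitId'] = 123456  # hypothetical packed visit+detector id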
338 def addCalibColumns(self, catalog, dataRef):
339 """Add columns with local calibration evaluated at each centroid
340
341 for backwards compatibility with old repos.
342 This exists for the purpose of converting old src catalogs
343 (which don't have the expected local calib columns) to Source Tables.
344
345 Parameters
346 ----------
347 catalog: `afwTable.SourceCatalog`
348 catalog to which calib columns will be added
349 dataRef: `lsst.daf.persistence.ButlerDataRef`
350 Data reference for fetching the calibs from disk.
351
352 Returns
353 -------
354 newCat: `afwTable.SourceCatalog`
355 Source Catalog with requested local calib columns
356 """
357 mapper = afwTable.SchemaMapper(catalog.schema)
358 measureConfig = SingleFrameMeasurementTask.ConfigClass()
359 measureConfig.doReplaceWithNoise = False
360
361 # Just need the WCS or the PhotoCalib attached to an exposure
362 exposure = dataRef.get('calexp_sub',
363 bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))
364
365 mapper = afwTable.SchemaMapper(catalog.schema)
366 mapper.addMinimalSchema(catalog.schema, True)
367 schema = mapper.getOutputSchema()
368
369 exposureIdInfo = dataRef.get("expIdInfo")
370 measureConfig.plugins.names = []
371 if self.config.doApplyExternalSkyWcs:
372 plugin = 'base_LocalWcs'
373 if plugin in schema:
374 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
375 else:
376 measureConfig.plugins.names.add(plugin)
377
378 if self.config.doApplyExternalPhotoCalib:
379 plugin = 'base_LocalPhotoCalib'
380 if plugin in schema:
381 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
382 else:
383 measureConfig.plugins.names.add(plugin)
384
385 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
386 newCat = afwTable.SourceCatalog(schema)
387 newCat.extend(catalog, mapper=mapper)
388 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
389 return newCat
390
391 def writeMetadata(self, dataRef):
392 """No metadata to write.
393 """
394 pass
395
396 @classmethod
397 def _makeArgumentParser(cls):
398 parser = ArgumentParser(name=cls._DefaultName)
399 parser.add_id_argument("--id", 'src',
400 help="data ID, e.g. --id visit=12345 ccd=0")
401 return parser
402
403
404class PostprocessAnalysis(object):
405 """Calculate columns from ParquetTable
406
407 This object manages and organizes an arbitrary set of computations
408 on a catalog. The catalog is defined by a
409 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
410 `deepCoadd_obj` dataset, and the computations are defined by a collection
411 of `lsst.pipe.tasks.functor.Functor` objects (or, equivalently,
412 a `CompositeFunctor`).
413
414 After the object is initialized, accessing the `.df` attribute (which
415 holds the `pandas.DataFrame` containing the results of the calculations) triggers
416 computation of said dataframe.
417
418 One of the conveniences of using this object is the ability to define a desired common
419 filter for all functors. This enables the same functor collection to be passed to
420 several different `PostprocessAnalysis` objects without having to change the original
421 functor collection, since the `filt` keyword argument of this object triggers an
422 overwrite of the `filt` property for all functors in the collection.
423
424 This object also allows a list of refFlags to be passed, and defines a set of default
425 refFlags that are always included even if not requested.
426
427 If a list of `ParquetTable` objects is passed, rather than a single one, then the
428 calculations will be mapped over all the input catalogs. In principle, it should
429 be straightforward to parallelize this activity, but initial tests have failed
430 (see TODO in code comments).
431
432 Parameters
433 ----------
434 parq : `lsst.pipe.tasks.ParquetTable` (or list of such)
435 Source catalog(s) for computation
436
437 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
438 Computations to do (functors that act on `parq`).
439 If a dict, the output
440 DataFrame will have columns keyed accordingly.
441 If a list, the column keys will come from the
442 `.shortname` attribute of each functor.
443
444 filt : `str` (optional)
445 Filter in which to calculate. If provided,
446 this will overwrite any existing `.filt` attribute
447 of the provided functors.
448
449 flags : `list` (optional)
450 List of flags (per-band) to include in output table.
451 Taken from the `meas` dataset if applied to a multilevel Object Table.
452
453 refFlags : `list` (optional)
454 List of refFlags (only reference band) to include in output table.
455
456 forcedFlags : `list` (optional)
457 List of flags (per-band) to include in output table.
458 Taken from the ``forced_src`` dataset if applied to a
459 multilevel Object Table. Intended for flags from measurement plugins
460 only run during multi-band forced-photometry.
461 """
462 _defaultRefFlags = []
463 _defaultFuncs = ()
464
465 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None, forcedFlags=None):
466 self.parq = parq
467 self.functors = functors
468
469 self.filt = filt
470 self.flags = list(flags) if flags is not None else []
471 self.forcedFlags = list(forcedFlags) if forcedFlags is not None else []
472 self.refFlags = list(self._defaultRefFlags)
473 if refFlags is not None:
474 self.refFlags += list(refFlags)
475
476 self._df = None
477
478 @property
479 def defaultFuncs(self):
480 funcs = dict(self._defaultFuncs)
481 return funcs
482
483 @property
484 def func(self):
485 additionalFuncs = self.defaultFuncs
486 additionalFuncs.update({flag: Column(flag, dataset='forced_src') for flag in self.forcedFlags})
487 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
488 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
489
490 if isinstance(self.functors, CompositeFunctor):
491 func = self.functors
492 else:
493 func = CompositeFunctor(self.functors)
494
495 func.funcDict.update(additionalFuncs)
496 func.filt = self.filt
497
498 return func
499
500 @property
501 def noDupCols(self):
502 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
503
504 @property
505 def df(self):
506 if self._df is None:
507 self.compute()
508 return self._df
509
510 def compute(self, dropna=False, pool=None):
511 # map over multiple parquet tables
512 if type(self.parq) in (list, tuple):
513 if pool is None:
514 dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
515 else:
516 # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
517 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
518 self._df = pd.concat(dflist)
519 else:
520 self._df = self.func(self.parq, dropna=dropna)
521
522 return self._df
523
524
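# Editor's illustrative sketch (not part of the original module): a minimal
# programmatic use of PostprocessAnalysis with plain Column functors. The
# column names and the `parq` ParquetTable are hypothetical; the result is
# computed lazily when `.df` is first accessed.
#
#     from lsst.pipe.tasks.functors import Column
#
#     funcs = {'ra': Column('coord_ra', dataset='ref'),
#              'psfFlux': Column('base_PsfFlux_instFlux', dataset='meas')}
#     analysis = PostprocessAnalysis(parq, funcs, filt='g',
#                                    refFlags=['detect_isPrimary'])
#     df = analysis.df  # triggers compute() and caches the DataFrame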
525class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
526 dimensions=()):
527 """Expected Connections for subclasses of TransformCatalogBaseTask.
528
529 Must be subclassed.
530 """
531 inputCatalog = connectionTypes.Input(
532 name="",
533 storageClass="DataFrame",
534 )
535 outputCatalog = connectionTypes.Output(
536 name="",
537 storageClass="DataFrame",
538 )
539
540
541class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
542 pipelineConnections=TransformCatalogBaseConnections):
543 functorFile = pexConfig.Field(
544 dtype=str,
545 doc="Path to YAML file specifying Science Data Model functors to use "
546 "when copying columns and computing calibrated values.",
547 default=None,
548 optional=True
549 )
550 primaryKey = pexConfig.Field(
551 dtype=str,
552 doc="Name of column to be set as the DataFrame index. If None, the index"
553 "will be named `id`",
554 default=None,
555 optional=True
556 )
557
558
559class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
560 """Base class for transforming/standardizing a catalog
561
562 by applying functors that convert units and apply calibrations.
563 The purpose of this task is to perform a set of computations on
564 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
565 results to a new dataset (which needs to be declared in an `outputDataset`
566 attribute).
567
568 The calculations to be performed are defined in a YAML file that specifies
569 a set of functors to be computed, provided as
570 a `--functorFile` config parameter. An example of such a YAML file
571 is the following:
572
573 funcs:
574 psfMag:
575 functor: Mag
576 args:
577 - base_PsfFlux
578 filt: HSC-G
579 dataset: meas
580 cmodel_magDiff:
581 functor: MagDiff
582 args:
583 - modelfit_CModel
584 - base_PsfFlux
585 filt: HSC-G
586 gauss_magDiff:
587 functor: MagDiff
588 args:
589 - base_GaussianFlux
590 - base_PsfFlux
591 filt: HSC-G
592 count:
593 functor: Column
594 args:
595 - base_InputCount_value
596 filt: HSC-G
597 deconvolved_moments:
598 functor: DeconvolvedMoments
599 filt: HSC-G
600 dataset: forced_src
601 refFlags:
602 - calib_psfUsed
603 - merge_measurement_i
604 - merge_measurement_r
605 - merge_measurement_z
606 - merge_measurement_y
607 - merge_measurement_g
608 - base_PixelFlags_flag_inexact_psfCenter
609 - detect_isPrimary
610
611 The names for each entry under "funcs" will become the names of columns in the
612 output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
613 Positional arguments to be passed to each functor are in the `args` list,
614 and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
615 `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.
616
617 The "flags" entry is the default shortcut for `Column` functors.
618 All columns listed under "flags" will be copied to the output table
619 untransformed. They can be of any datatype.
620 In the special case of transforming a multi-level object table with
621 band and dataset indices (deepCoadd_obj), these will be taken from the
622 `meas` dataset and exploded out per band.
623
624 There are two special shortcuts that only apply when transforming
625 multi-level Object (deepCoadd_obj) tables:
626 - The "refFlags" entry is shortcut for `Column` functor
627 taken from the `'ref'` dataset if transforming an ObjectTable.
628 - The "forcedFlags" entry is shortcut for `Column` functors.
629 taken from the ``forced_src`` dataset if transforming an ObjectTable.
630 These are expanded out per band.
631
632
633 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
634 to organize and execute the calculations.
635
636 """
637 @property
638 def _DefaultName(self):
639 raise NotImplementedError('Subclass must define "_DefaultName" attribute')
640
641 @property
642 def outputDataset(self):
643 raise NotImplementedError('Subclass must define "outputDataset" attribute')
644
645 @property
646 def inputDataset(self):
647 raise NotImplementedError('Subclass must define "inputDataset" attribute')
648
649 @property
650 def ConfigClass(self):
651 raise NotImplementedError('Subclass must define "ConfigClass" attribute')
652
653 def __init__(self, *args, **kwargs):
654 super().__init__(*args, **kwargs)
655 if self.config.functorFile:
656 self.log.info('Loading transform functor definitions from %s',
657 self.config.functorFile)
658 self.funcs = CompositeFunctor.from_file(self.config.functorFile)
659 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
660 else:
661 self.funcs = None
662
663 def runQuantum(self, butlerQC, inputRefs, outputRefs):
664 inputs = butlerQC.get(inputRefs)
665 if self.funcs is None:
666 raise ValueError("config.functorFile is None. "
667 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
668 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
669 dataId=outputRefs.outputCatalog.dataId.full)
670 outputs = pipeBase.Struct(outputCatalog=result)
671 butlerQC.put(outputs, outputRefs)
672
673 def runDataRef(self, dataRef):
674 parq = dataRef.get()
675 if self.funcs is None:
676 raise ValueError("config.functorFile is None. "
677 "Must be a valid path to yaml in order to run as a CommandlineTask.")
678 df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
679 self.write(df, dataRef)
680 return df
681
682 def run(self, parq, funcs=None, dataId=None, band=None):
683 """Do postprocessing calculations
684
685 Takes a `ParquetTable` object and dataId,
686 returns a dataframe with results of postprocessing calculations.
687
688 Parameters
689 ----------
690 parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
691 ParquetTable from which calculations are done.
692 funcs : `lsst.pipe.tasks.functors.Functors`
693 Functors to apply to the table's columns
694 dataId : dict, optional
695 Used to add a `patchId` column to the output dataframe.
696 band : `str`, optional
697 Filter band that is being processed.
698
699 Returns
700 -------
701 `pandas.DataFrame`
702
703 """
704 self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
705
706 df = self.transform(band, parq, funcs, dataId).df
707 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
708 return df
709
710 def getFunctors(self):
711 return self.funcs
712
713 def getAnalysis(self, parq, funcs=None, band=None):
714 if funcs is None:
715 funcs = self.funcs
716 analysis = PostprocessAnalysis(parq, funcs, filt=band)
717 return analysis
718
719 def transform(self, band, parq, funcs, dataId):
720 analysis = self.getAnalysis(parq, funcs=funcs, band=band)
721 df = analysis.df
722 if dataId is not None:
723 for key, value in dataId.items():
724 df[str(key)] = value
725
726 if self.config.primaryKey:
727 if df.index.name != self.config.primaryKey and self.config.primaryKey in df:
728 df.reset_index(inplace=True, drop=True)
729 df.set_index(self.config.primaryKey, inplace=True)
730
731 return pipeBase.Struct(
732 df=df,
733 analysis=analysis
734 )
735
736 def write(self, df, parqRef):
737 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)
738
739 def writeMetadata(self, dataRef):
740 """No metadata to write.
741 """
742 pass
743
744
745class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
746 defaultTemplates={"coaddName": "deep"},
747 dimensions=("tract", "patch", "skymap")):
748 inputCatalog = connectionTypes.Input(
749 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
750 "stored as a DataFrame with a multi-level column index per-patch.",
751 dimensions=("tract", "patch", "skymap"),
752 storageClass="DataFrame",
753 name="{coaddName}Coadd_obj",
754 deferLoad=True,
755 )
756 outputCatalog = connectionTypes.Output(
757 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
758 "data model.",
759 dimensions=("tract", "patch", "skymap"),
760 storageClass="DataFrame",
761 name="objectTable"
762 )
763
764
765class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
766 pipelineConnections=TransformObjectCatalogConnections):
767 coaddName = pexConfig.Field(
768 dtype=str,
769 default="deep",
770 doc="Name of coadd"
771 )
772 # TODO: remove in DM-27177
773 filterMap = pexConfig.DictField(
774 keytype=str,
775 itemtype=str,
776 default={},
777 doc=("Dictionary mapping full filter name to short one for column name munging."
778 "These filters determine the output columns no matter what filters the "
779 "input data actually contain."),
780 deprecated=("Coadds are now identified by the band, so this transform is unused."
781 "Will be removed after v22.")
782 )
783 outputBands = pexConfig.ListField(
784 dtype=str,
785 default=None,
786 optional=True,
787 doc=("These bands and only these bands will appear in the output,"
788 " NaN-filled if the input does not include them."
789 " If None, then use all bands found in the input.")
790 )
791 camelCase = pexConfig.Field(
792 dtype=bool,
793 default=False,
794 doc=("Write per-band columns names with camelCase, else underscore "
795 "For example: gPsFlux instead of g_PsFlux.")
796 )
797 multilevelOutput = pexConfig.Field(
798 dtype=bool,
799 default=False,
800 doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
801 "and name-munged (False).")
802 )
803 goodFlags = pexConfig.ListField(
804 dtype=str,
805 default=[],
806 doc=("List of 'good' flags that should be set False when populating empty tables. "
807 "All other flags are considered to be 'bad' flags and will be set to True.")
808 )
809 floatFillValue = pexConfig.Field(
810 dtype=float,
811 default=np.nan,
812 doc="Fill value for float fields when populating empty tables."
813 )
814 integerFillValue = pexConfig.Field(
815 dtype=int,
816 default=-1,
817 doc="Fill value for integer fields when populating empty tables."
818 )
819
820 def setDefaults(self):
821 super().setDefaults()
822 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Object.yaml')
823 self.primaryKey = 'objectId'
824 self.goodFlags = ['calib_astrometry_used',
825 'calib_photometry_reserved',
826 'calib_photometry_used',
827 'calib_psf_candidate',
828 'calib_psf_reserved',
829 'calib_psf_used']
830
831
832class TransformObjectCatalogTask(TransformCatalogBaseTask):
833 """Produce a flattened Object Table to match the format specified in
834 sdm_schemas.
835
836 Do the same set of postprocessing calculations on all bands
837
838 This is identical to `TransformCatalogBaseTask`, except that it does the
839 specified functor calculations for all filters present in the
840 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
841 by the YAML file will be superseded.
842 """
843 _DefaultName = "transformObjectCatalog"
844 ConfigClass = TransformObjectCatalogConfig
845
846 # Used by Gen 2 runDataRef only:
847 inputDataset = 'deepCoadd_obj'
848 outputDataset = 'objectTable'
849
850 @classmethod
851 def _makeArgumentParser(cls):
852 parser = ArgumentParser(name=cls._DefaultName)
853 parser.add_id_argument("--id", cls.inputDataset,
854 ContainerClass=CoaddDataIdContainer,
855 help="data ID, e.g. --id tract=12345 patch=1,2")
856 return parser
857
858 def run(self, parq, funcs=None, dataId=None, band=None):
859 # NOTE: band kwarg is ignored here.
860 dfDict = {}
861 analysisDict = {}
862 templateDf = pd.DataFrame()
863
864 if isinstance(parq, DeferredDatasetHandle):
865 columns = parq.get(component='columns')
866 inputBands = columns.unique(level=1).values
867 else:
868 inputBands = parq.columnLevelNames['band']
869
870 outputBands = self.config.outputBands if self.config.outputBands else inputBands
871
872 # Perform transform for data of filters that exist in parq.
873 for inputBand in inputBands:
874 if inputBand not in outputBands:
875 self.log.info("Ignoring %s band data in the input", inputBand)
876 continue
877 self.log.info("Transforming the catalog of band %s", inputBand)
878 result = self.transform(inputBand, parq, funcs, dataId)
879 dfDict[inputBand] = result.df
880 analysisDict[inputBand] = result.analysis
881 if templateDf.empty:
882 templateDf = result.df
883
884 # Put filler values in columns of other wanted bands
885 for filt in outputBands:
886 if filt not in dfDict:
887 self.log.info("Adding empty columns for band %s", filt)
888 dfTemp = templateDf.copy()
889 for col in dfTemp.columns:
890 testValue = dfTemp[col].values[0]
891 if isinstance(testValue, (np.bool_, pd.BooleanDtype)):
892 # Boolean flag type, check if it is a "good" flag
893 if col in self.config.goodFlags:
894 fillValue = False
895 else:
896 fillValue = True
897 elif isinstance(testValue, numbers.Integral):
898 # Checking numbers.Integral catches all flavors
899 # of python, numpy, pandas, etc. integers.
900 # We must ensure this is not an unsigned integer.
901 if isinstance(testValue, np.unsignedinteger):
902 raise ValueError("Parquet tables may not have unsigned integer columns.")
903 else:
904 fillValue = self.config.integerFillValue
905 else:
906 fillValue = self.config.floatFillValue
907 dfTemp[col].values[:] = fillValue
908 dfDict[filt] = dfTemp
909
910 # This makes a multilevel column index, with band as first level
911 df = pd.concat(dfDict, axis=1, names=['band', 'column'])
912
913 if not self.config.multilevelOutput:
914 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
915 if self.config.primaryKey in noDupCols:
916 noDupCols.remove(self.config.primaryKey)
917 if dataId is not None:
918 noDupCols += list(dataId.keys())
919 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
920 inputBands=inputBands)
921
922 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
923
924 return df
925
926
927class TractObjectDataIdContainer(CoaddDataIdContainer):
928
929 def makeDataRefList(self, namespace):
930 """Make self.refList from self.idList
931
932 Generate a list of data references given tract and/or patch.
933 This was adapted from `TractQADataIdContainer`, which was
934 `TractDataIdContainer` modified to not require "filter".
935 Only existing dataRefs are returned.
936 """
937 def getPatchRefList(tract):
938 return [namespace.butler.dataRef(datasetType=self.datasetType,
939 tract=tract.getId(),
940 patch="%d,%d" % patch.getIndex()) for patch in tract]
941
942 tractRefs = defaultdict(list) # Data references for each tract
943 for dataId in self.idList:
944 skymap = self.getSkymap(namespace)
945
946 if "tract" in dataId:
947 tractId = dataId["tract"]
948 if "patch" in dataId:
949 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
950 tract=tractId,
951 patch=dataId['patch']))
952 else:
953 tractRefs[tractId] += getPatchRefList(skymap[tractId])
954 else:
955 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
956 for tract in skymap)
957 outputRefList = []
958 for tractRefList in tractRefs.values():
959 existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
960 outputRefList.append(existingRefs)
961
962 self.refList = outputRefList
963
964
965class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
966 dimensions=("tract", "skymap")):
967 inputCatalogs = connectionTypes.Input(
968 doc="Per-Patch objectTables conforming to the standard data model.",
969 name="objectTable",
970 storageClass="DataFrame",
971 dimensions=("tract", "patch", "skymap"),
972 multiple=True,
973 )
974 outputCatalog = connectionTypes.Output(
975 doc="Pre-tract horizontal concatenation of the input objectTables",
976 name="objectTable_tract",
977 storageClass="DataFrame",
978 dimensions=("tract", "skymap"),
979 )
980
981
982class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
983 pipelineConnections=ConsolidateObjectTableConnections):
984 coaddName = pexConfig.Field(
985 dtype=str,
986 default="deep",
987 doc="Name of coadd"
988 )
989
990
991class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
992 """Write patch-merged source tables to a tract-level parquet file
993
994 Concatenates the per-patch `objectTable` list into a per-tract `objectTable_tract`
995 """
996 _DefaultName = "consolidateObjectTable"
997 ConfigClass = ConsolidateObjectTableConfig
998
999 inputDataset = 'objectTable'
1000 outputDataset = 'objectTable_tract'
1001
1002 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1003 inputs = butlerQC.get(inputRefs)
1004 self.log.info("Concatenating %s per-patch Object Tables",
1005 len(inputs['inputCatalogs']))
1006 df = pd.concat(inputs['inputCatalogs'])
1007 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1008
1009 @classmethod
1010 def _makeArgumentParser(cls):
1011 parser = ArgumentParser(name=cls._DefaultName)
1012
1013 parser.add_id_argument("--id", cls.inputDataset,
1014 help="data ID, e.g. --id tract=12345",
1015 ContainerClass=TractObjectDataIdContainer)
1016 return parser
1017
1018 def runDataRef(self, patchRefList):
1019 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
1020 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1021
1022 def writeMetadata(self, dataRef):
1023 """No metadata to write.
1024 """
1025 pass
1026
1027
1028class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
1029 defaultTemplates={"catalogType": ""},
1030 dimensions=("instrument", "visit", "detector")):
1031
1032 inputCatalog = connectionTypes.Input(
1033 doc="Wide input catalog of sources produced by WriteSourceTableTask",
1034 name="{catalogType}source",
1035 storageClass="DataFrame",
1036 dimensions=("instrument", "visit", "detector"),
1037 deferLoad=True
1038 )
1039 outputCatalog = connectionTypes.Output(
1040 doc="Narrower, per-detector Source Table transformed and converted per a "
1041 "specified set of functors",
1042 name="{catalogType}sourceTable",
1043 storageClass="DataFrame",
1044 dimensions=("instrument", "visit", "detector")
1045 )
1046
1047
1048class TransformSourceTableConfig(TransformCatalogBaseConfig,
1049 pipelineConnections=TransformSourceTableConnections):
1050
1051 def setDefaults(self):
1052 super().setDefaults()
1053 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'Source.yaml')
1054 self.primaryKey = 'sourceId'
1055
1056
1057class TransformSourceTableTask(TransformCatalogBaseTask):
1058 """Transform/standardize a source catalog
1059 """
1060 _DefaultName = "transformSourceTable"
1061 ConfigClass = TransformSourceTableConfig
1062
1063 inputDataset = 'source'
1064 outputDataset = 'sourceTable'
1065
1066 @classmethod
1067 def _makeArgumentParser(cls):
1068 parser = ArgumentParser(name=cls._DefaultName)
1069 parser.add_id_argument("--id", datasetType=cls.inputDataset,
1070 level="sensor",
1071 help="data ID, e.g. --id visit=12345 ccd=0")
1072 return parser
1073
1074 def runDataRef(self, dataRef):
1075 """Override to specify band label to run()."""
1076 parq = dataRef.get()
1077 funcs = self.getFunctors()
1078 band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
1079 df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
1080 self.write(df, dataRef)
1081 return df
1082
1083
1084class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
1085 dimensions=("instrument", "visit",),
1086 defaultTemplates={"calexpType": ""}):
1087 calexp = connectionTypes.Input(
1088 doc="Processed exposures used for metadata",
1089 name="{calexpType}calexp",
1090 storageClass="ExposureF",
1091 dimensions=("instrument", "visit", "detector"),
1092 deferLoad=True,
1093 multiple=True,
1094 )
1095 visitSummary = connectionTypes.Output(
1096 doc=("Per-visit consolidated exposure metadata. These catalogs use "
1097 "detector id for the id and are sorted for fast lookups of a "
1098 "detector."),
1099 name="{calexpType}visitSummary",
1100 storageClass="ExposureCatalog",
1101 dimensions=("instrument", "visit"),
1102 )
1103
1104
1105class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1106 pipelineConnections=ConsolidateVisitSummaryConnections):
1107 """Config for ConsolidateVisitSummaryTask"""
1108 pass
1109
1110
1111class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
1112 """Task to consolidate per-detector visit metadata.
1113
1114 This task aggregates the following metadata from all the detectors in a
1115 single visit into an exposure catalog:
1116 - The visitInfo.
1117 - The wcs.
1118 - The photoCalib.
1119 - The physical_filter and band (if available).
1120 - The psf size, shape, and effective area at the center of the detector.
1121 - The corners of the bounding box in right ascension/declination.
1122
1123 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1124 are not persisted here because of storage concerns, and because of their
1125 limited utility as summary statistics.
1126
1127 Tests for this task are performed in ci_hsc_gen3.
1128 """
1129 _DefaultName = "consolidateVisitSummary"
1130 ConfigClass = ConsolidateVisitSummaryConfig
1131
1132 @classmethod
1133 def _makeArgumentParser(cls):
1134 parser = ArgumentParser(name=cls._DefaultName)
1135
1136 parser.add_id_argument("--id", "calexp",
1137 help="data ID, e.g. --id visit=12345",
1138 ContainerClass=VisitDataIdContainer)
1139 return parser
1140
1141 def writeMetadata(self, dataRef):
1142 """No metadata to persist, so override to remove metadata persistance.
1143 """
1144 pass
1145
1146 def writeConfig(self, butler, clobber=False, doBackup=True):
1147 """No config to persist, so override to remove config persistance.
1148 """
1149 pass
1150
1151 def runDataRef(self, dataRefList):
1152 visit = dataRefList[0].dataId['visit']
1153
1154 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1155 len(dataRefList), visit)
1156
1157 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)
1158
1159 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)
1160
1161 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1162 dataRefs = butlerQC.get(inputRefs.calexp)
1163 visit = dataRefs[0].dataId.byName()['visit']
1164
1165 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)",
1166 len(dataRefs), visit)
1167
1168 expCatalog = self._combineExposureMetadata(visit, dataRefs)
1169
1170 butlerQC.put(expCatalog, outputRefs.visitSummary)
1171
1172 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
1173 """Make a combined exposure catalog from a list of dataRefs.
1174 These dataRefs must point to exposures with wcs, summaryStats,
1175 and other visit metadata.
1176
1177 Parameters
1178 ----------
1179 visit : `int`
1180 Visit identification number.
1181 dataRefs : `list`
1182 List of dataRefs in visit. May be list of
1183 `lsst.daf.persistence.ButlerDataRef` (Gen2) or
1184 `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
1185 isGen3 : `bool`, optional
1186 Specifies if this is a Gen3 list of datarefs.
1187
1188 Returns
1189 -------
1190 visitSummary : `lsst.afw.table.ExposureCatalog`
1191 Exposure catalog with per-detector summary information.
1192 """
1193 schema = self._makeVisitSummarySchema()
1194 cat = afwTable.ExposureCatalog(schema)
1195 cat.resize(len(dataRefs))
1196
1197 cat['visit'] = visit
1198
1199 for i, dataRef in enumerate(dataRefs):
1200 if isGen3:
1201 visitInfo = dataRef.get(component='visitInfo')
1202 filterLabel = dataRef.get(component='filterLabel')
1203 summaryStats = dataRef.get(component='summaryStats')
1204 detector = dataRef.get(component='detector')
1205 wcs = dataRef.get(component='wcs')
1206 photoCalib = dataRef.get(component='photoCalib')
1207 detector = dataRef.get(component='detector')
1208 bbox = dataRef.get(component='bbox')
1209 validPolygon = dataRef.get(component='validPolygon')
1210 else:
1211 # Note that we need to read the calexp because there is
1212 # no magic access to the psf except through the exposure.
1213 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
1214 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
1215 visitInfo = exp.getInfo().getVisitInfo()
1216 filterLabel = dataRef.get("calexp_filterLabel")
1217 summaryStats = exp.getInfo().getSummaryStats()
1218 wcs = exp.getWcs()
1219 photoCalib = exp.getPhotoCalib()
1220 detector = exp.getDetector()
1221 bbox = dataRef.get(datasetType='calexp_bbox')
1222 validPolygon = exp.getInfo().getValidPolygon()
1223
1224 rec = cat[i]
1225 rec.setBBox(bbox)
1226 rec.setVisitInfo(visitInfo)
1227 rec.setWcs(wcs)
1228 rec.setPhotoCalib(photoCalib)
1229 rec.setValidPolygon(validPolygon)
1230
1231 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1232 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1233 rec.setId(detector.getId())
1234 rec['psfSigma'] = summaryStats.psfSigma
1235 rec['psfIxx'] = summaryStats.psfIxx
1236 rec['psfIyy'] = summaryStats.psfIyy
1237 rec['psfIxy'] = summaryStats.psfIxy
1238 rec['psfArea'] = summaryStats.psfArea
1239 rec['raCorners'][:] = summaryStats.raCorners
1240 rec['decCorners'][:] = summaryStats.decCorners
1241 rec['ra'] = summaryStats.ra
1242 rec['decl'] = summaryStats.decl
1243 rec['zenithDistance'] = summaryStats.zenithDistance
1244 rec['zeroPoint'] = summaryStats.zeroPoint
1245 rec['skyBg'] = summaryStats.skyBg
1246 rec['skyNoise'] = summaryStats.skyNoise
1247 rec['meanVar'] = summaryStats.meanVar
1248 rec['astromOffsetMean'] = summaryStats.astromOffsetMean
1249 rec['astromOffsetStd'] = summaryStats.astromOffsetStd
1250 rec['nPsfStar'] = summaryStats.nPsfStar
1251 rec['psfStarDeltaE1Median'] = summaryStats.psfStarDeltaE1Median
1252 rec['psfStarDeltaE2Median'] = summaryStats.psfStarDeltaE2Median
1253 rec['psfStarDeltaE1Scatter'] = summaryStats.psfStarDeltaE1Scatter
1254 rec['psfStarDeltaE2Scatter'] = summaryStats.psfStarDeltaE2Scatter
1255 rec['psfStarDeltaSizeMedian'] = summaryStats.psfStarDeltaSizeMedian
1256 rec['psfStarDeltaSizeScatter'] = summaryStats.psfStarDeltaSizeScatter
1257 rec['psfStarScaledDeltaSizeScatter'] = summaryStats.psfStarScaledDeltaSizeScatter
1258
1259 metadata = dafBase.PropertyList()
1260 metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1261 # We are looping over existing datarefs, so the following is true
1262 metadata.add("COMMENT", "Only detectors with data have entries.")
1263 cat.setMetadata(metadata)
1264
1265 cat.sort()
1266 return cat
1267
1268 def _makeVisitSummarySchema(self):
1269 """Make the schema for the visitSummary catalog."""
1270 schema = afwTable.ExposureTable.makeMinimalSchema()
1271 schema.addField('visit', type='I', doc='Visit number')
1272 schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1273 schema.addField('band', type='String', size=32, doc='Name of band')
1274 schema.addField('psfSigma', type='F',
1275 doc='PSF model second-moments determinant radius (center of chip) (pixel)')
1276 schema.addField('psfArea', type='F',
1277 doc='PSF model effective area (center of chip) (pixel**2)')
1278 schema.addField('psfIxx', type='F',
1279 doc='PSF model Ixx (center of chip) (pixel**2)')
1280 schema.addField('psfIyy', type='F',
1281 doc='PSF model Iyy (center of chip) (pixel**2)')
1282 schema.addField('psfIxy', type='F',
1283 doc='PSF model Ixy (center of chip) (pixel**2)')
1284 schema.addField('raCorners', type='ArrayD', size=4,
1285 doc='Right Ascension of bounding box corners (degrees)')
1286 schema.addField('decCorners', type='ArrayD', size=4,
1287 doc='Declination of bounding box corners (degrees)')
1288 schema.addField('ra', type='D',
1289 doc='Right Ascension of bounding box center (degrees)')
1290 schema.addField('decl', type='D',
1291 doc='Declination of bounding box center (degrees)')
1292 schema.addField('zenithDistance', type='F',
1293 doc='Zenith distance of bounding box center (degrees)')
1294 schema.addField('zeroPoint', type='F',
1295 doc='Mean zeropoint in detector (mag)')
1296 schema.addField('skyBg', type='F',
1297 doc='Average sky background (ADU)')
1298 schema.addField('skyNoise', type='F',
1299 doc='Average sky noise (ADU)')
1300 schema.addField('meanVar', type='F',
1301 doc='Mean variance of the weight plane (ADU**2)')
1302 schema.addField('astromOffsetMean', type='F',
1303 doc='Mean offset of astrometric calibration matches (arcsec)')
1304 schema.addField('astromOffsetStd', type='F',
1305 doc='Standard deviation of offsets of astrometric calibration matches (arcsec)')
1306 schema.addField('nPsfStar', type='I', doc='Number of stars used for PSF model')
1307 schema.addField('psfStarDeltaE1Median', type='F',
1308 doc='Median E1 residual (starE1 - psfE1) for psf stars')
1309 schema.addField('psfStarDeltaE2Median', type='F',
1310 doc='Median E2 residual (starE2 - psfE2) for psf stars')
1311 schema.addField('psfStarDeltaE1Scatter', type='F',
1312 doc='Scatter (via MAD) of E1 residual (starE1 - psfE1) for psf stars')
1313 schema.addField('psfStarDeltaE2Scatter', type='F',
1314 doc='Scatter (via MAD) of E2 residual (starE2 - psfE2) for psf stars')
1315 schema.addField('psfStarDeltaSizeMedian', type='F',
1316 doc='Median size residual (starSize - psfSize) for psf stars (pixel)')
1317 schema.addField('psfStarDeltaSizeScatter', type='F',
1318 doc='Scatter (via MAD) of size residual (starSize - psfSize) for psf stars (pixel)')
1319 schema.addField('psfStarScaledDeltaSizeScatter', type='F',
1320 doc='Scatter (via MAD) of size residual scaled by median size squared')
1321
1322 return schema
1323
1324
1325class VisitDataIdContainer(DataIdContainer):
1326 """DataIdContainer that groups sensor-level id's by visit
1327 """
1328
1329 def makeDataRefList(self, namespace):
1330 """Make self.refList from self.idList
1331
1332 Generate a list of data references grouped by visit.
1333
1334 Parameters
1335 ----------
1336 namespace : `argparse.Namespace`
1337 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
1338 """
1339 # Group by visits
1340 visitRefs = defaultdict(list)
1341 for dataId in self.idList:
1342 if "visit" in dataId:
1343 visitId = dataId["visit"]
1344 # append all dataRefs in this dataId's subset to the visit's list
1345 subset = namespace.butler.subset(self.datasetType, dataId=dataId)
1346 visitRefs[visitId].extend([dataRef for dataRef in subset])
1347
1348 outputRefList = []
1349 for refList in visitRefs.values():
1350 existingRefs = [ref for ref in refList if ref.datasetExists()]
1351 if existingRefs:
1352 outputRefList.append(existingRefs)
1353
1354 self.refList = outputRefList
1355
1356
1357class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1358 defaultTemplates={"catalogType": ""},
1359 dimensions=("instrument", "visit")):
1360 inputCatalogs = connectionTypes.Input(
1361 doc="Input per-detector Source Tables",
1362 name="{catalogType}sourceTable",
1363 storageClass="DataFrame",
1364 dimensions=("instrument", "visit", "detector"),
1365 multiple=True
1366 )
1367 outputCatalog = connectionTypes.Output(
1368 doc="Per-visit concatenation of Source Table",
1369 name="{catalogType}sourceTable_visit",
1370 storageClass="DataFrame",
1371 dimensions=("instrument", "visit")
1372 )
1373
1374
1375class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1376 pipelineConnections=ConsolidateSourceTableConnections):
1377 pass
1378
1379
1380class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
1381 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1382 """
1383 _DefaultName = 'consolidateSourceTable'
1384 ConfigClass = ConsolidateSourceTableConfig
1385
1386 inputDataset = 'sourceTable'
1387 outputDataset = 'sourceTable_visit'
1388
1389 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1390 from .makeCoaddTempExp import reorderRefs
1391
1392 detectorOrder = [ref.dataId['detector'] for ref in inputRefs.inputCatalogs]
1393 detectorOrder.sort()
1394 inputRefs = reorderRefs(inputRefs, detectorOrder, dataIdKey='detector')
1395 inputs = butlerQC.get(inputRefs)
1396 self.log.info("Concatenating %s per-detector Source Tables",
1397 len(inputs['inputCatalogs']))
1398 df = pd.concat(inputs['inputCatalogs'])
1399 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1400
1401 def runDataRef(self, dataRefList):
1402 self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
1403 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
1404 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1405
1406 @classmethod
1407 def _makeArgumentParser(cls):
1408 parser = ArgumentParser(name=cls._DefaultName)
1409
1410 parser.add_id_argument("--id", cls.inputDataset,
1411 help="data ID, e.g. --id visit=12345",
1412 ContainerClass=VisitDataIdContainer)
1413 return parser
1414
1415 def writeMetadata(self, dataRef):
1416 """No metadata to write.
1417 """
1418 pass
1419
1420 def writeConfig(self, butler, clobber=False, doBackup=True):
1421 """No config to write.
1422 """
1423 pass
1424
1425
1426class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
1427 dimensions=("instrument",),
1428 defaultTemplates={"calexpType": ""}):
1429 visitSummaryRefs = connectionTypes.Input(
1430 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1431 name="{calexpType}visitSummary",
1432 storageClass="ExposureCatalog",
1433 dimensions=("instrument", "visit"),
1434 multiple=True,
1435 deferLoad=True,
1436 )
1437 outputCatalog = connectionTypes.Output(
1438 doc="CCD and Visit metadata table",
1439 name="ccdVisitTable",
1440 storageClass="DataFrame",
1441 dimensions=("instrument",)
1442 )
1443
1444
1445class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
1446 pipelineConnections=MakeCcdVisitTableConnections):
1447 pass
1448
1449
1450class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1451 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
1452 """
1453 _DefaultName = 'makeCcdVisitTable'
1454 ConfigClass = MakeCcdVisitTableConfig
1455
1456 def run(self, visitSummaryRefs):
1457 """ Make a table of ccd information from the `visitSummary` catalogs.
1458 Parameters
1459 ----------
1460 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1461 List of DeferredDatasetHandles pointing to exposure catalogs with
1462 per-detector summary information.
1463 Returns
1464 -------
1465 result : `lsst.pipe.base.Struct`
1466 Results struct with attribute:
1467 - `outputCatalog`
1468 Catalog of ccd and visit information.
1469 """
1470 ccdEntries = []
1471 for visitSummaryRef in visitSummaryRefs:
1472 visitSummary = visitSummaryRef.get()
1473 visitInfo = visitSummary[0].getVisitInfo()
1474
1475 ccdEntry = {}
1476 summaryTable = visitSummary.asAstropy()
1477 selectColumns = ['id', 'visit', 'physical_filter', 'band', 'ra', 'decl', 'zenithDistance',
1478 'zeroPoint', 'psfSigma', 'skyBg', 'skyNoise']
1479 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
1480 # 'visit' is the human-readable visit number
1481 # 'visitId' is the key to the visitId table. They are the same
1482 # Technically you should join to get the visit from the visit table
1483 ccdEntry = ccdEntry.rename(columns={"visit": "visitId"})
1484 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
1485 summaryTable['id']]
1486 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
1487 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
1488 ccdEntry['ccdVisitId'] = ccdVisitIds
1489 ccdEntry['detector'] = summaryTable['id']
1490 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
1491 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
1492
1493 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1494 ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
1495 expTime = visitInfo.getExposureTime()
1496 ccdEntry['expTime'] = expTime
1497 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1498 ccdEntry['darkTime'] = visitInfo.getDarkTime()
1499 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
1500 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
1501 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
1502 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
1503 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
1504 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
1505 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
1506 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
1507 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
1508 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
1509 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, and flags,
1510 # and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. values are actually wanted.
1511 ccdEntries.append(ccdEntry)
1512
1513 outputCatalog = pd.concat(ccdEntries)
1514 outputCatalog.set_index('ccdVisitId', inplace=True, verify_integrity=True)
1515 return pipeBase.Struct(outputCatalog=outputCatalog)
1516
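# Editor's illustrative sketch (not part of the original module): the "seeing"
# column computed in run() above converts the PSF model sigma (pixels) to a
# FWHM in arcseconds, FWHM = psfSigma * sqrt(8 ln 2) * pixelScale. The numbers
# below are hypothetical.
#
#     import numpy as np
#
#     psfSigma = 1.7     # pixels
#     pixelScale = 0.2   # arcsec per pixel
#     seeing = psfSigma * np.sqrt(8 * np.log(2)) * pixelScale  # ~0.80 arcsec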
1517
1518class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
1519 dimensions=("instrument",),
1520 defaultTemplates={"calexpType": ""}):
1521 visitSummaries = connectionTypes.Input(
1522 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1523 name="{calexpType}visitSummary",
1524 storageClass="ExposureCatalog",
1525 dimensions=("instrument", "visit",),
1526 multiple=True,
1527 deferLoad=True,
1528 )
1529 outputCatalog = connectionTypes.Output(
1530 doc="Visit metadata table",
1531 name="visitTable",
1532 storageClass="DataFrame",
1533 dimensions=("instrument",)
1534 )
1535
1536
1537class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
1538 pipelineConnections=MakeVisitTableConnections):
1539 pass
1540
1541
1542class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1543 """Produce a `visitTable` from the `visitSummary` exposure catalogs.
1544 """
1545 _DefaultName = 'makeVisitTable'
1546 ConfigClass = MakeVisitTableConfig
1547
1548 def run(self, visitSummaries):
1549 """ Make a table of visit information from the `visitSummary` catalogs
1550
1551 Parameters
1552 ----------
1553 visitSummaries : list of `lsst.afw.table.ExposureCatalog`
1554 List of exposure catalogs with per-detector summary information.
1555 Returns
1556 -------
1557 result : `lsst.pipe.Base.Struct`
1558 Results struct with attribute:
1559 ``outputCatalog``
1560 Catalog of visit information.
1561 """
1562 visitEntries = []
1563 for visitSummary in visitSummaries:
1564 visitSummary = visitSummary.get()
1565 visitRow = visitSummary[0]
1566 visitInfo = visitRow.getVisitInfo()
1567
1568 visitEntry = {}
1569 visitEntry["visitId"] = visitRow['visit']
1570 visitEntry["visit"] = visitRow['visit']
1571 visitEntry["physical_filter"] = visitRow['physical_filter']
1572 visitEntry["band"] = visitRow['band']
1573 raDec = visitInfo.getBoresightRaDec()
1574 visitEntry["ra"] = raDec.getRa().asDegrees()
1575 visitEntry["decl"] = raDec.getDec().asDegrees()
1576 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1577 azAlt = visitInfo.getBoresightAzAlt()
1578 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
1579 visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
1580 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1581 visitEntry["airmass"] = visitInfo.getBoresightAirmass()
1582 visitEntry["obsStart"] = visitInfo.getDate().toPython()
1583 visitEntry["expTime"] = visitInfo.getExposureTime()
1584 visitEntries.append(visitEntry)
1585 # TODO: DM-30623, Add programId, exposureType, expMidpt, cameraTemp, mirror1Temp, mirror2Temp,
1586 # mirror3Temp, domeTemp, externalTemp, dimmSeeing, pwvGPS, pwvMW, flags, nExposures
1587
1588 outputCatalog = pd.DataFrame(data=visitEntries)
1589 outputCatalog.set_index('visitId', inplace=True, verify_integrity=True)
1590 return pipeBase.Struct(outputCatalog=outputCatalog)
1591
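# Editor's note: illustrative sketch only, not part of the original
# postprocess.py. MakeVisitTableTask.run builds one dict per visit and then
# converts the list to a DataFrame keyed on visitId; verify_integrity makes
# duplicate visits fail loudly. The entries below are hypothetical, and
# ``pd`` is the module-level pandas import.
def _exampleVisitTable():
    entries = [
        {"visitId": 1, "band": "r", "altitude": 60.0},
        {"visitId": 2, "band": "i", "altitude": 45.0},
    ]
    for entry in entries:
        # Zenith distance is the complement of the boresight altitude.
        entry["zenithDistance"] = 90 - entry["altitude"]
    return pd.DataFrame(data=entries).set_index("visitId", verify_integrity=True)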
1592
1593class WriteForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1594 dimensions=("instrument", "visit", "detector", "skymap", "tract")):
1595
1596 inputCatalog = connectionTypes.Input(
1597 doc="Primary per-detector, single-epoch forced-photometry catalog. "
1598 "By default, it is the output of ForcedPhotCcdTask on calexps",
1599 name="forced_src",
1600 storageClass="SourceCatalog",
1601 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1602 )
1603 inputCatalogDiff = connectionTypes.Input(
1604 doc="Secondary multi-epoch, per-detector, forced photometry catalog. "
1605 "By default, it is the output of ForcedPhotCcdTask run on image differences.",
1606 name="forced_diff",
1607 storageClass="SourceCatalog",
1608 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1609 )
1610 outputCatalog = connectionTypes.Output(
1611 doc="InputCatalogs horizonatally joined on `objectId` in Parquet format",
1612 name="mergedForcedSource",
1613 storageClass="DataFrame",
1614 dimensions=("instrument", "visit", "detector", "skymap", "tract")
1615 )
1616
1617
1618class WriteForcedSourceTableConfig(WriteSourceTableConfig,
1619 pipelineConnections=WriteForcedSourceTableConnections):
1620 key = lsst.pex.config.Field(
1621 doc="Column on which to join the two input tables on and make the primary key of the output",
1622 dtype=str,
1623 default="objectId",
1624 )
1625
1626
1627class WriteForcedSourceTableTask(pipeBase.PipelineTask):
1628 """Merge and convert per-detector forced source catalogs to parquet
1629
1630 Because the predecessor ForcedPhotCcdTask operates per-detector and
1631 per-tract (i.e., it has tract in its dimensions), detectors
1632 on the tract boundary may have multiple forced source catalogs.
1633
1634 The successor task TransformForcedSourceTable runs per-patch
1635 and temporally aggregates the overlapping mergedForcedSource catalogs
1636 from all available epochs.
1637 """
1638 _DefaultName = "writeForcedSourceTable"
1639 ConfigClass = WriteForcedSourceTableConfig
1640
1641 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1642 inputs = butlerQC.get(inputRefs)
1643 # Add ccdVisitId to allow joining with CcdVisitTable
1644 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
1645 inputs['band'] = butlerQC.quantum.dataId.full['band']
1646 outputs = self.run(**inputs)
1647 butlerQC.put(outputs, outputRefs)
1648
1649 def run(self, inputCatalog, inputCatalogDiff, ccdVisitId=None, band=None):
1650 dfs = []
1651 for table, dataset in zip((inputCatalog, inputCatalogDiff), ('calexp', 'diff')):
1652 df = table.asAstropy().to_pandas().set_index(self.config.key, drop=False)
1653 df = df.reindex(sorted(df.columns), axis=1)
1654 df['ccdVisitId'] = ccdVisitId if ccdVisitId else pd.NA
1655 df['band'] = band if band else pd.NA
1656 df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
1657 names=('dataset', 'column'))
1658
1659 dfs.append(df)
1660
1661 outputCatalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
1662 return pipeBase.Struct(outputCatalog=outputCatalog)
1663
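# Editor's note: illustrative sketch only, not part of the original
# postprocess.py. It mirrors WriteForcedSourceTableTask.run above: each
# catalog becomes a DataFrame indexed on objectId, its columns are wrapped
# in a (dataset, column) MultiIndex, and the frames are joined on the shared
# index. The toy values are hypothetical; ``pd`` and ``functools`` are the
# module-level imports.
def _exampleMultiIndexJoin():
    dfs = []
    for dataset, flux in (("calexp", [1.0, 2.0]), ("diff", [0.1, -0.2])):
        df = pd.DataFrame({"objectId": [10, 11], "flux": flux})
        df = df.set_index("objectId", drop=False)
        df.columns = pd.MultiIndex.from_tuples([(dataset, c) for c in df.columns],
                                               names=("dataset", "column"))
        dfs.append(df)
    # Join on the common objectId index; level-0 labels keep columns distinct.
    return functools.reduce(lambda d1, d2: d1.join(d2), dfs)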
1664
1665class TransformForcedSourceTableConnections(pipeBase.PipelineTaskConnections,
1666 dimensions=("instrument", "skymap", "patch", "tract")):
1667
1668 inputCatalogs = connectionTypes.Input(
1669 doc="Parquet table of merged ForcedSources produced by WriteForcedSourceTableTask",
1670 name="mergedForcedSource",
1671 storageClass="DataFrame",
1672 dimensions=("instrument", "visit", "detector", "skymap", "tract"),
1673 multiple=True,
1674 deferLoad=True
1675 )
1676 referenceCatalog = connectionTypes.Input(
1677 doc="Reference catalog which was used to seed the forcedPhot. Columns "
1678 "objectId, detect_isPrimary, detect_isTractInner, detect_isPatchInner "
1679 "are expected.",
1680 name="objectTable",
1681 storageClass="DataFrame",
1682 dimensions=("tract", "patch", "skymap"),
1683 deferLoad=True
1684 )
1685 outputCatalog = connectionTypes.Output(
1686 doc="Narrower, temporally-aggregated, per-patch ForcedSource Table transformed and converted per a "
1687 "specified set of functors",
1688 name="forcedSourceTable",
1689 storageClass="DataFrame",
1690 dimensions=("tract", "patch", "skymap")
1691 )
1692
1693
1694class TransformForcedSourceTableConfig(TransformCatalogBaseConfig,
1695 pipelineConnections=TransformForcedSourceTableConnections):
1696 referenceColumns = pexConfig.ListField(
1697 dtype=str,
1698 default=["detect_isPrimary", "detect_isTractInner", "detect_isPatchInner"],
1699 optional=True,
1700 doc="Columns to pull from reference catalog",
1701 )
1702 keyRef = lsst.pex.config.Field(
1703 doc="Column on which to join the two input tables on and make the primary key of the output",
1704 dtype=str,
1705 default="objectId",
1706 )
1707 key = lsst.pex.config.Field(
1708 doc="Rename the output DataFrame index to this name",
1709 dtype=str,
1710 default="forcedSourceId",
1711 )
1712
1713 def setDefaults(self):
1714 super().setDefaults()
1715 self.functorFile = os.path.join('$PIPE_TASKS_DIR', 'schemas', 'ForcedSource.yaml')
1716
1717
1718class TransformForcedSourceTableTask(TransformCatalogBaseTask):
1719 """Transform/standardize a ForcedSource catalog
1720
1721 Transforms each wide, per-detector forcedSource parquet table per the
1722 specification file (per-camera defaults found in ForcedSource.yaml).
1723 All epochs that overlap the patch are aggregated into one per-patch
1724 narrow-parquet file.
1725
1726 No de-duplication of rows is performed. Duplicate-resolution flags are
1727 pulled in from the referenceCatalog: `detect_isPrimary`,
1728 `detect_isTractInner`, `detect_isPatchInner`, so that the user may
1729 de-duplicate for analysis or compare duplicates for QA.
1730
1731 The resulting table includes multiple bands. Epochs (MJDs) and other
1732 useful per-visit information can be retrieved by joining with the
1733 CcdVisitTable on ccdVisitId.
1734 """
1735 _DefaultName = "transformForcedSourceTable"
1736 ConfigClass = TransformForcedSourceTableConfig
1737
1738 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1739 inputs = butlerQC.get(inputRefs)
1740 if self.funcs is None:
1741 raise ValueError("config.functorFile is None. "
1742 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
1743 outputs = self.run(inputs['inputCatalogs'], inputs['referenceCatalog'], funcs=self.funcs,
1744 dataId=outputRefs.outputCatalog.dataId.full)
1745
1746 butlerQC.put(outputs, outputRefs)
1747
1748 def run(self, inputCatalogs, referenceCatalog, funcs=None, dataId=None, band=None):
1749 dfs = []
1750 ref = referenceCatalog.get(parameters={"columns": self.config.referenceColumns})
1751 self.log.info("Aggregating %s input catalogs" % (len(inputCatalogs)))
1752 for handle in inputCatalogs:
1753 result = self.transform(None, handle, funcs, dataId)
1754 # Filter for only rows that were detected on (overlap) the patch
1755 dfs.append(result.df.join(ref, how='inner'))
1756
1757 outputCatalog = pd.concat(dfs)
1758
1759 # Now that the join on config.keyRef is done, change the index to
1760 # config.key: name the current index after config.keyRef,
1761 outputCatalog.index.rename(self.config.keyRef, inplace=True)
1762 # move config.keyRef back into the column list,
1763 outputCatalog.reset_index(inplace=True)
1764 # make forcedSourceId the index, as specified in ForcedSource.yaml,
1765 outputCatalog.set_index("forcedSourceId", inplace=True, verify_integrity=True)
1766 # and finally rename the index to config.key.
1767 outputCatalog.index.rename(self.config.key, inplace=True)
1768
1769 self.log.info("Made a table of %d columns and %d rows",
1770 len(outputCatalog.columns), len(outputCatalog))
1771 return pipeBase.Struct(outputCatalog=outputCatalog)
1772
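# Editor's note: illustrative sketch only, not part of the original
# postprocess.py. It mirrors the index bookkeeping at the end of
# TransformForcedSourceTableTask.run: the objectId join key is demoted to a
# regular column and forcedSourceId becomes the (verified-unique) index.
# The toy frame is hypothetical; ``pd`` is the module-level pandas import.
def _exampleReindexForcedSources():
    df = pd.DataFrame({"forcedSourceId": [100, 101], "flux": [1.0, 2.0]},
                      index=pd.Index([10, 10], name="objectId"))
    df = df.reset_index()  # objectId moves from the index into the columns
    return df.set_index("forcedSourceId", verify_integrity=True)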
1773
1774class ConsolidateTractConnections(pipeBase.PipelineTaskConnections,
1775 defaultTemplates={"catalogType": ""},
1776 dimensions=("instrument", "tract")):
1777 inputCatalogs = connectionTypes.Input(
1778 doc="Input per-patch DataFrame Tables to be concatenated",
1779 name="{catalogType}ForcedSourceTable",
1780 storageClass="DataFrame",
1781 dimensions=("tract", "patch", "skymap"),
1782 multiple=True,
1783 )
1784
1785 outputCatalog = connectionTypes.Output(
1786 doc="Output per-tract concatenation of DataFrame Tables",
1787 name="{catalogType}ForcedSourceTable_tract",
1788 storageClass="DataFrame",
1789 dimensions=("tract", "skymap"),
1790 )
1791
1792
1793class ConsolidateTractConfig(pipeBase.PipelineTaskConfig,
1794 pipelineConnections=ConsolidateTractConnections):
1795 pass
1796
1797
1798class ConsolidateTractTask(CmdLineTask, pipeBase.PipelineTask):
1799 """Concatenate any per-patch, dataframe list into a single
1800 per-tract DataFrame
1801 """
1802 _DefaultName = 'ConsolidateTract'
1803 ConfigClass = ConsolidateTractConfig
1804
1805 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1806 inputs = butlerQC.get(inputRefs)
1807 # No need to check that at least one inputCatalog exists; with no inputs this quantum would not be in the graph.
1808 self.log.info("Concatenating %s per-patch %s Tables",
1809 len(inputs['inputCatalogs']),
1810 inputRefs.inputCatalogs[0].datasetType.name)
1811 df = pd.concat(inputs['inputCatalogs'])
1812 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
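# Editor's note: illustrative sketch only, not part of the original
# postprocess.py. As the TransformForcedSourceTableTask docstring notes,
# per-visit quantities can be attached to forced sources by joining with the
# CcdVisitTable on ccdVisitId. Both arguments are assumed to be DataFrames,
# with the ccdVisit table indexed on ccdVisitId; the helper name is
# hypothetical.
def _exampleJoinWithCcdVisits(forcedSourceTable, ccdVisitTable):
    """Attach per-visit metadata (e.g. seeing, expMidpt) to forced sources."""
    return forcedSourceTable.join(ccdVisitTable, on="ccdVisitId", rsuffix="_ccdVisit")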