Coverage for python/lsst/pipe/tasks/postprocess.py : 27%

1# This file is part of pipe_tasks
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22import functools
23import pandas as pd
24from collections import defaultdict
25import numpy as np
27import lsst.geom
28import lsst.pex.config as pexConfig
29import lsst.pipe.base as pipeBase
30import lsst.daf.base as dafBase
31from lsst.pipe.base import connectionTypes
32import lsst.afw.table as afwTable
33from lsst.meas.base import SingleFrameMeasurementTask
34from lsst.pipe.base import CmdLineTask, ArgumentParser, DataIdContainer
35from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer
36from lsst.daf.butler import DeferredDatasetHandle, DataCoordinate
38from .parquetTable import ParquetTable
39from .multiBandUtils import makeMergeArgumentParser, MergeSourcesRunner
40from .functors import CompositeFunctor, RAColumn, DecColumn, Column
43def flattenFilters(df, noDupCols=['coord_ra', 'coord_dec'], camelCase=False, inputBands=None):
44 """Flattens a dataframe with multilevel column index
45 """
46 newDf = pd.DataFrame()
47 # band is the level 0 index
48 dfBands = df.columns.unique(level=0).values
49 for band in dfBands:
50 subdf = df[band]
51 columnFormat = '{0}{1}' if camelCase else '{0}_{1}'
52 newColumns = {c: columnFormat.format(band, c)
53 for c in subdf.columns if c not in noDupCols}
54 cols = list(newColumns.keys())
55 newDf = pd.concat([newDf, subdf[cols].rename(columns=newColumns)], axis=1)
57 # Band must be present in the input and output or else column is all NaN:
58 presentBands = dfBands if inputBands is None else list(set(inputBands).intersection(dfBands))
59 # Get the unexploded columns from any present band's partition
60 noDupDf = df[presentBands[0]][noDupCols]
61 newDf = pd.concat([noDupDf, newDf], axis=1)
62 return newDf
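# A minimal, hypothetical illustration of ``flattenFilters`` (shown as a
# comment so it is not executed at import time).  Given a DataFrame with a
# two-level (band, column) MultiIndex:
#
#     >>> df = pd.DataFrame(
#     ...     np.arange(4).reshape(2, 2),
#     ...     columns=pd.MultiIndex.from_tuples(
#     ...         [('g', 'PsFlux'), ('r', 'PsFlux')], names=('band', 'column')))
#     >>> flattenFilters(df, noDupCols=[], camelCase=True).columns.tolist()
#     ['gPsFlux', 'rPsFlux']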
65class WriteObjectTableConnections(pipeBase.PipelineTaskConnections,
66 defaultTemplates={"coaddName": "deep"},
67 dimensions=("tract", "patch", "skymap")):
68 inputCatalogMeas = connectionTypes.Input(
69 doc="Catalog of source measurements on the deepCoadd.",
70 dimensions=("tract", "patch", "band", "skymap"),
71 storageClass="SourceCatalog",
72 name="{coaddName}Coadd_meas",
73 multiple=True
74 )
75 inputCatalogForcedSrc = connectionTypes.Input(
76 doc="Catalog of forced measurements (shape and position parameters held fixed) on the deepCoadd.",
77 dimensions=("tract", "patch", "band", "skymap"),
78 storageClass="SourceCatalog",
79 name="{coaddName}Coadd_forced_src",
80 multiple=True
81 )
82 inputCatalogRef = connectionTypes.Input(
83 doc="Catalog marking the primary detection (which band provides a good shape and position)"
84 "for each detection in deepCoadd_mergeDet.",
85 dimensions=("tract", "patch", "skymap"),
86 storageClass="SourceCatalog",
87 name="{coaddName}Coadd_ref"
88 )
89 outputCatalog = connectionTypes.Output(
90 doc="A vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
91 "stored as a DataFrame with a multi-level column index per-patch.",
92 dimensions=("tract", "patch", "skymap"),
93 storageClass="DataFrame",
94 name="{coaddName}Coadd_obj"
95 )
98class WriteObjectTableConfig(pipeBase.PipelineTaskConfig,
99 pipelineConnections=WriteObjectTableConnections):
100 engine = pexConfig.Field(
101 dtype=str,
102 default="pyarrow",
103 doc="Parquet engine for writing (pyarrow or fastparquet)"
104 )
105 coaddName = pexConfig.Field(
106 dtype=str,
107 default="deep",
108 doc="Name of coadd"
109 )
112class WriteObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
113 """Write filter-merged source tables to parquet
114 """
115 _DefaultName = "writeObjectTable"
116 ConfigClass = WriteObjectTableConfig
117 RunnerClass = MergeSourcesRunner
119 # Names of table datasets to be merged
120 inputDatasets = ('forced_src', 'meas', 'ref')
122 # Tag of output dataset written by `MergeSourcesTask.write`
123 outputDataset = 'obj'
125 def __init__(self, butler=None, schema=None, **kwargs):
126 # It is a shame that this class can't use the default init for CmdLineTask
127 # But to do so would require its own special task runner, which is many
128 # more lines of specialization, so this is how it is for now
129 super().__init__(**kwargs)
131 def runDataRef(self, patchRefList):
132 """!
133 @brief Merge coadd sources from multiple bands. Calls @ref `run` which must be defined in
134 subclasses that inherit from MergeSourcesTask.
135 @param[in] patchRefList list of data references for each filter
136 """
137 catalogs = dict(self.readCatalog(patchRef) for patchRef in patchRefList)
138 dataId = patchRefList[0].dataId
139 mergedCatalog = self.run(catalogs, tract=dataId['tract'], patch=dataId['patch'])
140 self.write(patchRefList[0], ParquetTable(dataFrame=mergedCatalog))
142 def runQuantum(self, butlerQC, inputRefs, outputRefs):
143 inputs = butlerQC.get(inputRefs)
145 measDict = {ref.dataId['band']: {'meas': cat} for ref, cat in
146 zip(inputRefs.inputCatalogMeas, inputs['inputCatalogMeas'])}
147 forcedSourceDict = {ref.dataId['band']: {'forced_src': cat} for ref, cat in
148 zip(inputRefs.inputCatalogForcedSrc, inputs['inputCatalogForcedSrc'])}
150 catalogs = {}
151 for band in measDict.keys():
152 catalogs[band] = {'meas': measDict[band]['meas'],
153 'forced_src': forcedSourceDict[band]['forced_src'],
154 'ref': inputs['inputCatalogRef']}
155 dataId = butlerQC.quantum.dataId
156 df = self.run(catalogs=catalogs, tract=dataId['tract'], patch=dataId['patch'])
157 outputs = pipeBase.Struct(outputCatalog=df)
158 butlerQC.put(outputs, outputRefs)
160 @classmethod
161 def _makeArgumentParser(cls):
162 """Create a suitable ArgumentParser.
164 We will use the ArgumentParser to get a list of data
165 references for patches; the RunnerClass will sort them into lists
166 of data references for the same patch.
168 References the first of self.inputDatasets, rather than
169 self.inputDataset.
170 """
171 return makeMergeArgumentParser(cls._DefaultName, cls.inputDatasets[0])
173 def readCatalog(self, patchRef):
174 """Read input catalogs
176 Read all the input datasets given by the 'inputDatasets'
177 attribute.
179 Parameters
180 ----------
181 patchRef : `lsst.daf.persistence.ButlerDataRef`
182 Data reference for patch
184 Returns
185 -------
186 Tuple consisting of band name and a dict of catalogs, keyed by
187 dataset name
188 """
189 band = patchRef.get(self.config.coaddName + "Coadd_filterLabel", immediate=True).bandLabel
190 catalogDict = {}
191 for dataset in self.inputDatasets:
192 catalog = patchRef.get(self.config.coaddName + "Coadd_" + dataset, immediate=True)
193 self.log.info("Read %d sources from %s for band %s: %s" %
194 (len(catalog), dataset, band, patchRef.dataId))
195 catalogDict[dataset] = catalog
196 return band, catalogDict
198 def run(self, catalogs, tract, patch):
199 """Merge multiple catalogs.
201 Parameters
202 ----------
203 catalogs : `dict`
204 Mapping from filter names to dict of catalogs.
205 tract : `int`
206 Tract id to use for the tractId column.
207 patch : `str`
208 Patch id to use for the patchId column.
210 Returns
211 -------
212 catalog : `pandas.DataFrame`
213 Merged dataframe
214 """
216 dfs = []
217 for filt, tableDict in catalogs.items():
218 for dataset, table in tableDict.items():
219 # Convert afwTable to pandas DataFrame
220 df = table.asAstropy().to_pandas().set_index('id', drop=True)
222 # Sort columns by name, to ensure matching schema among patches
223 df = df.reindex(sorted(df.columns), axis=1)
224 df['tractId'] = tract
225 df['patchId'] = patch
227 # Make columns a 3-level MultiIndex
228 df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
229 names=('dataset', 'band', 'column'))
230 dfs.append(df)
232 catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
233 return catalog
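    # Sketch of the output structure (column names here are illustrative):
    # ``run`` returns a single DataFrame whose columns form a three-level
    # (dataset, band, column) MultiIndex, e.g.
    #
    #     catalog[('meas', 'g', 'base_PsfFlux_instFlux')]
    #     catalog[('ref', 'g', 'detect_isPrimary')]
    #
    # so each per-band, per-dataset input catalog occupies its own block of
    # columns, joined on the common object id index.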
235 def write(self, patchRef, catalog):
236 """Write the output.
238 Parameters
239 ----------
240 catalog : `ParquetTable`
241 Catalog to write
242 patchRef : `lsst.daf.persistence.ButlerDataRef`
243 Data reference for patch
244 """
245 patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
246 # since the filter isn't actually part of the data ID for the dataset we're saving,
247 # it's confusing to see it in the log message, even if the butler simply ignores it.
248 mergeDataId = patchRef.dataId.copy()
249 del mergeDataId["filter"]
250 self.log.info("Wrote merged catalog: %s" % (mergeDataId,))
252 def writeMetadata(self, dataRefList):
253 """No metadata to write, and not sure how to write it for a list of dataRefs.
254 """
255 pass
258class WriteSourceTableConnections(pipeBase.PipelineTaskConnections,
259 defaultTemplates={"catalogType": ""},
260 dimensions=("instrument", "visit", "detector")):
262 catalog = connectionTypes.Input(
263 doc="Input full-depth catalog of sources produced by CalibrateTask",
264 name="{catalogType}src",
265 storageClass="SourceCatalog",
266 dimensions=("instrument", "visit", "detector")
267 )
268 outputCatalog = connectionTypes.Output(
269 doc="Catalog of sources, `src` in Parquet format",
270 name="{catalogType}source",
271 storageClass="DataFrame",
272 dimensions=("instrument", "visit", "detector")
273 )
276class WriteSourceTableConfig(pipeBase.PipelineTaskConfig,
277 pipelineConnections=WriteSourceTableConnections):
278 doApplyExternalPhotoCalib = pexConfig.Field(
279 dtype=bool,
280 default=False,
281 doc=("Add local photoCalib columns from the calexp.photoCalib? Should only set True if "
282 "generating Source Tables from older src tables which do not already have local calib columns")
283 )
284 doApplyExternalSkyWcs = pexConfig.Field(
285 dtype=bool,
286 default=False,
287 doc=("Add local WCS columns from the calexp.wcs? Should only set True if "
288 "generating Source Tables from older src tables which do not already have local calib columns")
289 )
292class WriteSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
293 """Write source table to parquet
294 """
295 _DefaultName = "writeSourceTable"
296 ConfigClass = WriteSourceTableConfig
298 def runDataRef(self, dataRef):
299 src = dataRef.get('src')
300 if self.config.doApplyExternalPhotoCalib or self.config.doApplyExternalSkyWcs:
301 src = self.addCalibColumns(src, dataRef)
303 ccdVisitId = dataRef.get('ccdExposureId')
304 result = self.run(src, ccdVisitId=ccdVisitId)
305 dataRef.put(result.table, 'source')
307 def runQuantum(self, butlerQC, inputRefs, outputRefs):
308 inputs = butlerQC.get(inputRefs)
309 inputs['ccdVisitId'] = butlerQC.quantum.dataId.pack("visit_detector")
310 result = self.run(**inputs).table
311 outputs = pipeBase.Struct(outputCatalog=result.toDataFrame())
312 butlerQC.put(outputs, outputRefs)
314 def run(self, catalog, ccdVisitId=None):
315 """Convert `src` catalog to parquet
317 Parameters
318 ----------
319 catalog : `afwTable.SourceCatalog`
320 Catalog to be converted.
321 ccdVisitId : `int`
322 ccdVisitId to be added as a column.
324 Returns
325 -------
326 result : `lsst.pipe.base.Struct`
327 ``table``
328 `ParquetTable` version of the input catalog
329 """
330 self.log.info("Generating parquet table from src catalog %s", ccdVisitId)
331 df = catalog.asAstropy().to_pandas().set_index('id', drop=True)
332 df['ccdVisitId'] = ccdVisitId
333 return pipeBase.Struct(table=ParquetTable(dataFrame=df))
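    # A minimal usage sketch (hypothetical task instance and ids): the returned
    # Struct wraps a ParquetTable whose DataFrame is indexed by source id and
    # carries the supplied ccdVisitId in every row, e.g.
    #
    #     result = task.run(srcCatalog, ccdVisitId=12345)
    #     df = result.table.toDataFrame()   # columns include 'ccdVisitId'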
335 def addCalibColumns(self, catalog, dataRef):
336 """Add columns with local calibration evaluated at each centroid
338 for backwards compatibility with old repos.
339 This exists for the purpose of converting old src catalogs
340 (which don't have the expected local calib columns) to Source Tables.
342 Parameters
343 ----------
344 catalog : `afwTable.SourceCatalog`
345 Catalog to which calib columns will be added.
346 dataRef : `lsst.daf.persistence.ButlerDataRef`
347 Data reference for fetching the calibs from disk.
349 Returns
350 -------
351 newCat: `afwTable.SourceCatalog`
352 Source Catalog with requested local calib columns
353 """
354 mapper = afwTable.SchemaMapper(catalog.schema)
355 measureConfig = SingleFrameMeasurementTask.ConfigClass()
356 measureConfig.doReplaceWithNoise = False
358 # Just need the WCS or the PhotoCalib attached to an exposure
359 exposure = dataRef.get('calexp_sub',
360 bbox=lsst.geom.Box2I(lsst.geom.Point2I(0, 0), lsst.geom.Point2I(0, 0)))
362 mapper = afwTable.SchemaMapper(catalog.schema)
363 mapper.addMinimalSchema(catalog.schema, True)
364 schema = mapper.getOutputSchema()
366 exposureIdInfo = dataRef.get("expIdInfo")
367 measureConfig.plugins.names = []
368 if self.config.doApplyExternalSkyWcs:
369 plugin = 'base_LocalWcs'
370 if plugin in schema:
371 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalSkyWcs=False")
372 else:
373 measureConfig.plugins.names.add(plugin)
375 if self.config.doApplyExternalPhotoCalib:
376 plugin = 'base_LocalPhotoCalib'
377 if plugin in schema:
378 raise RuntimeError(f"{plugin} already in src catalog. Set doApplyExternalPhotoCalib=False")
379 else:
380 measureConfig.plugins.names.add(plugin)
382 measurement = SingleFrameMeasurementTask(config=measureConfig, schema=schema)
383 newCat = afwTable.SourceCatalog(schema)
384 newCat.extend(catalog, mapper=mapper)
385 measurement.run(measCat=newCat, exposure=exposure, exposureId=exposureIdInfo.expId)
386 return newCat
388 def writeMetadata(self, dataRef):
389 """No metadata to write.
390 """
391 pass
393 @classmethod
394 def _makeArgumentParser(cls):
395 parser = ArgumentParser(name=cls._DefaultName)
396 parser.add_id_argument("--id", 'src',
397 help="data ID, e.g. --id visit=12345 ccd=0")
398 return parser
401class PostprocessAnalysis(object):
402 """Calculate columns from ParquetTable
404 This object manages and organizes an arbitrary set of computations
405 on a catalog. The catalog is defined by a
406 `lsst.pipe.tasks.parquetTable.ParquetTable` object (or list thereof), such as a
407 `deepCoadd_obj` dataset, and the computations are defined by a collection
408 of `lsst.pipe.tasks.functors.Functor` objects (or, equivalently,
409 a `CompositeFunctor`).
411 After the object is initialized, accessing the `.df` attribute (which
412 holds the `pandas.DataFrame` containing the results of the calculations)
413 triggers the computation.
415 One of the conveniences of using this object is the ability to define a desired common
416 filter for all functors. This enables the same functor collection to be passed to
417 several different `PostprocessAnalysis` objects without having to change the original
418 functor collection, since the `filt` keyword argument of this object triggers an
419 overwrite of the `filt` property for all functors in the collection.
421 This object also allows a list of refFlags to be passed, and defines a set of default
422 refFlags that are always included even if not requested.
424 If a list of `ParquetTable` objects is passed, rather than a single one, then the
425 calculations will be mapped over all the input catalogs. In principle, it should
426 be straightforward to parallelize this activity, but initial tests have failed
427 (see TODO in code comments).
429 Parameters
430 ----------
431 parq : `lsst.pipe.tasks.parquetTable.ParquetTable` (or list of such)
432 Source catalog(s) for computation
434 functors : `list`, `dict`, or `lsst.pipe.tasks.functors.CompositeFunctor`
435 Computations to do (functors that act on `parq`).
436 If a dict, the output
437 DataFrame will have columns keyed accordingly.
438 If a list, the column keys will come from the
439 `.shortname` attribute of each functor.
441 filt : `str` (optional)
442 Filter in which to calculate. If provided,
443 this will overwrite any existing `.filt` attribute
444 of the provided functors.
446 flags : `list` (optional)
447 List of flags (per-band) to include in output table.
449 refFlags : `list` (optional)
450 List of refFlags (only reference band) to include in output table.
453 """
454 _defaultRefFlags = []
455 _defaultFuncs = (('coord_ra', RAColumn()),
456 ('coord_dec', DecColumn()))
458 def __init__(self, parq, functors, filt=None, flags=None, refFlags=None):
459 self.parq = parq
460 self.functors = functors
462 self.filt = filt
463 self.flags = list(flags) if flags is not None else []
464 self.refFlags = list(self._defaultRefFlags)
465 if refFlags is not None:
466 self.refFlags += list(refFlags)
468 self._df = None
470 @property
471 def defaultFuncs(self):
472 funcs = dict(self._defaultFuncs)
473 return funcs
475 @property
476 def func(self):
477 additionalFuncs = self.defaultFuncs
478 additionalFuncs.update({flag: Column(flag, dataset='ref') for flag in self.refFlags})
479 additionalFuncs.update({flag: Column(flag, dataset='meas') for flag in self.flags})
481 if isinstance(self.functors, CompositeFunctor):
482 func = self.functors
483 else:
484 func = CompositeFunctor(self.functors)
486 func.funcDict.update(additionalFuncs)
487 func.filt = self.filt
489 return func
491 @property
492 def noDupCols(self):
493 return [name for name, func in self.func.funcDict.items() if func.noDup or func.dataset == 'ref']
495 @property
496 def df(self):
497 if self._df is None:
498 self.compute()
499 return self._df
501 def compute(self, dropna=False, pool=None):
502 # map over multiple parquet tables
503 if type(self.parq) in (list, tuple):
504 if pool is None:
505 dflist = [self.func(parq, dropna=dropna) for parq in self.parq]
506 else:
507 # TODO: Figure out why this doesn't work (pyarrow pickling issues?)
508 dflist = pool.map(functools.partial(self.func, dropna=dropna), self.parq)
509 self._df = pd.concat(dflist)
510 else:
511 self._df = self.func(self.parq, dropna=dropna)
513 return self._df
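    # A minimal usage sketch (``parq`` and the functor names below are
    # illustrative assumptions, not fixed by this module):
    #
    #     funcs = {'ra': RAColumn(), 'dec': DecColumn(),
    #              'psfFlux': Column('base_PsfFlux_instFlux', dataset='meas')}
    #     analysis = PostprocessAnalysis(parq, funcs, filt='g',
    #                                    refFlags=['detect_isPrimary'])
    #     df = analysis.df   # first access triggers compute()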
516class TransformCatalogBaseConnections(pipeBase.PipelineTaskConnections,
517 dimensions=()):
518 """Expected Connections for subclasses of TransformCatalogBaseTask.
520 Must be subclassed.
521 """
522 inputCatalog = connectionTypes.Input(
523 name="",
524 storageClass="DataFrame",
525 )
526 outputCatalog = connectionTypes.Output(
527 name="",
528 storageClass="DataFrame",
529 )
532class TransformCatalogBaseConfig(pipeBase.PipelineTaskConfig,
533 pipelineConnections=TransformCatalogBaseConnections):
534 functorFile = pexConfig.Field(
535 dtype=str,
536 doc='Path to YAML file specifying functors to be computed',
537 default=None,
538 optional=True
539 )
542class TransformCatalogBaseTask(CmdLineTask, pipeBase.PipelineTask):
543 """Base class for transforming/standardizing a catalog
545 by applying functors that convert units and apply calibrations.
546 The purpose of this task is to perform a set of computations on
547 an input `ParquetTable` dataset (such as `deepCoadd_obj`) and write the
548 results to a new dataset (which needs to be declared in an `outputDataset`
549 attribute).
551 The calculations to be performed are defined in a YAML file that specifies
552 a set of functors to be computed, provided as
553 the `functorFile` config parameter. An example of such a YAML file
554 is the following:
556 funcs:
557 psfMag:
558 functor: Mag
559 args:
560 - base_PsfFlux
561 filt: HSC-G
562 dataset: meas
563 cmodel_magDiff:
564 functor: MagDiff
565 args:
566 - modelfit_CModel
567 - base_PsfFlux
568 filt: HSC-G
569 gauss_magDiff:
570 functor: MagDiff
571 args:
572 - base_GaussianFlux
573 - base_PsfFlux
574 filt: HSC-G
575 count:
576 functor: Column
577 args:
578 - base_InputCount_value
579 filt: HSC-G
580 deconvolved_moments:
581 functor: DeconvolvedMoments
582 filt: HSC-G
583 dataset: forced_src
584 refFlags:
585 - calib_psfUsed
586 - merge_measurement_i
587 - merge_measurement_r
588 - merge_measurement_z
589 - merge_measurement_y
590 - merge_measurement_g
591 - base_PixelFlags_flag_inexact_psfCenter
592 - detect_isPrimary
594 The names for each entry under "funcs" will become the names of columns in the
595 output dataset. All the functors referenced are defined in `lsst.pipe.tasks.functors`.
596 Positional arguments to be passed to each functor are in the `args` list,
597 and any additional entries for each column other than "functor" or "args" (e.g., `'filt'`,
598 `'dataset'`) are treated as keyword arguments to be passed to the functor initialization.
600 The "refFlags" entry is shortcut for a bunch of `Column` functors with the original column and
601 taken from the `'ref'` dataset.
603 The "flags" entry will be expanded out per band.
605 This task uses the `lsst.pipe.tasks.postprocess.PostprocessAnalysis` object
606 to organize and execute the calculations.
608 """
609 @property
610 def _DefaultName(self):
611 raise NotImplementedError('Subclass must define "_DefaultName" attribute')
613 @property
614 def outputDataset(self):
615 raise NotImplementedError('Subclass must define "outputDataset" attribute')
617 @property
618 def inputDataset(self):
619 raise NotImplementedError('Subclass must define "inputDataset" attribute')
621 @property
622 def ConfigClass(self):
623 raise NotImplementedError('Subclass must define "ConfigClass" attribute')
625 def __init__(self, *args, **kwargs):
626 super().__init__(*args, **kwargs)
627 if self.config.functorFile:
628 self.log.info('Loading transform functor definitions from %s',
629 self.config.functorFile)
630 self.funcs = CompositeFunctor.from_file(self.config.functorFile)
631 self.funcs.update(dict(PostprocessAnalysis._defaultFuncs))
632 else:
633 self.funcs = None
635 def runQuantum(self, butlerQC, inputRefs, outputRefs):
636 inputs = butlerQC.get(inputRefs)
637 if self.funcs is None:
638 raise ValueError("config.functorFile is None. "
639 "Must be a valid path to yaml in order to run Task as a PipelineTask.")
640 result = self.run(parq=inputs['inputCatalog'], funcs=self.funcs,
641 dataId=outputRefs.outputCatalog.dataId.full)
642 outputs = pipeBase.Struct(outputCatalog=result)
643 butlerQC.put(outputs, outputRefs)
645 def runDataRef(self, dataRef):
646 parq = dataRef.get()
647 if self.funcs is None:
648 raise ValueError("config.functorFile is None. "
649 "Must be a valid path to yaml in order to run as a CommandlineTask.")
650 df = self.run(parq, funcs=self.funcs, dataId=dataRef.dataId)
651 self.write(df, dataRef)
652 return df
654 def run(self, parq, funcs=None, dataId=None, band=None):
655 """Do postprocessing calculations
657 Takes a `ParquetTable` object and dataId,
658 returns a dataframe with results of postprocessing calculations.
660 Parameters
661 ----------
662 parq : `lsst.pipe.tasks.parquetTable.ParquetTable`
663 ParquetTable from which calculations are done.
664 funcs : `lsst.pipe.tasks.functors.CompositeFunctor`
665 Functors to apply to the table's columns
666 dataId : dict, optional
667 Used to add a `patchId` column to the output dataframe.
668 band : `str`, optional
669 Filter band that is being processed.
671 Returns
672 -------
673 `pandas.DataFrame`
675 """
676 self.log.info("Transforming/standardizing the source table dataId: %s", dataId)
678 df = self.transform(band, parq, funcs, dataId).df
679 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
680 return df
682 def getFunctors(self):
683 return self.funcs
685 def getAnalysis(self, parq, funcs=None, band=None):
686 if funcs is None:
687 funcs = self.funcs
688 analysis = PostprocessAnalysis(parq, funcs, filt=band)
689 return analysis
691 def transform(self, band, parq, funcs, dataId):
692 analysis = self.getAnalysis(parq, funcs=funcs, band=band)
693 df = analysis.df
694 if dataId is not None:
695 for key, value in dataId.items():
696 df[str(key)] = value
698 return pipeBase.Struct(
699 df=df,
700 analysis=analysis
701 )
703 def write(self, df, parqRef):
704 parqRef.put(ParquetTable(dataFrame=df), self.outputDataset)
706 def writeMetadata(self, dataRef):
707 """No metadata to write.
708 """
709 pass
712class TransformObjectCatalogConnections(pipeBase.PipelineTaskConnections,
713 defaultTemplates={"coaddName": "deep"},
714 dimensions=("tract", "patch", "skymap")):
715 inputCatalog = connectionTypes.Input(
716 doc="The vertical concatenation of the deepCoadd_{ref|meas|forced_src} catalogs, "
717 "stored as a DataFrame with a multi-level column index per-patch.",
718 dimensions=("tract", "patch", "skymap"),
719 storageClass="DataFrame",
720 name="{coaddName}Coadd_obj",
721 deferLoad=True,
722 )
723 outputCatalog = connectionTypes.Output(
724 doc="Per-Patch Object Table of columns transformed from the deepCoadd_obj table per the standard "
725 "data model.",
726 dimensions=("tract", "patch", "skymap"),
727 storageClass="DataFrame",
728 name="objectTable"
729 )
732class TransformObjectCatalogConfig(TransformCatalogBaseConfig,
733 pipelineConnections=TransformObjectCatalogConnections):
734 coaddName = pexConfig.Field(
735 dtype=str,
736 default="deep",
737 doc="Name of coadd"
738 )
739 # TODO: remove in DM-27177
740 filterMap = pexConfig.DictField(
741 keytype=str,
742 itemtype=str,
743 default={},
744 doc=("Dictionary mapping full filter name to short one for column name munging."
745 "These filters determine the output columns no matter what filters the "
746 "input data actually contain."),
747 deprecated=("Coadds are now identified by the band, so this transform is unused."
748 "Will be removed after v22.")
749 )
750 outputBands = pexConfig.ListField(
751 dtype=str,
752 default=None,
753 optional=True,
754 doc=("These bands and only these bands will appear in the output,"
755 " NaN-filled if the input does not include them."
756 " If None, then use all bands found in the input.")
757 )
758 camelCase = pexConfig.Field(
759 dtype=bool,
760 default=True,
761 doc=("Write per-band columns names with camelCase, else underscore "
762 "For example: gPsFlux instead of g_PsFlux.")
763 )
764 multilevelOutput = pexConfig.Field(
765 dtype=bool,
766 default=False,
767 doc=("Whether results dataframe should have a multilevel column index (True) or be flat "
768 "and name-munged (False).")
769 )
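# A hypothetical config override for TransformObjectCatalogTask (e.g. in a
# pipeline or obs-package config file): restrict the output to three bands
# and keep the default flattened, camelCase column layout.
#
#     config.outputBands = ['g', 'r', 'i']
#     config.camelCase = True
#     config.multilevelOutput = False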
772class TransformObjectCatalogTask(TransformCatalogBaseTask):
773 """Produce a flattened Object Table to match the format specified in
774 sdm_schemas.
776 Do the same set of postprocessing calculations on all bands
778 This is identical to `TransformCatalogBaseTask`, except for that it does the
779 specified functor calculations for all filters present in the
780 input `deepCoadd_obj` table. Any specific `"filt"` keywords specified
781 by the YAML file will be superceded.
782 """
783 _DefaultName = "transformObjectCatalog"
784 ConfigClass = TransformObjectCatalogConfig
786 # Used by Gen 2 runDataRef only:
787 inputDataset = 'deepCoadd_obj'
788 outputDataset = 'objectTable'
790 @classmethod
791 def _makeArgumentParser(cls):
792 parser = ArgumentParser(name=cls._DefaultName)
793 parser.add_id_argument("--id", cls.inputDataset,
794 ContainerClass=CoaddDataIdContainer,
795 help="data ID, e.g. --id tract=12345 patch=1,2")
796 return parser
798 def run(self, parq, funcs=None, dataId=None, band=None):
799 # NOTE: band kwarg is ignored here.
800 dfDict = {}
801 analysisDict = {}
802 templateDf = pd.DataFrame()
804 if isinstance(parq, DeferredDatasetHandle):
805 columns = parq.get(component='columns')
806 inputBands = columns.unique(level=1).values
807 else:
808 inputBands = parq.columnLevelNames['band']
810 outputBands = self.config.outputBands if self.config.outputBands else inputBands
812 # Perform transform for data of filters that exist in parq.
813 for inputBand in inputBands:
814 if inputBand not in outputBands:
815 self.log.info("Ignoring %s band data in the input", inputBand)
816 continue
817 self.log.info("Transforming the catalog of band %s", inputBand)
818 result = self.transform(inputBand, parq, funcs, dataId)
819 dfDict[inputBand] = result.df
820 analysisDict[inputBand] = result.analysis
821 if templateDf.empty:
822 templateDf = result.df
824 # Fill NaNs in columns of other wanted bands
825 for filt in outputBands:
826 if filt not in dfDict:
827 self.log.info("Adding empty columns for band %s", filt)
828 dfDict[filt] = pd.DataFrame().reindex_like(templateDf)
830 # This makes a multilevel column index, with band as first level
831 df = pd.concat(dfDict, axis=1, names=['band', 'column'])
833 if not self.config.multilevelOutput:
834 noDupCols = list(set.union(*[set(v.noDupCols) for v in analysisDict.values()]))
835 if dataId is not None:
836 noDupCols += list(dataId.keys())
837 df = flattenFilters(df, noDupCols=noDupCols, camelCase=self.config.camelCase,
838 inputBands=inputBands)
840 self.log.info("Made a table of %d columns and %d rows", len(df.columns), len(df))
841 return df
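    # Sketch of the band handling above (hypothetical bands): with
    # ``outputBands=['g', 'r', 'i']`` and only g and r present in the input,
    # the i columns are added NaN-filled; with ``multilevelOutput=False`` the
    # (band, column) index is then flattened to e.g. 'gPsFlux', 'rPsFlux',
    # 'iPsFlux', while noDupCols such as 'coord_ra' appear only once.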
844class TractObjectDataIdContainer(CoaddDataIdContainer):
846 def makeDataRefList(self, namespace):
847 """Make self.refList from self.idList
849 Generate a list of data references given tract and/or patch.
850 This was adapted from `TractQADataIdContainer`, which was
851 `TractDataIdContainer` modified to not require "filter".
852 Only existing dataRefs are returned.
853 """
854 def getPatchRefList(tract):
855 return [namespace.butler.dataRef(datasetType=self.datasetType,
856 tract=tract.getId(),
857 patch="%d,%d" % patch.getIndex()) for patch in tract]
859 tractRefs = defaultdict(list) # Data references for each tract
860 for dataId in self.idList:
861 skymap = self.getSkymap(namespace)
863 if "tract" in dataId:
864 tractId = dataId["tract"]
865 if "patch" in dataId:
866 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=self.datasetType,
867 tract=tractId,
868 patch=dataId['patch']))
869 else:
870 tractRefs[tractId] += getPatchRefList(skymap[tractId])
871 else:
872 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
873 for tract in skymap)
874 outputRefList = []
875 for tractRefList in tractRefs.values():
876 existingRefs = [ref for ref in tractRefList if ref.datasetExists()]
877 outputRefList.append(existingRefs)
879 self.refList = outputRefList
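# Hypothetical Gen 2 data-id examples accepted by this container (passed via
# the --id argument of the owning CmdLineTask):
#
#     --id tract=9813              # all existing patches in the tract
#     --id tract=9813 patch=3,4    # a single patch
#
# With no tract key at all, every tract in the skymap is expanded.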
882class ConsolidateObjectTableConnections(pipeBase.PipelineTaskConnections,
883 dimensions=("tract", "skymap")):
884 inputCatalogs = connectionTypes.Input(
885 doc="Per-Patch objectTables conforming to the standard data model.",
886 name="objectTable",
887 storageClass="DataFrame",
888 dimensions=("tract", "patch", "skymap"),
889 multiple=True,
890 )
891 outputCatalog = connectionTypes.Output(
892 doc="Pre-tract horizontal concatenation of the input objectTables",
893 name="objectTable_tract",
894 storageClass="DataFrame",
895 dimensions=("tract", "skymap"),
896 )
899class ConsolidateObjectTableConfig(pipeBase.PipelineTaskConfig,
900 pipelineConnections=ConsolidateObjectTableConnections):
901 coaddName = pexConfig.Field(
902 dtype=str,
903 default="deep",
904 doc="Name of coadd"
905 )
908class ConsolidateObjectTableTask(CmdLineTask, pipeBase.PipelineTask):
909 """Write patch-merged source tables to a tract-level parquet file
911 Concatenates `objectTable` list into a per-tract `objectTable_tract`
912 """
913 _DefaultName = "consolidateObjectTable"
914 ConfigClass = ConsolidateObjectTableConfig
916 inputDataset = 'objectTable'
917 outputDataset = 'objectTable_tract'
919 def runQuantum(self, butlerQC, inputRefs, outputRefs):
920 inputs = butlerQC.get(inputRefs)
921 self.log.info("Concatenating %s per-patch Object Tables",
922 len(inputs['inputCatalogs']))
923 df = pd.concat(inputs['inputCatalogs'])
924 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
926 @classmethod
927 def _makeArgumentParser(cls):
928 parser = ArgumentParser(name=cls._DefaultName)
930 parser.add_id_argument("--id", cls.inputDataset,
931 help="data ID, e.g. --id tract=12345",
932 ContainerClass=TractObjectDataIdContainer)
933 return parser
935 def runDataRef(self, patchRefList):
936 df = pd.concat([patchRef.get().toDataFrame() for patchRef in patchRefList])
937 patchRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
939 def writeMetadata(self, dataRef):
940 """No metadata to write.
941 """
942 pass
945class TransformSourceTableConnections(pipeBase.PipelineTaskConnections,
946 defaultTemplates={"catalogType": ""},
947 dimensions=("instrument", "visit", "detector")):
949 inputCatalog = connectionTypes.Input(
950 doc="Wide input catalog of sources produced by WriteSourceTableTask",
951 name="{catalogType}source",
952 storageClass="DataFrame",
953 dimensions=("instrument", "visit", "detector"),
954 deferLoad=True
955 )
956 outputCatalog = connectionTypes.Output(
957 doc="Narrower, per-detector Source Table transformed and converted per a "
958 "specified set of functors",
959 name="{catalogType}sourceTable",
960 storageClass="DataFrame",
961 dimensions=("instrument", "visit", "detector")
962 )
965class TransformSourceTableConfig(TransformCatalogBaseConfig,
966 pipelineConnections=TransformSourceTableConnections):
967 pass
970class TransformSourceTableTask(TransformCatalogBaseTask):
971 """Transform/standardize a source catalog
972 """
973 _DefaultName = "transformSourceTable"
974 ConfigClass = TransformSourceTableConfig
976 inputDataset = 'source'
977 outputDataset = 'sourceTable'
979 @classmethod
980 def _makeArgumentParser(cls):
981 parser = ArgumentParser(name=cls._DefaultName)
982 parser.add_id_argument("--id", datasetType=cls.inputDataset,
983 level="sensor",
984 help="data ID, e.g. --id visit=12345 ccd=0")
985 return parser
987 def runDataRef(self, dataRef):
988 """Override to specify band label to run()."""
989 parq = dataRef.get()
990 funcs = self.getFunctors()
991 band = dataRef.get("calexp_filterLabel", immediate=True).bandLabel
992 df = self.run(parq, funcs=funcs, dataId=dataRef.dataId, band=band)
993 self.write(df, dataRef)
994 return df
997class ConsolidateVisitSummaryConnections(pipeBase.PipelineTaskConnections,
998 dimensions=("instrument", "visit",),
999 defaultTemplates={"calexpType": ""}):
1000 calexp = connectionTypes.Input(
1001 doc="Processed exposures used for metadata",
1002 name="{calexpType}calexp",
1003 storageClass="ExposureF",
1004 dimensions=("instrument", "visit", "detector"),
1005 deferLoad=True,
1006 multiple=True,
1007 )
1008 visitSummary = connectionTypes.Output(
1009 doc=("Per-visit consolidated exposure metadata. These catalogs use "
1010 "detector id for the id and are sorted for fast lookups of a "
1011 "detector."),
1012 name="{calexpType}visitSummary",
1013 storageClass="ExposureCatalog",
1014 dimensions=("instrument", "visit"),
1015 )
1018class ConsolidateVisitSummaryConfig(pipeBase.PipelineTaskConfig,
1019 pipelineConnections=ConsolidateVisitSummaryConnections):
1020 """Config for ConsolidateVisitSummaryTask"""
1021 pass
1024class ConsolidateVisitSummaryTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
1025 """Task to consolidate per-detector visit metadata.
1027 This task aggregates the following metadata from all the detectors in a
1028 single visit into an exposure catalog:
1029 - The visitInfo.
1030 - The wcs.
1031 - The photoCalib.
1032 - The physical_filter and band (if available).
1033 - The psf size, shape, and effective area at the center of the detector.
1034 - The corners of the bounding box in right ascension/declination.
1036 Other quantities such as Detector, Psf, ApCorrMap, and TransmissionCurve
1037 are not persisted here because of storage concerns, and because of their
1038 limited utility as summary statistics.
1040 Tests for this task are performed in ci_hsc_gen3.
1041 """
1042 _DefaultName = "consolidateVisitSummary"
1043 ConfigClass = ConsolidateVisitSummaryConfig
1045 @classmethod
1046 def _makeArgumentParser(cls):
1047 parser = ArgumentParser(name=cls._DefaultName)
1049 parser.add_id_argument("--id", "calexp",
1050 help="data ID, e.g. --id visit=12345",
1051 ContainerClass=VisitDataIdContainer)
1052 return parser
1054 def writeMetadata(self, dataRef):
1055 """No metadata to persist, so override to remove metadata persistance.
1056 """
1057 pass
1059 def writeConfig(self, butler, clobber=False, doBackup=True):
1060 """No config to persist, so override to remove config persistance.
1061 """
1062 pass
1064 def runDataRef(self, dataRefList):
1065 visit = dataRefList[0].dataId['visit']
1067 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
1068 (len(dataRefList), visit))
1070 expCatalog = self._combineExposureMetadata(visit, dataRefList, isGen3=False)
1072 dataRefList[0].put(expCatalog, 'visitSummary', visit=visit)
1074 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1075 dataRefs = butlerQC.get(inputRefs.calexp)
1076 visit = dataRefs[0].dataId.byName()['visit']
1078 self.log.debug("Concatenating metadata from %d per-detector calexps (visit %d)" %
1079 (len(dataRefs), visit))
1081 expCatalog = self._combineExposureMetadata(visit, dataRefs)
1083 butlerQC.put(expCatalog, outputRefs.visitSummary)
1085 def _combineExposureMetadata(self, visit, dataRefs, isGen3=True):
1086 """Make a combined exposure catalog from a list of dataRefs.
1087 These dataRefs must point to exposures with wcs, summaryStats,
1088 and other visit metadata.
1090 Parameters
1091 ----------
1092 visit : `int`
1093 Visit identification number.
1094 dataRefs : `list`
1095 List of dataRefs in visit. May be list of
1096 `lsst.daf.persistence.ButlerDataRef` (Gen2) or
1097 `lsst.daf.butler.DeferredDatasetHandle` (Gen3).
1098 isGen3 : `bool`, optional
1099 Specifies if this is a Gen3 list of datarefs.
1101 Returns
1102 -------
1103 visitSummary : `lsst.afw.table.ExposureCatalog`
1104 Exposure catalog with per-detector summary information.
1105 """
1106 schema = self._makeVisitSummarySchema()
1107 cat = afwTable.ExposureCatalog(schema)
1108 cat.resize(len(dataRefs))
1110 cat['visit'] = visit
1112 for i, dataRef in enumerate(dataRefs):
1113 if isGen3:
1114 visitInfo = dataRef.get(component='visitInfo')
1115 filterLabel = dataRef.get(component='filterLabel')
1116 summaryStats = dataRef.get(component='summaryStats')
1117 detector = dataRef.get(component='detector')
1118 wcs = dataRef.get(component='wcs')
1119 photoCalib = dataRef.get(component='photoCalib')
1120 detector = dataRef.get(component='detector')
1121 bbox = dataRef.get(component='bbox')
1122 validPolygon = dataRef.get(component='validPolygon')
1123 else:
1124 # Note that we need to read the calexp because there is
1125 # no magic access to the psf except through the exposure.
1126 gen2_read_bbox = lsst.geom.BoxI(lsst.geom.PointI(0, 0), lsst.geom.PointI(1, 1))
1127 exp = dataRef.get(datasetType='calexp_sub', bbox=gen2_read_bbox)
1128 visitInfo = exp.getInfo().getVisitInfo()
1129 filterLabel = dataRef.get("calexp_filterLabel")
1130 summaryStats = exp.getInfo().getSummaryStats()
1131 wcs = exp.getWcs()
1132 photoCalib = exp.getPhotoCalib()
1133 detector = exp.getDetector()
1134 bbox = dataRef.get(datasetType='calexp_bbox')
1135 validPolygon = exp.getInfo().getValidPolygon()
1137 rec = cat[i]
1138 rec.setBBox(bbox)
1139 rec.setVisitInfo(visitInfo)
1140 rec.setWcs(wcs)
1141 rec.setPhotoCalib(photoCalib)
1142 rec.setValidPolygon(validPolygon)
1144 rec['physical_filter'] = filterLabel.physicalLabel if filterLabel.hasPhysicalLabel() else ""
1145 rec['band'] = filterLabel.bandLabel if filterLabel.hasBandLabel() else ""
1146 rec.setId(detector.getId())
1147 rec['psfSigma'] = summaryStats.psfSigma
1148 rec['psfIxx'] = summaryStats.psfIxx
1149 rec['psfIyy'] = summaryStats.psfIyy
1150 rec['psfIxy'] = summaryStats.psfIxy
1151 rec['psfArea'] = summaryStats.psfArea
1152 rec['raCorners'][:] = summaryStats.raCorners
1153 rec['decCorners'][:] = summaryStats.decCorners
1154 rec['ra'] = summaryStats.ra
1155 rec['decl'] = summaryStats.decl
1156 rec['zenithDistance'] = summaryStats.zenithDistance
1157 rec['zeroPoint'] = summaryStats.zeroPoint
1158 rec['skyBg'] = summaryStats.skyBg
1159 rec['skyNoise'] = summaryStats.skyNoise
1160 rec['meanVar'] = summaryStats.meanVar
1162 metadata = dafBase.PropertyList()
1163 metadata.add("COMMENT", "Catalog id is detector id, sorted.")
1164 # We are looping over existing datarefs, so the following is true
1165 metadata.add("COMMENT", "Only detectors with data have entries.")
1166 cat.setMetadata(metadata)
1168 cat.sort()
1169 return cat
1171 def _makeVisitSummarySchema(self):
1172 """Make the schema for the visitSummary catalog."""
1173 schema = afwTable.ExposureTable.makeMinimalSchema()
1174 schema.addField('visit', type='I', doc='Visit number')
1175 schema.addField('physical_filter', type='String', size=32, doc='Physical filter')
1176 schema.addField('band', type='String', size=32, doc='Name of band')
1177 schema.addField('psfSigma', type='F',
1178 doc='PSF model second-moments determinant radius (center of chip) (pixel)')
1179 schema.addField('psfArea', type='F',
1180 doc='PSF model effective area (center of chip) (pixel**2)')
1181 schema.addField('psfIxx', type='F',
1182 doc='PSF model Ixx (center of chip) (pixel**2)')
1183 schema.addField('psfIyy', type='F',
1184 doc='PSF model Iyy (center of chip) (pixel**2)')
1185 schema.addField('psfIxy', type='F',
1186 doc='PSF model Ixy (center of chip) (pixel**2)')
1187 schema.addField('raCorners', type='ArrayD', size=4,
1188 doc='Right Ascension of bounding box corners (degrees)')
1189 schema.addField('decCorners', type='ArrayD', size=4,
1190 doc='Declination of bounding box corners (degrees)')
1191 schema.addField('ra', type='D',
1192 doc='Right Ascension of bounding box center (degrees)')
1193 schema.addField('decl', type='D',
1194 doc='Declination of bounding box center (degrees)')
1195 schema.addField('zenithDistance', type='F',
1196 doc='Zenith distance of bounding box center (degrees)')
1197 schema.addField('zeroPoint', type='F',
1198 doc='Mean zeropoint in detector (mag)')
1199 schema.addField('skyBg', type='F',
1200 doc='Average sky background (ADU)')
1201 schema.addField('skyNoise', type='F',
1202 doc='Average sky noise (ADU)')
1203 schema.addField('meanVar', type='F',
1204 doc='Mean variance of the weight plane (ADU**2)')
1206 return schema
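    # A minimal read-back sketch (hypothetical dataId and detector number):
    # the catalog is sorted by detector id, so a per-detector record can be
    # looked up directly.
    #
    #     visitSummary = butler.get('visitSummary', visit=12345)  # dataId form depends on butler
    #     row = visitSummary.find(42)            # record for detector 42
    #     psfFwhmPix = row['psfSigma'] * 2.355   # approximate FWHM in pixels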
1209class VisitDataIdContainer(DataIdContainer):
1210 """DataIdContainer that groups sensor-level id's by visit
1211 """
1213 def makeDataRefList(self, namespace):
1214 """Make self.refList from self.idList
1216 Generate a list of data references grouped by visit.
1218 Parameters
1219 ----------
1220 namespace : `argparse.Namespace`
1221 Namespace used by `lsst.pipe.base.CmdLineTask` to parse command line arguments
1222 """
1223 # Group by visits
1224 visitRefs = defaultdict(list)
1225 for dataId in self.idList:
1226 if "visit" in dataId:
1227 visitId = dataId["visit"]
1228 # Append all data refs in this subset to the visit's list
1229 subset = namespace.butler.subset(self.datasetType, dataId=dataId)
1230 visitRefs[visitId].extend([dataRef for dataRef in subset])
1232 outputRefList = []
1233 for refList in visitRefs.values():
1234 existingRefs = [ref for ref in refList if ref.datasetExists()]
1235 if existingRefs:
1236 outputRefList.append(existingRefs)
1238 self.refList = outputRefList
1241class ConsolidateSourceTableConnections(pipeBase.PipelineTaskConnections,
1242 defaultTemplates={"catalogType": ""},
1243 dimensions=("instrument", "visit")):
1244 inputCatalogs = connectionTypes.Input(
1245 doc="Input per-detector Source Tables",
1246 name="{catalogType}sourceTable",
1247 storageClass="DataFrame",
1248 dimensions=("instrument", "visit", "detector"),
1249 multiple=True
1250 )
1251 outputCatalog = connectionTypes.Output(
1252 doc="Per-visit concatenation of Source Table",
1253 name="{catalogType}sourceTable_visit",
1254 storageClass="DataFrame",
1255 dimensions=("instrument", "visit")
1256 )
1259class ConsolidateSourceTableConfig(pipeBase.PipelineTaskConfig,
1260 pipelineConnections=ConsolidateSourceTableConnections):
1261 pass
1264class ConsolidateSourceTableTask(CmdLineTask, pipeBase.PipelineTask):
1265 """Concatenate `sourceTable` list into a per-visit `sourceTable_visit`
1266 """
1267 _DefaultName = 'consolidateSourceTable'
1268 ConfigClass = ConsolidateSourceTableConfig
1270 inputDataset = 'sourceTable'
1271 outputDataset = 'sourceTable_visit'
1273 def runQuantum(self, butlerQC, inputRefs, outputRefs):
1274 inputs = butlerQC.get(inputRefs)
1275 self.log.info("Concatenating %s per-detector Source Tables",
1276 len(inputs['inputCatalogs']))
1277 df = pd.concat(inputs['inputCatalogs'])
1278 butlerQC.put(pipeBase.Struct(outputCatalog=df), outputRefs)
1280 def runDataRef(self, dataRefList):
1281 self.log.info("Concatenating %s per-detector Source Tables", len(dataRefList))
1282 df = pd.concat([dataRef.get().toDataFrame() for dataRef in dataRefList])
1283 dataRefList[0].put(ParquetTable(dataFrame=df), self.outputDataset)
1285 @classmethod
1286 def _makeArgumentParser(cls):
1287 parser = ArgumentParser(name=cls._DefaultName)
1289 parser.add_id_argument("--id", cls.inputDataset,
1290 help="data ID, e.g. --id visit=12345",
1291 ContainerClass=VisitDataIdContainer)
1292 return parser
1294 def writeMetadata(self, dataRef):
1295 """No metadata to write.
1296 """
1297 pass
1299 def writeConfig(self, butler, clobber=False, doBackup=True):
1300 """No config to write.
1301 """
1302 pass
1305class MakeCcdVisitTableConnections(pipeBase.PipelineTaskConnections,
1306 dimensions=("instrument",),
1307 defaultTemplates={}):
1308 visitSummaryRefs = connectionTypes.Input(
1309 doc="Data references for per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1310 name="visitSummary",
1311 storageClass="ExposureCatalog",
1312 dimensions=("instrument", "visit"),
1313 multiple=True,
1314 deferLoad=True,
1315 )
1316 outputCatalog = connectionTypes.Output(
1317 doc="CCD and Visit metadata table",
1318 name="CcdVisitTable",
1319 storageClass="DataFrame",
1320 dimensions=("instrument",)
1321 )
1324class MakeCcdVisitTableConfig(pipeBase.PipelineTaskConfig,
1325 pipelineConnections=MakeCcdVisitTableConnections):
1326 pass
1329class MakeCcdVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1330 """Produce a `ccdVisitTable` from the `visitSummary` exposure catalogs.
1331 """
1332 _DefaultName = 'makeCcdVisitTable'
1333 ConfigClass = MakeCcdVisitTableConfig
1335 def run(self, visitSummaryRefs):
1336 """ Make a table of ccd information from the `visitSummary` catalogs.
1337 Parameters
1338 ----------
1339 visitSummaryRefs : `list` of `lsst.daf.butler.DeferredDatasetHandle`
1340 List of DeferredDatasetHandles pointing to exposure catalogs with
1341 per-detector summary information.
1342 Returns
1343 -------
1344 result : `lsst.pipe.Base.Struct`
1345 Results struct with attribute:
1346 - `outputCatalog`
1347 Catalog of ccd and visit information.
1348 """
1349 ccdEntries = []
1350 for visitSummaryRef in visitSummaryRefs:
1351 visitSummary = visitSummaryRef.get()
1352 visitInfo = visitSummary[0].getVisitInfo()
1354 ccdEntry = {}
1355 summaryTable = visitSummary.asAstropy()
1356 selectColumns = ['id', 'visit', 'physical_filter', 'ra', 'decl', 'zenithDistance', 'zeroPoint',
1357 'psfSigma', 'skyBg', 'skyNoise']
1358 ccdEntry = summaryTable[selectColumns].to_pandas().set_index('id')
1359 ccdEntry = ccdEntry.rename(columns={"physical_filter": "filterName", "visit": "visitId"})
1361 dataIds = [DataCoordinate.standardize(visitSummaryRef.dataId, detector=id) for id in
1362 summaryTable['id']]
1363 packer = visitSummaryRef.dataId.universe.makePacker('visit_detector', visitSummaryRef.dataId)
1364 ccdVisitIds = [packer.pack(dataId) for dataId in dataIds]
1365 ccdEntry['ccdVisitId'] = ccdVisitIds
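            # Convert the PSF model sigma (pixels) to a FWHM-based seeing
            # estimate in arcseconds: FWHM = sigma * 2*sqrt(2*ln 2)
            # = sigma * sqrt(8*ln 2) ~ 2.355 * sigma.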
1367 pixToArcseconds = np.array([vR.getWcs().getPixelScale().asArcseconds() for vR in visitSummary])
1368 ccdEntry["seeing"] = visitSummary['psfSigma'] * np.sqrt(8 * np.log(2)) * pixToArcseconds
1370 ccdEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1371 ccdEntry["expMidpt"] = visitInfo.getDate().toPython()
1372 expTime = visitInfo.getExposureTime()
1373 ccdEntry['expTime'] = expTime
1374 ccdEntry["obsStart"] = ccdEntry["expMidpt"] - 0.5 * pd.Timedelta(seconds=expTime)
1375 ccdEntry['darkTime'] = visitInfo.getDarkTime()
1376 ccdEntry['xSize'] = summaryTable['bbox_max_x'] - summaryTable['bbox_min_x']
1377 ccdEntry['ySize'] = summaryTable['bbox_max_y'] - summaryTable['bbox_min_y']
1378 ccdEntry['llcra'] = summaryTable['raCorners'][:, 0]
1379 ccdEntry['llcdec'] = summaryTable['decCorners'][:, 0]
1380 ccdEntry['ulcra'] = summaryTable['raCorners'][:, 1]
1381 ccdEntry['ulcdec'] = summaryTable['decCorners'][:, 1]
1382 ccdEntry['urcra'] = summaryTable['raCorners'][:, 2]
1383 ccdEntry['urcdec'] = summaryTable['decCorners'][:, 2]
1384 ccdEntry['lrcra'] = summaryTable['raCorners'][:, 3]
1385 ccdEntry['lrcdec'] = summaryTable['decCorners'][:, 3]
1386 # TODO: DM-30618, Add raftName, nExposures, ccdTemp, binX, binY, and flags,
1387 # and decide if WCS, and llcx, llcy, ulcx, ulcy, etc. values are actually wanted.
1388 ccdEntries.append(ccdEntry)
1390 outputCatalog = pd.concat(ccdEntries)
1391 return pipeBase.Struct(outputCatalog=outputCatalog)
1394class MakeVisitTableConnections(pipeBase.PipelineTaskConnections,
1395 dimensions=("instrument",),
1396 defaultTemplates={}):
1397 visitSummaries = connectionTypes.Input(
1398 doc="Per-visit consolidated exposure metadata from ConsolidateVisitSummaryTask",
1399 name="visitSummary",
1400 storageClass="ExposureCatalog",
1401 dimensions=("instrument", "visit",),
1402 multiple=True,
1403 deferLoad=True,
1404 )
1405 outputCatalog = connectionTypes.Output(
1406 doc="Visit metadata table",
1407 name="visitTable",
1408 storageClass="DataFrame",
1409 dimensions=("instrument",)
1410 )
1413class MakeVisitTableConfig(pipeBase.PipelineTaskConfig,
1414 pipelineConnections=MakeVisitTableConnections):
1415 pass
1418class MakeVisitTableTask(CmdLineTask, pipeBase.PipelineTask):
1419 """Produce a `visitTable` from the `visitSummary` exposure catalogs.
1420 """
1421 _DefaultName = 'makeVisitTable'
1422 ConfigClass = MakeVisitTableConfig
1424 def run(self, visitSummaries):
1425 """ Make a table of visit information from the `visitSummary` catalogs
1427 Parameters
1428 ----------
1429 visitSummaries : list of `lsst.afw.table.ExposureCatalog`
1430 List of exposure catalogs with per-detector summary information.
1431 Returns
1432 -------
1433 result : `lsst.pipe.base.Struct`
1434 Results struct with attribute:
1435 ``outputCatalog``
1436 Catalog of visit information.
1437 """
1438 visitEntries = []
1439 for visitSummary in visitSummaries:
1440 visitSummary = visitSummary.get()
1441 visitRow = visitSummary[0]
1442 visitInfo = visitRow.getVisitInfo()
1444 visitEntry = {}
1445 visitEntry["visitId"] = visitRow['visit']
1446 visitEntry["filterName"] = visitRow['physical_filter']
1447 raDec = visitInfo.getBoresightRaDec()
1448 visitEntry["ra"] = raDec.getRa().asDegrees()
1449 visitEntry["decl"] = raDec.getDec().asDegrees()
1450 visitEntry["skyRotation"] = visitInfo.getBoresightRotAngle().asDegrees()
1451 azAlt = visitInfo.getBoresightAzAlt()
1452 visitEntry["azimuth"] = azAlt.getLongitude().asDegrees()
1453 visitEntry["altitude"] = azAlt.getLatitude().asDegrees()
1454 visitEntry["zenithDistance"] = 90 - azAlt.getLatitude().asDegrees()
1455 visitEntry["airmass"] = visitInfo.getBoresightAirmass()
1456 visitEntry["obsStart"] = visitInfo.getDate().toPython()
1457 visitEntry["expTime"] = visitInfo.getExposureTime()
1458 visitEntries.append(visitEntry)
1459 # TODO: DM-30623, Add programId, exposureType, expMidpt, cameraTemp, mirror1Temp, mirror2Temp,
1460 # mirror3Temp, domeTemp, externalTemp, dimmSeeing, pwvGPS, pwvMW, flags, nExposures
1462 outputCatalog = pd.DataFrame(data=visitEntries)
1463 return pipeBase.Struct(outputCatalog=outputCatalog)