Coverage for python/lsst/obs/lsst/_ingestPhotodiode.py: 13%
98 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-01 17:14 -0700
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-01 17:14 -0700
1# This file is part of obs_lsst.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21__all__ = ('PhotodiodeIngestConfig', 'PhotodiodeIngestTask')
24from lsst.daf.butler import (
25 CollectionType,
26 DataCoordinate,
27 DatasetIdGenEnum,
28 DatasetRef,
29 DatasetType,
30 FileDataset,
31 Progress,
32)
33from lsst.ip.isr import PhotodiodeCalib
34from lsst.obs.base import makeTransferChoiceField
35from lsst.obs.base.formatters.fitsGeneric import FitsGenericFormatter
36from lsst.pex.config import Config
37from lsst.pipe.base import Task
38from lsst.resources import ResourcePath
class PhotodiodeIngestConfig(Config):
    """Configuration for `PhotodiodeIngestTask`.

    The only field is the transfer mode handed to the butler ingest
    call; `validate` rejects every mode other than ``"copy"``.
    """

    transfer = makeTransferChoiceField(default="copy")

    def validate(self):
        """Validate the configuration.

        Raises
        ------
        ValueError
            Raised if ``transfer`` is anything other than ``"copy"``.
        """
        super().validate()
        if self.transfer == "copy":
            return
        raise ValueError(f"Transfer Must be 'copy' for photodiode data. {self.transfer}")
class PhotodiodeIngestTask(Task):
    """Task to ingest photodiode data into a butler repository.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        Writable butler instance, with ``butler.run`` set to the
        appropriate `~lsst.daf.butler.CollectionType.RUN` collection
        for these datasets.
    instrument : `~lsst.obs.base.Instrument`
        The instrument these photodiode datasets are from.
    config : `PhotodiodeIngestConfig`, optional
        Configuration for the task.  If `None`, the base class supplies
        the configuration.
    **kwargs
        Additional keyword arguments, forwarded to the base class
        constructor.
    """

    ConfigClass = PhotodiodeIngestConfig
    _DefaultName = "photodiodeIngest"

    def getDatasetType(self):
        """Return the DatasetType of the photodiode datasets.

        Returns
        -------
        datasetType : `lsst.daf.butler.DatasetType`
            The ``photodiode`` dataset type, with dimensions
            ``(instrument, exposure)`` and storage class ``IsrCalib``.
        """
        return DatasetType(
            "photodiode",
            ("instrument", "exposure"),
            "IsrCalib",
            universe=self.universe,
        )

    def __init__(self, butler, instrument, config=None, **kwargs):
        # Let the base class resolve the configuration first, so that the
        # documented default of ``config=None`` does not fail with an
        # AttributeError; then validate whatever configuration is in use.
        super().__init__(config, **kwargs)
        self.config.validate()
        self.butler = butler
        self.universe = self.butler.dimensions
        self.datasetType = self.getDatasetType()
        self.progress = Progress(self.log.name)
        self.instrument = instrument
        self.camera = self.instrument.getCamera()

    def run(self, locations, run=None, file_filter=r"Photodiode_Readings.*txt$|_photodiode.ecsv$",
            track_file_attrs=None):
        """Ingest photodiode data into a Butler data repository.

        Parameters
        ----------
        locations : iterable
            Locations to search for photodiode files; passed to
            `lsst.resources.ResourcePath.findFileResources`.
        run : `str`, optional
            Name of the RUN-type collection to write to,
            overriding the default derived from the instrument
            name.
        file_filter : `str`, optional
            Regular expression passed to
            `lsst.resources.ResourcePath.findFileResources` to select
            the files to ingest.
        track_file_attrs : `bool`, optional
            Control whether file attributes such as the size or
            checksum should be tracked by the datastore. Whether
            this parameter is honored depends on the specific
            datastore implementation.  The value is forwarded
            unchanged as ``record_validation_info``.

        Returns
        -------
        refs : `list` [`lsst.daf.butler.FileDataset`]
            The file datasets that were ingested, one per newly
            ingested photodiode file.

        Raises
        ------
        RuntimeError
            Raised after all files have been processed if, for at least
            one file, the number of matching exposures was not exactly
            one.

        Notes
        -----
        Photodiode data for which a dataset with the same data ID already
        exists in the target run are always skipped.
        """
        files = ResourcePath.findFileResources(locations, file_filter)

        registry = self.butler.registry
        registry.registerDatasetType(self.datasetType)

        # Find and register run that we will ingest to.
        if run is None:
            run = self.instrument.makeCollectionName("calib", "photodiode")
        registry.registerCollection(run, type=CollectionType.RUN)

        # Use datasetIds that match the raw exposure data.
        if registry.supportsIdGenerationMode(DatasetIdGenEnum.DATAID_TYPE_RUN):
            mode = DatasetIdGenEnum.DATAID_TYPE_RUN
        else:
            mode = DatasetIdGenEnum.UNIQUE

        refs = []
        numExisting = 0
        numFailed = 0
        for inputFile in files:
            # Convert the file into the right class.
            calibType = "Unknown"
            try:
                # Can this be read directly in standard form?
                with inputFile.as_local() as localFile:
                    calib = PhotodiodeCalib.readText(localFile.ospath)
                calibType = "full"
            except Exception:
                # Any failure of the standard reader triggers the fallback
                # to the two-column format.  A failure of the fallback
                # itself is not caught and aborts the ingest.
                with inputFile.as_local() as localFile:
                    calib = PhotodiodeCalib.readTwoColumnPhotodiodeData(localFile.ospath)
                calibType = "two-column"

            # Build the exposure query from the identifiers each file
            # format provides.
            metadata = calib.getMetadata()
            if calibType == "full":
                instrumentName = metadata.get('INSTRUME')
                if instrumentName is None:
                    # The field is populated by the calib class, so we
                    # can't use defaults.
                    instrumentName = self.instrument.getName()

                obsId = metadata['obsId']
                whereClause = "exposure.obs_id=obsId"
                binding = {"obsId": obsId}
                logId = obsId
            elif calibType == "two-column":
                dayObs = metadata['day_obs']
                seqNum = metadata['seq_num']

                # Find the associated exposure information.
                whereClause = "exposure.day_obs=dayObs and exposure.seq_num=seqNum"
                instrumentName = self.instrument.getName()
                binding = {"dayObs": dayObs, "seqNum": seqNum}
                logId = (dayObs, seqNum)
            else:
                # Defensive only: both readers above either set calibType
                # or raise, so this branch is not expected to be taken.
                self.log.warning("Skipping input file %s of unknown type.",
                                 inputFile)
                continue

            exposureRecords = list(registry.queryDimensionRecords("exposure",
                                                                  instrument=instrumentName,
                                                                  where=whereClause,
                                                                  bind=binding))

            # Exactly one exposure must match; anything else is counted as
            # a failure and reported once every file has been processed.
            nRecords = len(exposureRecords)
            if nRecords != 1:
                numFailed += 1
                if nRecords == 0:
                    self.log.warning("Skipping instrument %s and identifiers %s: no exposures found.",
                                     instrumentName, logId)
                else:
                    self.log.warning("Multiple exposure entries found for instrument %s and "
                                     "identifiers %s.", instrumentName, logId)
                continue

            exposureId = exposureRecords[0].id
            calib.updateMetadata(camera=self.camera, exposure=exposureId)

            # Generate the dataId for this file.
            dataId = DataCoordinate.standardize(
                instrument=self.instrument.getName(),
                exposure=exposureId,
                universe=self.universe,
            )

            # If this already exists, we should skip it and continue.
            existing = {
                ref.dataId
                for ref in registry.queryDatasets(self.datasetType, collections=[run],
                                                  dataId=dataId)
            }
            if existing:
                self.log.debug("Skipping instrument %s and identifiers %s: already exists in run %s.",
                               instrumentName, logId, run)
                numExisting += 1
                continue

            # Ingest must work from a file, but we can't use the
            # original, as we've added new metadata and reformatted
            # it.  Write it to a temp file that we can use to ingest.
            # If we can have the files written appropriately, this
            # will be a direct ingest of those files.
            with ResourcePath.temporary_uri(suffix=".fits") as tempFile:
                calib.writeFits(tempFile.ospath)

                ref = DatasetRef(self.datasetType, dataId, run=run, id_generation_mode=mode)
                dataset = FileDataset(path=tempFile, refs=ref, formatter=FitsGenericFormatter)

                # No try, as if this fails, we should stop.  The ingest
                # happens inside the context so that the temporary file
                # still exists when it is transferred.
                self.butler.ingest(dataset, transfer=self.config.transfer,
                                   record_validation_info=track_file_attrs)
            self.log.info("Photodiode %s:%d (%s) ingested successfully", instrumentName, exposureId,
                          logId)
            refs.append(dataset)

        if numExisting != 0:
            self.log.warning("Skipped %d entries that already existed in run %s", numExisting, run)
        if numFailed != 0:
            raise RuntimeError(f"Failed to ingest {numFailed} entries due to missing exposure information.")
        return refs