__all__ = ("RawIngestTask", "RawIngestConfig", "makeTransferChoiceField")
import os

from sqlalchemy.exc import IntegrityError

from astro_metadata_translator import ObservationInfo
from lsst.afw.image import readMetadata, bboxFromMetadata
from lsst.afw.geom import SkyWcs
from lsst.daf.butler import (DatasetType, StorageClassFactory, Run, DataId,
                             ConflictingDefinitionError, Butler)
from lsst.daf.butler.instrument import (Instrument, updateExposureEntryFromObsInfo,
                                        updateVisitEntryFromObsInfo)
from lsst.geom import Box2D
from lsst.pex.config import Config, Field, ChoiceField
from lsst.pipe.base import Task
from lsst.sphgeom import ConvexPolygon


class IngestConflictError(ConflictingDefinitionError):
    """Raised when an ingested file conflicts with a Dataset already present
    in the Registry (see `RawIngestConfig.conflict`).
    """
    pass
def makeTransferChoiceField(doc="How to transfer files (None for no transfer).", default=None):
    """Create a Config field with options for how to transfer files between
    data repositories.
    """
    return ChoiceField(
        doc=doc,
        dtype=str,
        allowed={"move": "move",
                 "copy": "copy",
                 "hardlink": "hard link",
                 "symlink": "symbolic (soft) link",
                 },
        optional=True,
        default=default,
    )
class RawIngestConfig(Config):
    transfer = makeTransferChoiceField()
    conflict = ChoiceField(
        ("What to do if a raw Dataset with the same data ID as an "
         "ingested file already exists in the Butler's Collection."),
        dtype=str,
        allowed={"ignore": ("Do not add the new file to the Collection.  If "
                            "'stash' is not None, the new file will be "
                            "ingested into the stash Collection instead."),
                 "fail": ("Raise RuntimeError if a conflict is encountered "
                          "(which may then be caught if onError == 'continue')."),
                 },
        optional=False,
        default="ignore",
    )
    stash = Field(
        "Name of an alternate Collection to hold Datasets that lose conflicts.",
        dtype=str,
        default=None,
    )
    onError = ChoiceField(
        "What to do if an error (including fatal conflicts) occurs.",
        dtype=str,
        allowed={"continue": "Warn and continue with the next file.",
                 "break": ("Stop processing immediately, but leave "
                           "already-ingested datasets in the repository."),
                 "rollback": ("Stop processing and attempt to remove already-"
                              "ingested datasets from the repository."),
                 },
        optional=False,
        default="continue",
    )
    doAddRegions = Field(
        dtype=bool,
        default=True,
        doc="Add regions to the Registry when ingesting files.",
    )
    padRegionAmount = Field(
        dtype=int,
        default=0,
        doc="Pad an image with the specified number of pixels before calculating its region.",
    )


class RawIngestTask(Task):
    """Driver Task for ingesting raw data into Gen3 Butler repositories.

    This Task is intended to be runnable from the command-line, but it doesn't
    meet the other requirements of CmdLineTask or PipelineTask, and wouldn't
    gain much from being one.  It also wouldn't really be appropriate as a
    subtask of a CmdLineTask or PipelineTask; it is a Task essentially just to
    leverage the logging and configurability machinery that the Task framework
    provides.

    Each instance of `RawIngestTask` writes to the same Butler and maintains a
    cache of Dimension entries that have already been added to or extracted
    from its Registry.  Each invocation of `RawIngestTask.run` ingests a list
    of files (possibly semi-atomically; see `RawIngestConfig.onError`).

    RawIngestTask may be subclassed to specialize ingest for the actual
    structure of raw data files produced by a particular instrument, but this
    is usually unnecessary because the instrument-specific header extraction
    provided by ``astro_metadata_translator`` is usually sufficient.

    Parameters
    ----------
    config : `RawIngestConfig`
        Configuration for whether/how to transfer files and how to handle
        conflicts and errors.
    butler : `~lsst.daf.butler.Butler`
        Butler instance.  Ingested Datasets will be created as part of
        ``butler.run`` and associated with its Collection.

    Other keyword arguments are forwarded to the Task base class constructor.
    """

    ConfigClass = RawIngestConfig
    _DefaultName = "ingest"

    def getDatasetType(self):
        """Return the DatasetType of the Datasets ingested by this Task.
        """
        return DatasetType("raw", ("instrument", "detector", "exposure"),
                           StorageClassFactory().getStorageClass("Exposure"))
    def __init__(self, config=None, *, butler, **kwds):
        super().__init__(config, **kwds)
        self.butler = butler
        self.datasetType = self.getDatasetType()
        self.dimensions = butler.registry.dimensions.extract(["instrument", "detector", "physical_filter",
                                                              "visit", "exposure"])
        # Cache of Instrument instances, keyed by instrument name; used by
        # getFormatter to avoid re-constructing Instrument objects.
        self.instrumentCache = {}
        # Vertices of per-detector regions, keyed by (instrument, visit);
        # accumulated during ingest and turned into per-visit regions by
        # _addVisitRegions.
        self.visitRegions = {}
        # Run into which conflicting Datasets are "stashed", if configured.
        self.stashRun = Run(self.config.stash) if self.config.stash is not None else None

    def _addVisitRegions(self):
167 """Adds a region associated with a Visit to registry. 169 Visits will be created using regions for individual ccds that are 170 defined in the visitRegions dict field on self, joined against an 171 existing region if one exists. The dict field is formatted using 172 instrument and visit as a tuple for a key, with values that are a 173 list of regions for detectors associated the region. 175 for (instrument, visit), vertices
in self.
visitRegions.items():
177 existingRegion = self.
butler.registry.expandDataId({
"instrument": instrument,
"visit": visit},
179 if existingRegion
is not None:
180 vertices = list(existingRegion.getVertices()) + vertices
181 region = ConvexPolygon(vertices)
182 self.
butler.registry.setDimensionRegion(instrument=instrument, visit=visit, region=region)
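
    # Sketch of the region arithmetic above (the vertices are hypothetical
    # unit vectors): ConvexPolygon takes the convex hull of its input points,
    # so rebuilding it from old + new vertices yields a visit region covering
    # every contributing detector.
    #
    #     from lsst.sphgeom import ConvexPolygon, UnitVector3d
    #     old = [UnitVector3d(1, 0, 0), UnitVector3d(0, 1, 0), UnitVector3d(0, 0, 1)]
    #     new = [UnitVector3d(1, 1, 1)]
    #     merged = ConvexPolygon(old + new)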
185 """Ingest files into a Butler data repository. 187 This creates any new exposure or visit Dimension entries needed to 188 identify the ingested files, creates new Dataset entries in the 189 Registry and finally ingests the files themselves into the Datastore. 190 Any needed instrument, detector, and physical_filter Dimension entries 191 must exist in the Registry before `run` is called. 195 files : iterable over `str` or path-like objects 196 Paths to the files to be ingested. Will be made absolute 197 if they are not already. 200 if self.config.onError ==
"rollback":
201 with self.
butler.transaction():
204 if self.config.doAddRegions:
206 elif self.config.onError ==
"break":
209 if self.config.doAddRegions:
211 elif self.config.onError ==
"continue":
215 except Exception
as err:
216 self.log.warnf(
"Error processing '{}': {}", file, err)
217 if self.config.doAddRegions:
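
    # A minimal end-to-end sketch (the repository path, run name, and file
    # names are hypothetical; instrument, detector, and physical_filter
    # entries must already be registered):
    #
    #     from lsst.daf.butler import Butler
    #     butler = Butler("/path/to/repo", run="raw")
    #     task = RawIngestTask(config=RawIngestConfig(), butler=butler)
    #     task.run(["raw_0001.fits", "raw_0002.fits"])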
221 """Read and return any relevant headers from the given file. 223 The default implementation simply reads the header of the first 224 non-empty HDU, so it always returns a single-element list. 228 file : `str` or path-like object 229 Absolute path to the file to be ingested. 233 headers : `list` of `~lsst.daf.base.PropertyList` 234 Single-element list containing the header of the first 237 return [readMetadata(file)]
240 """Builds a region from information contained in a header 244 headers : `lsst.daf.base.PropertyList` 245 Property list containing the information from the header of 250 region : `lsst.sphgeom.ConvexPolygon` 255 If required header keys can not be found to construct region 260 bbox = Box2D(bboxFromMetadata(header))
261 if self.config.padRegionAmount > 0:
262 bbox.grow(self.config.padRegionAmount)
263 corners = bbox.getCorners()
264 sphCorners = [wcs.pixelToSky(point).getVector()
for point
in corners]
265 return ConvexPolygon(sphCorners)
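
    # Worked sketch of the padding above: with padRegionAmount = 100 and a
    # 2048x2048 detector, the Box2D spanning (-0.5, -0.5) to (2047.5, 2047.5)
    # grows to (-100.5, -100.5) to (2147.5, 2147.5), so the sky region
    # conservatively covers slightly more than the detector footprint.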
268 """Extract metadata from a raw file and add exposure and visit 271 Any needed instrument, detector, and physical_filter Dimension entries must 272 exist in the Registry before `run` is called. 276 file : `str` or path-like object 277 Absolute path to the file to be ingested. 281 headers : `list` of `~lsst.daf.base.PropertyList` 282 Result of calling `readHeaders`. 284 Data ID dictionary, as returned by `extractDataId`. 287 obsInfo = ObservationInfo(headers[0])
290 fullDataId = self.
extractDataId(file, headers, obsInfo=obsInfo)
293 if fullDataId.get(dimension.name)
is None:
295 dimensionDataId = DataId(fullDataId, dimension=dimension)
298 dimensionEntryDict = self.
butler.registry.findDimensionEntry(dimension, dimensionDataId)
299 if dimensionEntryDict
is None:
300 if dimension.name
in (
"visit",
"exposure"):
302 self.
butler.registry.addDimensionEntry(dimension, dimensionDataId)
305 f
"Entry for {dimension.name} with ID {dimensionDataId} not found; must be " 306 f
"present in Registry prior to ingest." 311 if self.config.doAddRegions:
314 self.
butler.registry.setDimensionRegion(DataId(fullDataId,
315 dimensions=[
'visit',
'detector',
'instrument'],
318 self.
visitRegions.setdefault((fullDataId[
'instrument'], fullDataId[
'visit']),
319 []).extend(region.getVertices())
320 except IntegrityError:
325 return headers, fullDataId
328 """Ingest a single raw file into the repository. 330 All necessary Dimension entres must already be present. 334 file : `str` or path-like object 335 Absolute path to the file to be ingested. 336 headers : `list` of `~lsst.daf.base.PropertyList` 337 Result of calling `readHeaders`. 339 Data ID dictionary, as returned by `extractDataId`. 340 run : `~lsst.daf.butler.Run`, optional 341 Run to add the Dataset to; defaults to ``self.butler.run``. 346 Reference to the ingested dataset. 350 ConflictingDefinitionError 351 Raised if the dataset already exists in the registry. 353 if run
is not None and run != self.
butler.run:
354 butler = Butler(butler=self.
butler, run=run)
358 return butler.ingest(file, self.
datasetType, dataId, transfer=self.config.transfer,
360 except ConflictingDefinitionError
as err:
364 """Ingest a single raw data file after extacting metadata. 366 This creates any new exposure or visit Dimension entries needed to 367 identify the ingest file, creates a new Dataset entry in the 368 Registry and finally ingests the file itself into the Datastore. 369 Any needed instrument, detector, and physical_filter Dimension entries must 370 exist in the Registry before `run` is called. 374 file : `str` or path-like object 375 Absolute path to the file to be ingested. 379 except Exception
as err:
380 raise RuntimeError(f
"Unexpected error adding dimensions for {file}.")
from err
383 with self.
butler.transaction():
387 except IngestConflictError:
388 if self.config.conflict ==
"fail":
390 if self.config.conflict ==
"ignore":
394 self.log.infof(
"Conflict on {} ({}); ingesting to stash '{}' instead.",
395 dataId, file, self.config.stash)
396 with self.
butler.transaction():
399 self.log.infof(
"Conflict on {} ({}); ignoring.", dataId, file)
402 """Return the Data ID dictionary that should be used to label a file. 406 file : `str` or path-like object 407 Absolute path to the file being ingested (prior to any transfers). 408 headers : `list` of `~lsst.daf.base.PropertyList` 409 All headers returned by `readHeaders()`. 410 obsInfo : `astro_metadata_translator.ObservationInfo` 411 Observational metadata extracted from the headers. 416 A mapping whose key-value pairs uniquely identify raw datasets. 417 Must have ``dataId.dimensions() <= self.dimensions``, with at least 418 instrument, exposure, and detector present. 421 if obsInfo.visit_id
is None:
422 toRemove.add(
"visit")
423 if obsInfo.physical_filter
is None:
424 toRemove.add(
"physical_filter")
426 dimensions = self.
dimensions.toSet().difference(toRemove)
430 dimensions=dimensions,
431 instrument=obsInfo.instrument,
432 exposure=obsInfo.exposure_id,
433 visit=obsInfo.visit_id,
434 detector=obsInfo.detector_num,
435 physical_filter=obsInfo.physical_filter,
437 updateExposureEntryFromObsInfo(dataId, obsInfo)
438 if obsInfo.visit_id
is not None:
439 updateVisitEntryFromObsInfo(dataId, obsInfo)
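
    # For a typical raw, the resulting data ID is a mapping like the
    # following (the values are hypothetical):
    #
    #     {"instrument": "HSC", "exposure": 904024, "visit": 904024,
    #      "detector": 50, "physical_filter": "HSC-I"}
    #
    # with "visit" and "physical_filter" dropped from the dimensions when the
    # headers do not provide them.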
443 """Return the Formatter that should be used to read this file after 446 The default implementation obtains the formatter from the Instrument 447 class for the given data ID. 450 if instrument
is None:
451 instrument = Instrument.factories[dataId[
"instrument"]]()
453 return instrument.getRawFormatter(dataId)
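
# Subclassing sketch (the class name and header logic are hypothetical): an
# instrument whose raws need a nonstandard data-ID rule can override
# extractDataId while reusing the rest of the ingest machinery.
#
#     class MyCameraRawIngestTask(RawIngestTask):
#         def extractDataId(self, file, headers, obsInfo):
#             dataId = super().extractDataId(file, headers, obsInfo)
#             # adjust dataId using instrument-specific header values here
#             return dataId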