23 __all__ = (
"RawIngestTask",
"RawIngestConfig",
"makeTransferChoiceField")
# Third-party
from sqlalchemy.exc import IntegrityError

from astro_metadata_translator import ObservationInfo

# LSST stack
from lsst.afw.image import readMetadata, bboxFromMetadata
from lsst.afw.geom import SkyWcs
from lsst.daf.butler import DatasetType, Run, DataId, ConflictingDefinitionError, Butler
from lsst.daf.butler.instrument import (Instrument, updateExposureEntryFromObsInfo,
                                        updateVisitEntryFromObsInfo)
from lsst.geom import Box2D
from lsst.pex.config import Config, Field, ChoiceField
from lsst.pipe.base import Task
from lsst.sphgeom import ConvexPolygon
# NOTE(review): the remainder of this file is a corrupted, line-mangled
# extraction.  The integers fused into the text ("52", "63", ...) are line
# numbers from the original source.  In this span the `class RawIngestConfig`
# statement, every field's dtype/default keyword arguments, and the
# `transfer`/`stash`/`doAddRegions`/`padRegionAmount` field assignments are
# missing — recover the original module before trusting or running this code.
# Recognizable fragments, in order:
#   * the allowed={...} choices of makeTransferChoiceField
#     ("move"/"hardlink"/"symlink");
#   * the `conflict` ChoiceField with choices "ignore"/"fail";
#   * the doc string of the `stash` field;
#   * the `onError` ChoiceField ("continue"/"break"/"rollback"; the original
#     typo "aleady-" is preserved verbatim below);
#   * doc fragments for `doAddRegions` and `padRegionAmount`;
#   * the RawIngestTask class docstring and `ConfigClass = RawIngestConfig`
#     (the `class RawIngestTask(Task):` header itself is lost).
52 allowed={
"move":
"move",
54 "hardlink":
"hard link",
55 "symlink":
"symbolic (soft) link"},
63 conflict = ChoiceField(
64 (
"What to do if a raw Dataset with the same data ID as an " 65 "ingested file already exists in the Butler's Collection."),
67 allowed={
"ignore": (
"Do not add the new file to the Collection. If " 68 "'stash' is not None, the new file will be " 69 "ingested into the stash Collection instead."),
70 "fail": (
"Raise RuntimeError if a conflict is encountered " 71 "(which may then be caught if onError == 'continue')."),
77 "Name of an alternate Collection to hold Datasets that lose conflicts.",
81 onError = ChoiceField(
82 "What to do if an error (including fatal conflicts) occurs.",
84 allowed={
"continue":
"Warn and continue with the next file.",
85 "break": (
"Stop processing immediately, but leave " 86 "already-ingested datasets in the repository."),
87 "rollback": (
"Stop processing and attempt to remove aleady-" 88 "ingested datasets from the repository."),
96 doc=
"Add regions when ingesting tasks" 98 padRegionAmount = Field(
101 doc=
"Pad an image with specified number of pixels before calculating region" 106 """Driver Task for ingesting raw data into Gen3 Butler repositories. 108 This Task is intended to be runnable from the command-line, but it doesn't 109 meet the other requirements of CmdLineTask or PipelineTask, and wouldn't 110 gain much from being one. It also wouldn't really be appropriate as a 111 subtask of a CmdLineTask or PipelineTask; it's a Task essentially just to 112 leverage the logging and configurability functionality that provides. 114 Each instance of `RawIngestTask` writes to the same Butler and maintains a 115 cache of Dimension entries that have already been added to or extracted 116 from its Registry. Each invocation of `RawIngestTask.run` ingests a list 117 of files (possibly semi-atomically; see `RawIngestConfig.onError`). 119 RawIngestTask may be subclassed to specialize ingest for the actual 120 structure of raw data files produced by a particular instrument, but this 121 is usually unnecessary because the instrument-specific header-extraction 122 provided by the ``astro_metadata_translator`` is usually enough. 126 config : `RawIngestConfig` 127 Configuration for whether/how to transfer files and how to handle 128 conflicts and errors. 129 butler : `~lsst.daf.butler.Butler` 130 Butler instance. Ingested Datasets will be created as part of 131 ``butler.run`` and associated with its Collection. 133 Other keyword arguments are forwarded to the Task base class constructor. 136 ConfigClass = RawIngestConfig
# NOTE(review): corrupted fragment.  Visible pieces: the class attribute
# `_DefaultName = "ingest"` and a method (its `def` line is lost; per its
# docstring it returns the DatasetType of ingested Datasets) whose body is
#   return DatasetType("raw", ("instrument", "detector", "exposure"),
#                      "Exposure", universe=self.butler.registry.dimensions)
# — presumably `getDatasetType(self)`; confirm against the original source.
138 _DefaultName =
"ingest" 141 """Return the DatasetType of the Datasets ingested by this Task. 143 return DatasetType(
"raw", (
"instrument",
"detector",
"exposure"),
"Exposure",
144 universe=self.
butler.registry.dimensions)
# NOTE(review): corrupted fragment of the constructor.  The signature is
# intact; the only surviving body statements are
#   self.dimensions = butler.registry.dimensions.extract([...5 names...])
#   self.stashRun = Run(self.config.stash) if self.config.stash is not None else None
# Other initialization (the super().__init__ call, storing `butler`, the
# visit-region cache read by _addVisitRegions, etc.) is missing here —
# presumably it existed in the original; TODO confirm upstream.
146 def __init__(self, config=None, *, butler, **kwds):
150 self.
dimensions = butler.registry.dimensions.extract([
"instrument",
"detector",
"physical_filter",
151 "visit",
"exposure"])
161 self.
stashRun = Run(self.config.stash)
if self.config.stash
is not None else None
# NOTE(review): corrupted fragment of _addVisitRegions.  Per its docstring it
# merges per-detector vertex lists from self.visitRegions (keyed by
# (instrument, visit)) with any existing registry region and writes the
# ConvexPolygon back via registry.setDimensionRegion.  The expandDataId call
# below is truncated (its closing arguments and whatever extracts the region
# from the result are lost) — recover from the original before running.
is not None else None 164 def _addVisitRegions(self):
165 """Adds a region associated with a Visit to registry. 167 Visits will be created using regions for individual ccds that are 168 defined in the visitRegions dict field on self, joined against an 169 existing region if one exists. The dict field is formatted using 170 instrument and visit as a tuple for a key, with values that are a 171 list of regions for detectors associated the region. 173 for (instrument, visit), vertices
in self.
visitRegions.items():
175 existingRegion = self.
butler.registry.expandDataId({
"instrument": instrument,
"visit": visit},
177 if existingRegion
is not None:
178 vertices = list(existingRegion.getVertices()) + vertices
179 region = ConvexPolygon(vertices)
180 self.
butler.registry.setDimensionRegion(instrument=instrument, visit=visit, region=region)
# NOTE(review): corrupted fragment of run(files).  Only the onError dispatch
# skeleton survives: "rollback" wraps everything in a butler transaction,
# "break" stops at the first error, "continue" logs via self.log.warnf and
# moves on; each branch ends with an optional region-update guarded by
# config.doAddRegions.  The per-file processing calls inside each branch
# (presumably self.processFile and self._addVisitRegions) are lost — do not
# attempt to run this as-is.
183 """Ingest files into a Butler data repository. 185 This creates any new exposure or visit Dimension entries needed to 186 identify the ingested files, creates new Dataset entries in the 187 Registry and finally ingests the files themselves into the Datastore. 188 Any needed instrument, detector, and physical_filter Dimension entries 189 must exist in the Registry before `run` is called. 193 files : iterable over `str` or path-like objects 194 Paths to the files to be ingested. Will be made absolute 195 if they are not already. 198 if self.config.onError ==
"rollback":
199 with self.
butler.transaction():
202 if self.config.doAddRegions:
204 elif self.config.onError ==
"break":
207 if self.config.doAddRegions:
209 elif self.config.onError ==
"continue":
213 except Exception
as err:
214 self.log.warnf(
"Error processing '{}': {}", file, err)
215 if self.config.doAddRegions:
def readHeaders(self, file):
    """Read and return any relevant headers from the given file.

    The default implementation simply reads the header of the first
    non-empty HDU, so it always returns a single-element list.

    Parameters
    ----------
    file : `str` or path-like object
        Absolute path to the file to be ingested.

    Returns
    -------
    headers : `list` of `~lsst.daf.base.PropertyList`
        Single-element list containing the header of the first
        non-empty HDU.
    """
    # Delegate the actual read to lsst.afw.image.readMetadata and wrap the
    # result so subclasses returning multiple headers share one interface.
    header = readMetadata(file)
    return [header]
# NOTE(review): corrupted fragment of buildRegion(headers).  The surviving
# body pads the bounding box from the metadata by config.padRegionAmount
# pixels, maps its corners to unit sphere vectors, and returns a
# ConvexPolygon.  It references `header` (singular) and `wcs`, neither of
# which is bound in the visible text — the loop over `headers` and the
# SkyWcs construction (plus the documented "required header keys" raise)
# were lost in extraction.
238 """Builds a region from information contained in a header 242 headers : `lsst.daf.base.PropertyList` 243 Property list containing the information from the header of 248 region : `lsst.sphgeom.ConvexPolygon` 253 If required header keys can not be found to construct region 258 bbox = Box2D(bboxFromMetadata(header))
259 if self.config.padRegionAmount > 0:
260 bbox.grow(self.config.padRegionAmount)
261 corners = bbox.getCorners()
262 sphCorners = [wcs.pixelToSky(point).getVector()
for point
in corners]
263 return ConvexPolygon(sphCorners)
# NOTE(review): corrupted fragment of ensureDimensions(file).  Surviving
# logic: build an ObservationInfo from the first header, derive fullDataId
# via extractDataId, then for each dimension look up an existing registry
# entry and add missing "visit"/"exposure" entries; other missing dimensions
# raise (the f-string fragment below is the error message, its raise
# statement lost).  With config.doAddRegions it sets the per-detector
# region and accumulates vertices in self.visitRegions, tolerating
# IntegrityError (presumably a concurrent insert — confirm upstream).
# The loop header over dimensions, the `try`, and several statement
# openings/closings are missing.
266 """Extract metadata from a raw file and add exposure and visit 269 Any needed instrument, detector, and physical_filter Dimension entries must 270 exist in the Registry before `run` is called. 274 file : `str` or path-like object 275 Absolute path to the file to be ingested. 279 headers : `list` of `~lsst.daf.base.PropertyList` 280 Result of calling `readHeaders`. 282 Data ID dictionary, as returned by `extractDataId`. 285 obsInfo = ObservationInfo(headers[0])
288 fullDataId = self.
extractDataId(file, headers, obsInfo=obsInfo)
291 if fullDataId.get(dimension.name)
is None:
293 dimensionDataId = DataId(fullDataId, dimension=dimension)
296 dimensionEntryDict = self.
butler.registry.findDimensionEntry(dimension, dimensionDataId)
297 if dimensionEntryDict
is None:
298 if dimension.name
in (
"visit",
"exposure"):
300 self.
butler.registry.addDimensionEntry(dimension, dimensionDataId)
303 f
"Entry for {dimension.name} with ID {dimensionDataId} not found; must be " 304 f
"present in Registry prior to ingest." 309 if self.config.doAddRegions:
312 self.
butler.registry.setDimensionRegion(DataId(fullDataId,
313 dimensions=[
'visit',
'detector',
'instrument'],
316 self.
visitRegions.setdefault((fullDataId[
'instrument'], fullDataId[
'visit']),
317 []).extend(region.getVertices())
318 except IntegrityError:
323 return headers, fullDataId
# NOTE(review): corrupted fragment of ingestFile(file, headers, dataId,
# run=None).  Surviving logic: when a different Run is requested, wrap
# self.butler in a new Butler bound to that run, then delegate to
# butler.ingest(...) with the configured transfer mode.  The `else` branch
# binding `butler` for the default run, the `try`, the formatter keyword,
# and the body of the ConflictingDefinitionError handler are missing.
# (Docstring typo "entres" is preserved verbatim — part of the corrupted
# span, not editable here.)
326 """Ingest a single raw file into the repository. 328 All necessary Dimension entres must already be present. 332 file : `str` or path-like object 333 Absolute path to the file to be ingested. 334 headers : `list` of `~lsst.daf.base.PropertyList` 335 Result of calling `readHeaders`. 337 Data ID dictionary, as returned by `extractDataId`. 338 run : `~lsst.daf.butler.Run`, optional 339 Run to add the Dataset to; defaults to ``self.butler.run``. 344 Reference to the ingested dataset. 348 ConflictingDefinitionError 349 Raised if the dataset already exists in the registry. 351 if run
is not None and run != self.
butler.run:
352 butler = Butler(butler=self.
butler, run=run)
356 return butler.ingest(file, self.
datasetType, dataId, transfer=self.config.transfer,
358 except ConflictingDefinitionError
as err:
# NOTE(review): corrupted fragment of processFile(file).  Surviving logic:
# wrap dimension creation in a RuntimeError (chained `from err`) on any
# failure; ingest inside a butler transaction; on IngestConflictError
# re-raise when config.conflict == "fail", otherwise (conflict == "ignore")
# either log and ingest into the stash collection inside a second
# transaction, or just log and skip.  The calls to ensureDimensions /
# ingestFile themselves and several raise statements are missing, and
# IngestConflictError's definition is not visible in this file.
362 """Ingest a single raw data file after extacting metadata. 364 This creates any new exposure or visit Dimension entries needed to 365 identify the ingest file, creates a new Dataset entry in the 366 Registry and finally ingests the file itself into the Datastore. 367 Any needed instrument, detector, and physical_filter Dimension entries must 368 exist in the Registry before `run` is called. 372 file : `str` or path-like object 373 Absolute path to the file to be ingested. 377 except Exception
as err:
378 raise RuntimeError(f
"Unexpected error adding dimensions for {file}.")
from err
381 with self.
butler.transaction():
385 except IngestConflictError:
386 if self.config.conflict ==
"fail":
388 if self.config.conflict ==
"ignore":
392 self.log.infof(
"Conflict on {} ({}); ingesting to stash '{}' instead.",
393 dataId, file, self.config.stash)
394 with self.
butler.transaction():
397 self.log.infof(
"Conflict on {} ({}); ignoring.", dataId, file)
# NOTE(review): corrupted fragment of extractDataId(file, headers, obsInfo).
# Surviving logic: drop "visit" / "physical_filter" from the dimension set
# when obsInfo lacks them (the initialization of `toRemove` is missing),
# build a DataId from ObservationInfo fields (the `DataId(` opening is
# missing), then update the exposure entry and — when a visit id exists —
# the visit entry from obsInfo.  The final `return dataId` is also lost.
400 """Return the Data ID dictionary that should be used to label a file. 404 file : `str` or path-like object 405 Absolute path to the file being ingested (prior to any transfers). 406 headers : `list` of `~lsst.daf.base.PropertyList` 407 All headers returned by `readHeaders()`. 408 obsInfo : `astro_metadata_translator.ObservationInfo` 409 Observational metadata extracted from the headers. 414 A mapping whose key-value pairs uniquely identify raw datasets. 415 Must have ``dataId.dimensions() <= self.dimensions``, with at least 416 instrument, exposure, and detector present. 419 if obsInfo.visit_id
is None:
420 toRemove.add(
"visit")
421 if obsInfo.physical_filter
is None:
422 toRemove.add(
"physical_filter")
424 dimensions = self.
dimensions.toSet().difference(toRemove)
428 dimensions=dimensions,
429 instrument=obsInfo.instrument,
430 exposure=obsInfo.exposure_id,
431 visit=obsInfo.visit_id,
432 detector=obsInfo.detector_num,
433 physical_filter=obsInfo.physical_filter,
435 updateExposureEntryFromObsInfo(dataId, obsInfo)
436 if obsInfo.visit_id
is not None:
437 updateVisitEntryFromObsInfo(dataId, obsInfo)
# NOTE(review): corrupted fragment of getFormatter(file, headers, dataId).
# Surviving logic: when `instrument` is None, instantiate it from
# Instrument.factories keyed by the data ID's "instrument" value, then
# delegate to instrument.getRawFormatter(dataId).  The binding that first
# sets `instrument` (a parameter default or an earlier statement) is not
# visible here — confirm against the original source.
441 """Return the Formatter that should be used to read this file after 444 The default implementation obtains the formatter from the Instrument 445 class for the given data ID. 448 if instrument
is None:
449 instrument = Instrument.factories[dataId[
"instrument"]]()
451 return instrument.getRawFormatter(dataId)
# --- Method-signature index appended by the extraction tool; these lines are
# --- not executable Python (no colons, no bodies).  Kept, commented out, as a
# --- reference for the methods whose bodies are corrupted above:
# def ensureDimensions(self, file)
# def extractDataId(self, file, headers, obsInfo)
# def readHeaders(self, file)
# def __init__(self, config=None, butler, kwds)
# def buildRegion(self, headers)
# def makeTransferChoiceField(doc="How to transfer files (None for no transfer).", default=None)
# def ingestFile(self, file, headers, dataId, run=None)
# def _addVisitRegions(self)
# def getFormatter(self, file, headers, dataId)
# def processFile(self, file)