__all__ = ("RawIngestTask", "RawIngestConfig")

import os.path

from abc import ABCMeta

from astro_metadata_translator import ObservationInfo
from lsst.afw.image import readMetadata
from lsst.daf.butler import DatasetType, StorageClassFactory, Run, DataId, ConflictingDefinitionError
from lsst.daf.butler.instrument import (Instrument, updateExposureEntryFromObsInfo,
                                        updateVisitEntryFromObsInfo)
from lsst.pex.config import Config, Field, ChoiceField
from lsst.pipe.base import Task
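
# Assumed definition: ``processFile`` below catches IngestConflictError, which
# ``ingestFile`` raises when the Registry reports a conflicting Dataset. A
# minimal sketch consistent with that usage, subclassing the butler's
# ConflictingDefinitionError so existing handlers of that exception still work:
class IngestConflictError(ConflictingDefinitionError):
    pass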

class RawIngestConfig(Config):
    transfer = ChoiceField(
        "How to transfer files (None for no transfer).",
        dtype=str,
        allowed={"move": "move",
                 "copy": "copy",
                 "hardlink": "hard link",
                 "symlink": "symbolic (soft) link"},
        optional=True,
    )
    conflict = ChoiceField(
        ("What to do if a raw Dataset with the same data ID as an "
         "ingested file already exists in the Butler's Collection."),
        dtype=str,
        allowed={"ignore": ("Do not add the new file to the Collection. If "
                            "'stash' is not None, the new file will be "
                            "ingested into the stash Collection instead."),
                 "fail": ("Raise RuntimeError if a conflict is encountered "
                          "(which may then be caught if onError == 'continue')."),
                 },
        optional=False,
        default="ignore",
    )
    stash = Field(
        "Name of an alternate Collection to hold Datasets that lose conflicts.",
        dtype=str,
        default=None,
    )
    onError = ChoiceField(
        "What to do if an error (including fatal conflicts) occurs.",
        dtype=str,
        allowed={"continue": "Warn and continue with the next file.",
                 "break": ("Stop processing immediately, but leave "
                           "already-ingested datasets in the repository."),
                 "rollback": ("Stop processing and attempt to remove already-"
                              "ingested datasets from the repository."),
                 },
        optional=False,
        default="continue",
    )
84 """Driver Task for ingesting raw data into Gen3 Butler repositories. 86 This Task is intended to be runnable from the command-line, but it doesn't 87 meet the other requirements of CmdLineTask or PipelineTask, and wouldn't 88 gain much from being one. It also wouldn't really be appropriate as a 89 subtask of a CmdLineTask or PipelineTask; it's a Task essentially just to 90 leverage the logging and configurability functionality that provides. 92 Each instance of `RawIngestTask` writes to the same Butler and maintains a 93 cache of Dimension entries that have already been added to or extracted 94 from its Registry. Each invocation of `RawIngestTask.run` ingests a list 95 of files (possibly semi-atomically; see `RawIngestConfig.onError`). 97 RawIngestTask should be subclassed to specialize ingest for the actual 98 structure of raw data files produced by a particular instrument. 99 Subclasses must either provide populated `MetadataReader` instances in the 100 `dataIdReader`, `visitReader`, and `exposureReader` class attributes, or 101 alternate implementations of the `extractDataId`, `extractVisit`, and 102 `extractExposure` methods that do not use those attributes (each 103 attribute-method pair may be handled differently). Subclasses may also 104 wish to override `getFormatter` and/or (rarely) `getDatasetType`. We do 105 not anticipate overriding `run`, `ensureDimensions`, `ingestFile`, or 106 `processFile` to ever be necessary. 110 config : `RawIngestConfig` 111 Configuration for whether/how to transfer files and how to handle 112 conflicts and errors. 113 butler : `~lsst.daf.butler.Butler` 114 Butler instance. Ingested Datasets will be created as part of 115 ``butler.run`` and associated with its Collection. 117 Other keyword arguments are forwarded to the Task base class constructor. 120 ConfigClass = RawIngestConfig
    _DefaultName = "ingest"

    def getDatasetType(self):
        """Return the DatasetType of the Datasets ingested by this Task.
        """
        return DatasetType("raw", ("Instrument", "Detector", "Exposure"),
                           StorageClassFactory().getStorageClass("Exposure"))
    def __init__(self, config=None, *, butler, **kwds):
        super().__init__(config, **kwds)
        self.butler = butler
        self.datasetType = self.getDatasetType()
        self.dimensions = butler.registry.dimensions.extract(["Instrument", "Detector", "PhysicalFilter",
                                                              "Visit", "Exposure"])
        # Cache of Instrument instances keyed by instrument name; used by
        # getFormatter (an assumption consistent with its cache lookup below).
        self.instrumentCache = {}
        # Run for Datasets that lose conflicts, if a stash is configured.
        self.stashRun = Run(self.config.stash) if self.config.stash is not None else None

    def run(self, files):
        """Ingest files into a Butler data repository.

        This creates any new Exposure or Visit Dimension entries needed to
        identify the ingested files, creates new Dataset entries in the
        Registry and finally ingests the files themselves into the Datastore.
        Any needed Instrument, Detector, and PhysicalFilter Dimension entries
        must exist in the Registry before `run` is called.

        Parameters
        ----------
        files : iterable over `str` or path-like objects
            Paths to the files to be ingested. Will be made absolute
            if they are not already.
        """
        if self.config.onError == "rollback":
            # One transaction for the whole list: any error undoes everything.
            with self.butler.transaction():
                for file in files:
                    self.processFile(os.path.abspath(file))
        elif self.config.onError == "break":
            for file in files:
                self.processFile(os.path.abspath(file))
        elif self.config.onError == "continue":
            for file in files:
                try:
                    self.processFile(os.path.abspath(file))
                except Exception as err:
                    self.log.warnf("Error processing '{}': {}", file, err)
179 """Read and return any relevant headers from the given file. 181 The default implementation simply reads the header of the first 182 non-empty HDU, so it always returns a single-element list. 186 file : `str` or path-like object 187 Absolute path to the file to be ingested. 191 headers : `list` of `~lsst.daf.base.PropertyList` 192 Single-element list containing the header of the first 195 return [readMetadata(file)]
198 """Extract metadata from a raw file and add Exposure and Visit 201 Any needed Instrument, Detector, and PhysicalFilter Dimension entries must 202 exist in the Registry before `run` is called. 206 file : `str` or path-like object 207 Absolute path to the file to be ingested. 211 headers : `list` of `~lsst.daf.base.PropertyList` 212 Result of calling `readHeaders`. 214 Data ID dictionary, as returned by `extractDataId`. 217 obsInfo = ObservationInfo(headers[0])
220 fullDataId = self.
extractDataId(file, headers, obsInfo=obsInfo)
223 dimensionDataId = DataId(fullDataId, dimension=dimension)
226 dimensionEntryDict = self.
butler.registry.findDimensionEntry(dimension, dimensionDataId)
227 if dimensionEntryDict
is None:
228 if dimension.name
in (
"Visit",
"Exposure"):
230 self.
butler.registry.addDimensionEntry(dimension, dimensionDataId)
233 f
"Entry for {dimension.name} with ID {dimensionDataId} not found; must be " 234 f
"present in Registry prior to ingest." 239 return headers, fullDataId
242 """Ingest a single raw file into the repository. 244 All necessary Dimension entres must already be present. 246 This method is not transactional; it must be wrapped in a 247 ``with self.butler.transaction` block to make per-file ingest 252 file : `str` or path-like object 253 Absolute path to the file to be ingested. 254 headers : `list` of `~lsst.daf.base.PropertyList` 255 Result of calling `readHeaders`. 257 Data ID dictionary, as returned by `extractDataId`. 258 run : `~lsst.daf.butler.Run`, optional 259 Run to add the Dataset to; defaults to ``self.butler.run``. 266 ref = self.
butler.registry.addDataset(self.
datasetType, dataId, run=run, recursive=
True)
267 except ConflictingDefinitionError
as err:
271 self.
butler.datastore.ingest(file, ref, formatter=self.
getFormatter(file, headers, dataId),
272 transfer=self.config.transfer)
276 """Ingest a single raw data file after extacting metadata. 278 This creates any new Exposure or Visit Dimension entries needed to 279 identify the ingest file, creates a new Dataset entry in the 280 Registry and finally ingests the file itself into the Datastore. 281 Any needed Instrument, Detector, and PhysicalFilter Dimension entries must 282 exist in the Registry before `run` is called. 286 file : `str` or path-like object 287 Absolute path to the file to be ingested. 292 with self.
butler.transaction():
296 except IngestConflictError:
297 if self.config.conflict ==
"fail":
299 if self.config.conflict ==
"ignore":
303 self.log.infof(
"Conflict on {} ({}); ingesting to stash '{}' instead.",
304 dataId, file, self.config.stash)
305 with self.
butler.transaction():
308 self.log.infof(
"Conflict on {} ({}); ignoring.", dataId, file)
311 """Return the Data ID dictionary that should be used to label a file. 315 file : `str` or path-like object 316 Absolute path to the file being ingested (prior to any transfers). 317 headers : `list` of `~lsst.daf.base.PropertyList` 318 All headers returned by `readHeaders()`. 319 obsInfo : `astro_metadata_translator.ObservationInfo` 320 Observational metadata extracted from the headers. 325 A mapping whose key-value pairs uniquely identify raw datasets. 326 Must have ``dataId.dimensions() <= self.dimensions``, with at least 327 Instrument, Exposure, and Detector present. 330 if obsInfo.visit_id
is None:
331 toRemove.add(
"Visit")
332 if obsInfo.physical_filter
is None:
333 toRemove.add(
"PhysicalFilter")
335 dimensions = self.
dimensions.difference(toRemove)
339 dimensions=dimensions,
340 instrument=obsInfo.instrument,
341 exposure=obsInfo.exposure_id,
342 visit=obsInfo.visit_id,
343 detector=obsInfo.detector_num,
344 physical_filter=obsInfo.physical_filter,
346 updateExposureEntryFromObsInfo(dataId, obsInfo)
347 if obsInfo.visit_id
is not None:
348 updateVisitEntryFromObsInfo(dataId, obsInfo)
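
    # For illustration only (values are hypothetical, not from this file):
    # for an HSC-like instrument, extractDataId might produce roughly
    #
    #     {"instrument": "HSC", "exposure": 12345, "visit": 12345,
    #      "detector": 42, "physical_filter": "HSC-R"}
    #
    # with "visit" and "physical_filter" dropped when the headers do not
    # define them, per the toRemove logic above.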
352 """Return the Formatter that should be used to read this file after 355 The default implementation obtains the formatter from the Instrument 356 class for the given data ID. 359 if instrument
is None:
360 instrument = Instrument.factories[dataId[
"instrument"]]()
362 return instrument.getRawFormatter(dataId)
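
    # Subclassing sketch (hypothetical instrument and formatter class; per the
    # class docstring, subclasses typically specialize metadata extraction and
    # formatting rather than the ingest flow itself):
    #
    #     class MyCamRawIngestTask(RawIngestTask):
    #         def getFormatter(self, file, headers, dataId):
    #             return MyCamRawFormatter()  # formatter for MyCam raw files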