__all__ = ("RawIngestTask", "RawIngestConfig")
import os.path
from abc import ABCMeta

from astro_metadata_translator import ObservationInfo
from lsst.afw.image import readMetadata
from lsst.daf.butler import DatasetType, StorageClassFactory, Run, DataId
# The import below is an assumption: IngestConflictError is used in
# processFile, but the extraction dropped its original import statement.
from lsst.daf.butler import IngestConflictError
from lsst.daf.butler.instrument import (updateExposureEntryFromObsInfo,
                                        updateVisitEntryFromObsInfo)
from lsst.pex.config import Config, Field, ChoiceField
from lsst.pipe.base import Task
class RawIngestConfig(Config):
    transfer = ChoiceField(
        dtype=str,
        doc="How to transfer files (None for no transfer).",
        allowed={"move": "move",
                 "hardlink": "hard link",
                 "symlink": "symbolic (soft) link"},
        optional=True,
    )
    conflict = ChoiceField(
        dtype=str,
        doc=("What to do if a raw Dataset with the same data ID as an "
             "ingested file already exists in the Butler's Collection."),
        allowed={"ignore": ("Do not add the new file to the Collection.  If "
                            "'stash' is not None, the new file will be "
                            "ingested into the stash Collection instead."),
                 "fail": ("Raise RuntimeError if a conflict is encountered "
                          "(which may then be caught if onError == 'continue')."),
                 },
        optional=False,
        default="ignore",  # default value is an assumption; the extraction dropped it
    )
64 "Name of an alternate Collection to hold Datasets that lose conflicts.",
    onError = ChoiceField(
        dtype=str,
        doc="What to do if an error (including fatal conflicts) occurs.",
        allowed={"continue": "Warn and continue with the next file.",
                 "break": ("Stop processing immediately, but leave "
                           "already-ingested datasets in the repository."),
                 "rollback": ("Stop processing and attempt to remove already-"
                              "ingested datasets from the repository."),
                 },
        optional=False,
        default="continue",  # default value is an assumption; the extraction dropped it
    )
83 """Driver Task for ingesting raw data into Gen3 Butler repositories. 85 This Task is intended to be runnable from the command-line, but it doesn't 86 meet the other requirements of CmdLineTask or PipelineTask, and wouldn't 87 gain much from being one. It also wouldn't really be appropriate as a 88 subtask of a CmdLineTask or PipelineTask; it's a Task essentially just to 89 leverage the logging and configurability functionality that provides. 91 Each instance of `RawIngestTask` writes to the same Butler and maintains a 92 cache of Dimension entries that have already been added to or extracted 93 from its Registry. Each invocation of `RawIngestTask.run` ingests a list 94 of files (possibly semi-atomically; see `RawIngestConfig.onError`). 96 RawIngestTask should be subclassed to specialize ingest for the actual 97 structure of raw data files produced by a particular instrument. 98 Subclasses must either provide populated `MetadataReader` instances in the 99 `dataIdReader`, `visitReader`, and `exposureReader` class attributes, or 100 alternate implementations of the `extractDataId`, `extractVisit`, and 101 `extractExposure` methods that do not use those attributes (each 102 attribute-method pair may be handled differently). Subclasses may also 103 wish to override `getFormatter` and/or (rarely) `getDatasetType`. We do 104 not anticipate overriding `run`, `ensureDimensions`, `ingestFile`, or 105 `processFile` to ever be necessary. 109 config : `RawIngestConfig` 110 Configuration for whether/how to transfer files and how to handle 111 conflicts and errors. 112 butler : `~lsst.daf.butler.Butler` 113 Butler instance. Ingested Datasets will be created as part of 114 ``butler.run`` and associated with its Collection. 116 Other keyword arguments are forwarded to the Task base class constructor. 119 ConfigClass = RawIngestConfig
    _DefaultName = "ingest"

    def getDatasetType(self):
        """Return the DatasetType of the Datasets ingested by this Task.
        """
        return DatasetType("raw", ("Instrument", "Detector", "Exposure"),
                           StorageClassFactory().getStorageClass("Exposure"))
    def __init__(self, config=None, *, butler, **kwds):
        super().__init__(config, **kwds)
        self.butler = butler
        self.datasetType = self.getDatasetType()
        self.dimensions = butler.registry.dimensions.extract(["Instrument", "Detector", "PhysicalFilter",
                                                              "Visit", "Exposure"])
        # Cache of Dimension entries already added to or found in the
        # Registry (see the class docstring).
        self.dimensionEntriesDone = {dimension: set() for dimension in self.dimensions}
        # (Possibly) create a Run to hold Datasets that lose conflicts.
        self.stashRun = Run(self.config.stash) if self.config.stash is not None else None

    def run(self, files):
        """Ingest files into a Butler data repository.

        This creates any new Exposure or Visit Dimension entries needed to
        identify the ingested files, creates new Dataset entries in the
        Registry and finally ingests the files themselves into the Datastore.
        Any needed Instrument, Detector, and PhysicalFilter Dimension entries
        must exist in the Registry before `run` is called.

        Parameters
        ----------
        files : iterable over `str` or path-like objects
            Paths to the files to be ingested.  Will be made absolute
            if they are not already.
        """
        if self.config.onError == "rollback":
            # Ingest everything in one transaction so any error undoes all
            # files ingested by this call.
            with self.butler.transaction():
                for file in files:
                    self.processFile(os.path.abspath(file))
        elif self.config.onError == "break":
            for file in files:
                self.processFile(os.path.abspath(file))
        elif self.config.onError == "continue":
            for file in files:
                try:
                    self.processFile(os.path.abspath(file))
                except Exception as err:
                    self.log.warnf("Error processing '{}': {}", file, err)
175 """Read and return any relevant headers from the given file. 177 The default implementation simply reads the header of the first 178 non-empty HDU, so it always returns a single-element list. 182 file : `str` or path-like object 183 Absolute path to the file to be ingested. 187 headers : `list` of `~lsst.daf.base.PropertyList` 188 Single-element list containing the header of the first 191 return [readMetadata(file)]
194 """Extract metadata from a raw file and add Exposure and Visit 197 Any needed Instrument, Detector, and PhysicalFilter Dimension entries must 198 exist in the Registry before `run` is called. 202 file : `str` or path-like object 203 Absolute path to the file to be ingested. 207 headers : `list` of `~lsst.daf.base.PropertyList` 208 Result of calling `readHeaders`. 210 Data ID dictionary, as returned by `extractDataId`. 213 obsInfo = ObservationInfo(headers[0])
216 fullDataId = self.
extractDataId(file, headers, obsInfo=obsInfo)
219 dimensionDataId = DataId(fullDataId, dimension=dimension)
222 dimensionEntryDict = self.
butler.registry.findDimensionEntry(dimension, dimensionDataId)
223 if dimensionEntryDict
is None:
224 if dimension.name
in (
"Visit",
"Exposure"):
226 self.
butler.registry.addDimensionEntry(dimension, dimensionDataId)
229 f
"Entry for {dimension.name} with ID {dimensionDataId} not found; must be " 230 f
"present in Registry prior to ingest." 235 return headers, fullDataId
238 """Ingest a single raw file into the repository. 240 All necessary Dimension entres must already be present. 242 This method is not transactional; it must be wrapped in a 243 ``with self.butler.transaction` block to make per-file ingest 248 file : `str` or path-like object 249 Absolute path to the file to be ingested. 250 headers : `list` of `~lsst.daf.base.PropertyList` 251 Result of calling `readHeaders`. 253 Data ID dictionary, as returned by `extractDataId`. 254 run : `~lsst.daf.butler.Run`, optional 255 Run to add the Dataset to; defaults to ``self.butler.run``. 269 transactional=
False, recursive=
True)
274 self.
butler.datastore.ingest(file, ref, formatter=self.
getFormatter(file, headers, dataId),
275 transfer=self.config.transfer)
279 """Ingest a single raw data file after extacting metadata. 281 This creates any new Exposure or Visit Dimension entries needed to 282 identify the ingest file, creates a new Dataset entry in the 283 Registry and finally ingests the file itself into the Datastore. 284 Any needed Instrument, Detector, and PhysicalFilter Dimension entries must 285 exist in the Registry before `run` is called. 289 file : `str` or path-like object 290 Absolute path to the file to be ingested. 295 with self.
butler.transaction():
299 except IngestConflictError:
300 if self.config.conflict ==
"fail":
302 if self.config.conflict ==
"ignore":
306 self.log.infof(
"Conflict on {} ({}); ingesting to stash '{}' instead.",
307 dataId, file, self.config.stash)
308 with self.
butler.transaction():
311 self.log.infof(
"Conflict on {} ({}); ignoring.", dataId, file)
314 """Return the Data ID dictionary that should be used to label a file. 318 file : `str` or path-like object 319 Absolute path to the file being ingested (prior to any transfers). 320 headers : `list` of `~lsst.daf.base.PropertyList` 321 All headers returned by `readHeaders()`. 326 A mapping whose key-value pairs uniquely identify raw datasets. 327 Must have ``dimensions`` equal to ``self.dimensions``. 330 if obsInfo.visit_id
is None:
331 toRemove.add(
"Visit")
332 if obsInfo.physical_filter
is None:
333 toRemove.add(
"PhysicalFilter")
335 dimensions = self.
dimensions.difference(toRemove)
339 dimensions=dimensions,
340 instrument=obsInfo.instrument,
341 exposure=obsInfo.exposure_id,
342 visit=obsInfo.visit_id,
343 detector=obsInfo.detector_num,
344 physical_filter=obsInfo.physical_filter,
346 updateExposureEntryFromObsInfo(dataId, obsInfo)
347 if obsInfo.visit_id
is not None:
348 updateVisitEntryFromObsInfo(dataId, obsInfo)
352 """Return the Formatter that should be used to read this file after 355 The default implementation returns None, which uses the formatter 356 configured for this DatasetType/StorageClass in the Butler.