__all__ = ("RawIngestTask", "RawIngestConfig")

import os.path
from abc import ABCMeta

from astro_metadata_translator import ObservationInfo

from lsst.afw.image import readMetadata
from lsst.daf.butler import DatasetType, StorageClassFactory, Run
# IngestConflictError is used below in processFile; importing it from
# lsst.daf.butler is an assumption -- adjust to wherever the installed
# daf_butler version defines it.
from lsst.daf.butler import IngestConflictError
from lsst.daf.butler.instrument import makeExposureEntryFromObsInfo, makeVisitEntryFromObsInfo
from lsst.pex.config import Config, Field, ChoiceField
from lsst.pipe.base import Task
class RawIngestConfig(Config):
    transfer = ChoiceField(
        "How to transfer files (None for no transfer).",
        dtype=str,
        allowed={"move": "move",
                 "hardlink": "hard link",
                 "symlink": "symbolic (soft) link"},
        optional=True,
    )
    conflict = ChoiceField(
        ("What to do if a raw Dataset with the same data ID as an "
         "ingested file already exists in the Butler's Collection."),
        dtype=str,
        allowed={"ignore": ("Do not add the new file to the Collection.  If "
                            "'stash' is not None, the new file will be "
                            "ingested into the stash Collection instead."),
                 "fail": ("Raise RuntimeError if a conflict is encountered "
                          "(which may then be caught if onError == 'continue')."),
                 },
        optional=False,
        default="ignore",
    )
    stash = Field(
        "Name of an alternate Collection to hold Datasets that lose conflicts.",
        dtype=str,
        default=None,
    )
    onError = ChoiceField(
        "What to do if an error (including fatal conflicts) occurs.",
        dtype=str,
        allowed={"continue": "Warn and continue with the next file.",
                 "break": ("Stop processing immediately, but leave "
                           "already-ingested datasets in the repository."),
                 "rollback": ("Stop processing and attempt to remove already-"
                              "ingested datasets from the repository."),
                 },
        optional=False,
        default="continue",
    )
83 """Driver Task for ingesting raw data into Gen3 Butler repositories. 85 This Task is intended to be runnable from the command-line, but it doesn't 86 meet the other requirements of CmdLineTask or PipelineTask, and wouldn't 87 gain much from being one. It also wouldn't really be appropriate as a 88 subtask of a CmdLineTask or PipelineTask; it's a Task essentially just to 89 leverage the logging and configurability functionality that provides. 91 Each instance of `RawIngestTask` writes to the same Butler and maintains a 92 cache of DataUnit entries that have already been added to or extracted 93 from its Registry. Each invocation of `RawIngestTask.run` ingests a list 94 of files (possibly semi-atomically; see `RawIngestConfig.onError`). 96 RawIngestTask should be subclassed to specialize ingest for the actual 97 structure of raw data files produced by a particular instrument. 98 Subclasses must either provide populated `MetadataReader` instances in the 99 `dataIdReader`, `visitReader`, and `exposureReader` class attributes, or 100 alternate implementations of the `extractDataId`, `extractVisit`, and 101 `extractExposure` methods that do not use those attributes (each 102 attribute-method pair may be handled differently). Subclasses may also 103 wish to override `getFormatter` and/or (rarely) `getDatasetType`. We do 104 not anticipate overriding `run`, `ensureDataUnits`, `ingestFile`, or 105 `processFile` to ever be necessary. 109 config : `RawIngestConfig` 110 Configuration for whether/how to transfer files and how to handle 111 conflicts and errors. 112 butler : `~lsst.daf.butler.Butler` 113 Butler instance. Ingested Datasets will be created as part of 114 ``butler.run`` and associated with its Collection. 116 Other keyword arguments are forwarded to the Task base class constructor. 119 ConfigClass = RawIngestConfig
    _DefaultName = "ingest"

    @classmethod
    def getDatasetType(cls):
        """Return the DatasetType of the Datasets ingested by this Task.
        """
        return DatasetType("raw", ("Instrument", "Detector", "Exposure"),
                           StorageClassFactory().getStorageClass("Exposure"))
    def __init__(self, config=None, *, butler, **kwds):
        super().__init__(config, **kwds)
        self.butler = butler
        self.datasetType = self.getDatasetType()
        self.units = tuple(butler.registry.getDataUnitDefinition(k)
                           for k in ("Instrument", "Detector", "PhysicalFilter",
                                     "Visit", "Exposure"))
        # Cache of DataUnit entries already added to or read from the
        # Registry, keyed on DataUnit name and then primary key tuple.
        self.unitEntryCache = {unit.name: {} for unit in self.units}
        self.stashRun = Run(self.config.stash) if self.config.stash is not None else None

    def run(self, files):
        """Ingest files into a Butler data repository.

        This creates any new Exposure or Visit DataUnit entries needed to
        identify the ingested files, creates new Dataset entries in the
        Registry and finally ingests the files themselves into the Datastore.
        Any needed Instrument, Detector, and PhysicalFilter DataUnit entries
        must exist in the Registry before `run` is called.

        Parameters
        ----------
        files : iterable over `str` or path-like objects
            Paths to the files to be ingested.  Will be made absolute
            if they are not already.
        """
        if self.config.onError == "rollback":
            # One transaction for the whole list, so any error rolls back
            # everything ingested by this call.
            with self.butler.transaction():
                for file in files:
                    self.processFile(os.path.abspath(file))
        elif self.config.onError == "break":
            for file in files:
                self.processFile(os.path.abspath(file))
        elif self.config.onError == "continue":
            for file in files:
                try:
                    self.processFile(os.path.abspath(file))
                except Exception as err:
                    self.log.warnf("Error processing '{}': {}", file, err)
180 """Read and return any relevant headers from the given file. 182 The default implementation simply reads the header of the first 183 non-empty HDU, so it always returns a single-element list. 187 file : `str` or path-like object 188 Absolute path to the file to be ingested. 192 headers : `list` of `~lsst.daf.base.PropertyList` 193 Single-element list containing the header of the first 196 return [readMetadata(file)]
199 """Extract metadata from a raw file and add Exposure and Visit 202 Any needed Instrument, Detector, and PhysicalFilter DataUnit entries must 203 exist in the Registry before `run` is called. 207 file : `str` or path-like object 208 Absolute path to the file to be ingested. 212 headers : `list` of `~lsst.daf.base.PropertyList` 213 Result of calling `readHeaders`. 215 Data ID dictionary, as returned by `extractDataId`. 225 dataId.setdefault(
"physical_filter",
None)
226 dataId.setdefault(
"visit",
None)
233 associatedUnitEntries = {}
234 for unit
in self.
units:
236 unitPrimaryKeyTuple = tuple(dataId[f]
for f
in unit.primaryKey)
237 if any(v
is None for v
in unitPrimaryKeyTuple):
241 associatedUnitEntries[unit.name] =
None 243 unitEntryDict = self.
unitEntryCache[unit.name].get(unitPrimaryKeyTuple,
None)
244 if unitEntryDict
is None:
246 unitPrimaryKeyDict = {f: dataId[f]
for f
in unit.primaryKey}
247 unitEntryDict = self.
butler.registry.findDataUnitEntry(unit.name, unitPrimaryKeyDict)
248 if unitEntryDict
is None:
251 if unit.name ==
"Visit":
253 elif unit.name ==
"Exposure":
256 raise LookupError(
"{} with keys {} not found; must be present in Registry prior " 257 "to ingest.".format(unit.name, unitPrimaryKeyDict))
258 unitEntryDict = extractMethod(file, headers, dataId=dataId.copy(),
259 associated=associatedUnitEntries)
261 self.
butler.registry.addDataUnitEntry(unit.name, unitEntryDict)
263 self.
unitEntryCache[unit.name][unitPrimaryKeyTuple] = unitEntryDict
264 associatedUnitEntries[unit.name] = unitEntryDict
266 return headers, dataId
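
    # For orientation: after ensureDataUnits runs, the `associated` mapping
    # passed to the extract* methods pairs each DataUnit name with its entry
    # dict, or None when the data ID lacks that unit.  A sketch for a
    # filterless exposure with no visit:
    #
    #     {"Instrument": {...}, "Detector": {...}, "PhysicalFilter": None,
    #      "Visit": None, "Exposure": {...}}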
269 """Ingest a single raw file into the repository. 271 All necessary DataUnit entres must already be present. 273 This method is not transactional; it must be wrapped in a 274 ``with self.butler.transaction` block to make per-file ingest 279 file : `str` or path-like object 280 Absolute path to the file to be ingested. 281 headers : `list` of `~lsst.daf.base.PropertyList` 282 Result of calling `readHeaders`. 284 Data ID dictionary, as returned by `extractDataId`. 285 run : `~lsst.daf.butler.Run`, optional 286 Run to add the Dataset to; defaults to ``self.butler.run``. 300 transactional=
False, recursive=
True)
305 self.
butler.datastore.ingest(file, ref, formatter=self.
getFormatter(file, headers, dataId),
306 transfer=self.config.transfer)
310 """Ingest a single raw data file after extacting metadata. 312 This creates any new Exposure or Visit DataUnit entries needed to 313 identify the ingest file, creates a new Dataset entry in the 314 Registry and finally ingests the file itself into the Datastore. 315 Any needed Instrument, Detector, and PhysicalFilter DataUnit entries must 316 exist in the Registry before `run` is called. 320 file : `str` or path-like object 321 Absolute path to the file to be ingested. 326 with self.
butler.transaction():
330 except IngestConflictError:
331 if self.config.conflict ==
"fail":
333 if self.config.conflict ==
"ignore":
337 self.log.infof(
"Conflict on {} ({}); ingesting to stash '{}' instead.",
338 dataId, file, self.config.stash)
339 with self.
butler.transaction():
342 self.log.infof(
"Conflict on {} ({}); ignoring.", dataId, file)
345 """Return the Data ID dictionary that should be used to label a file. 349 file : `str` or path-like object 350 Absolute path to the file being ingested (prior to any transfers). 351 headers : `list` of `~lsst.daf.base.PropertyList` 352 All headers returned by `readHeaders()`. 357 Must include "instrument", "detector", and "exposure" keys. If the 358 Exposure is associated with a PhysicalFilter and/or Visit, 359 "physical_filter" and "visit" keys should be provided as well 362 obsInfo = ObservationInfo(headers[0])
364 "instrument": obsInfo.instrument,
365 "exposure": obsInfo.exposure_id,
366 "visit": obsInfo.visit_id,
367 "detector": obsInfo.detector_num,
368 "physical_filter": obsInfo.physical_filter,
372 """Create a Visit DataUnit entry from raw file metadata. 376 file : `str` or path-like object 377 Absolute path to the file being ingested (prior to any transfers). 378 headers : `list` of `~lsst.daf.base.PropertyList` 379 All headers returned by `readHeaders()`. 381 The data ID for this file. Implementations are permitted to 382 modify this dictionary (generally by stripping off "detector" and 383 "exposure" and adding new metadata key-value pairs) and return it. 385 A dictionary containing other associated DataUnit entries. 386 Guaranteed to have "Instrument", "Detector", and "PhysicalFilter" 387 keys, but the last may map to ``None`` if `extractDataId` either 388 did not contain a "physical_filter" key or mapped it to ``None``. 389 Also adds a "VisitInfo" key containing an `afw.image.VisitInfo` 390 object for use by `extractExposureEntry`. 395 Dictionary corresponding to an Visit database table row. 396 Must have all non-null columns in the Visit table as keys. 398 obsInfo = ObservationInfo(headers[0])
399 associated[
"ObsInfo"] = obsInfo
400 del dataId[
"detector"]
401 del dataId[
"exposure"]
402 return makeVisitEntryFromObsInfo(dataId, obsInfo)
405 """Create an Exposure DataUnit entry from raw file metadata. 409 file : `str` or path-like object 410 Absolute path to the file being ingested (prior to any transfers). 411 headers : `list` of `~lsst.daf.base.PropertyList` 412 All headers returned by `readHeaders()`. 414 The data ID for this file. Implementations are permitted to 415 modify this dictionary (generally by stripping off "detector" and 416 adding new metadata key-value pairs) and return it. 418 A dictionary containing other associated DataUnit entries. 419 Guaranteed to have "Instrument", "Detector", "PhysicalFilter", and 420 "Visit" keys, but the latter two may map to ``None`` if 421 `extractDataId` did not contain keys for these or mapped them to 422 ``None``. May also contain additional keys added by 428 Dictionary corresponding to an Exposure database table row. 429 Must have all non-null columns in the Exposure table as keys. 432 obsInfo = associated[
"ObsInfo"]
434 obsInfo = ObservationInfo(headers[0])
435 del dataId[
"detector"]
436 return makeExposureEntryFromObsInfo(dataId, obsInfo)
439 """Return the Formatter that should be used to read this file after 442 The default implementation returns None, which uses the formatter 443 configured for this DatasetType/StorageClass in the Butler.