__all__ = ("RawIngestTask", "RawIngestConfig")

import os.path
from abc import ABCMeta

from astro_metadata_translator import ObservationInfo
from lsst.afw.image import readMetadata
from lsst.daf.butler import DatasetType, StorageClassFactory, Run
from lsst.daf.butler.instrument import makeExposureEntryFromObsInfo, makeVisitEntryFromObsInfo
from lsst.pex.config import Config, Field, ChoiceField
from lsst.pipe.base import Task


class IngestConflictError(RuntimeError):
    """Raised when a raw Dataset with the same data ID as an ingested file
    already exists in the target Collection.

    This is the RuntimeError documented by `RawIngestConfig.conflict`.
    """
    pass


class RawIngestConfig(Config):
    transfer = ChoiceField(
        dtype=str,
        doc="How to transfer files (None for no transfer).",
        allowed={"move": "move",
                 "hardlink": "hard link",
                 "symlink": "symbolic (soft) link"},
        optional=True,
    )
    conflict = ChoiceField(
        dtype=str,
        doc="What to do if a raw Dataset with the same data ID as an "
            "ingested file already exists in the Butler's Collection.",
        allowed={"ignore": "Do not add the new file to the Collection.  If "
                           "'stash' is not None, the new file will be "
                           "ingested into the stash Collection instead.",
                 "fail": "Raise RuntimeError if a conflict is encountered "
                         "(which may then be caught if onError == 'continue')."},
    )
    stash = Field(
        dtype=str,
        doc="Name of an alternate Collection to hold Datasets that lose conflicts.",
        default=None,
        optional=True,
    )
    onError = ChoiceField(
        dtype=str,
        doc="What to do if an error (including fatal conflicts) occurs.",
        allowed={"continue": "Warn and continue with the next file.",
                 "break": "Stop processing immediately, but leave "
                          "already-ingested datasets in the repository.",
                 "rollback": "Stop processing and attempt to remove already-"
                             "ingested datasets from the repository."},
    )
83 """Driver Task for ingesting raw data into Gen3 Butler repositories. 85 This Task is intended to be runnable from the command-line, but it doesn't 86 meet the other requirements of CmdLineTask or PipelineTask, and wouldn't 87 gain much from being one. It also wouldn't really be appropriate as a 88 subtask of a CmdLineTask or PipelineTask; it's a Task essentially just to 89 leverage the logging and configurability functionality that provides. 91 Each instance of `RawIngestTask` writes to the same Butler and maintains a 92 cache of DataUnit entries that have already been added to or extracted 93 from its Registry. Each invocation of `RawIngestTask.run` ingests a list 94 of files (possibly semi-atomically; see `RawIngestConfig.onError`). 96 RawIngestTask should be subclassed to specialize ingest for the actual 97 structure of raw data files produced by a particular camera. Subclasses 98 must either provide populated `MetadataReader` instances in the 99 `dataIdReader`, `visitReader`, and `exposureReader` class attributes, or 100 alternate implementations of the `extractDataId`, `extractVisit`, and 101 `extractExposure` methods that do not use those attributes (each 102 attribute-method pair may be handled differently). Subclasses may also 103 wish to override `getFormatter` and/or (rarely) `getDatasetType`. We do 104 not anticipate overriding `run`, `ensureDataUnits`, `ingestFile`, or 105 `processFile` to ever be necessary. 109 config : `RawIngestConfig` 110 Configuration for whether/how to transfer files and how to handle 111 conflicts and errors. 112 butler : `~lsst.daf.butler.Butler` 113 Butler instance. Ingested Datasets will be created as part of 114 ``butler.run`` and associated with its Collection. 116 Other keyword arguments are forwarded to the Task base class constructor. 119 ConfigClass = RawIngestConfig
121 _DefaultName =
"ingest" 125 """Return the DatasetType of the Datasets ingested by this Task. 127 return DatasetType(
"raw", (
"Camera",
"Sensor",
"Exposure"),
128 StorageClassFactory().getStorageClass(
"Exposure"))

    def __init__(self, config=None, *, butler, **kwds):
        super().__init__(config, **kwds)
        self.butler = butler
        self.datasetType = self.getDatasetType()
        self.units = tuple(butler.registry.getDataUnitDefinition(k)
                           for k in ("Camera", "Sensor", "PhysicalFilter",
                                     "Visit", "Exposure"))
        # Cache of DataUnit entries that have already been added to or
        # extracted from the Registry, keyed first on DataUnit name and then
        # on each entry's primary key tuple.
        self.unitEntryCache = {unit.name: {} for unit in self.units}
        self.stashRun = Run(self.config.stash) if self.config.stash is not None else None

    def run(self, files):
        """Ingest files into a Butler data repository.

        This creates any new Exposure or Visit DataUnit entries needed to
        identify the ingested files, creates new Dataset entries in the
        Registry and finally ingests the files themselves into the Datastore.
        Any needed Camera, Sensor, and PhysicalFilter DataUnit entries must
        exist in the Registry before `run` is called.

        Parameters
        ----------
        files : iterable over `str` or path-like objects
            Paths to the files to be ingested.  Will be made absolute
            if they are not already.
        """
        if self.config.onError == "rollback":
            # Ingest everything in a single transaction so any failure rolls
            # back all datasets ingested by this call.
            with self.butler.transaction():
                for file in files:
                    self.processFile(os.path.abspath(file))
        elif self.config.onError == "break":
            for file in files:
                self.processFile(os.path.abspath(file))
        elif self.config.onError == "continue":
            for file in files:
                try:
                    self.processFile(os.path.abspath(file))
                except Exception as err:
                    self.log.warnf("Error processing '{}': {}", file, err)
180 """Read and return any relevant headers from the given file. 182 The default implementation simply reads the header of the first 183 non-empty HDU, so it always returns a single-element list. 187 file : `str` or path-like object 188 Absolute path to the file to be ingested. 192 headers : `list` of `~lsst.daf.base.PropertyList` 193 Single-element list containing the header of the first 196 return [readMetadata(file)]
199 """Extract metadata from a raw file and add Exposure and Visit 202 Any needed Camera, Sensor, and PhysicalFilter DataUnit entries must 203 exist in the Registry before `run` is called. 207 file : `str` or path-like object 208 Absolute path to the file to be ingested. 212 headers : `list` of `~lsst.daf.base.PropertyList` 213 Result of calling `readHeaders`. 215 Data ID dictionary, as returned by `extractDataId`. 224 dataId.setdefault(
"physical_filter",
None)
225 dataId.setdefault(
"visit",
None)
232 associatedUnitEntries = {}
233 for unit
in self.
units:
235 unitPrimaryKeyTuple = tuple(dataId[f]
for f
in unit.primaryKey)
236 if any(v
is None for v
in unitPrimaryKeyTuple):
240 associatedUnitEntries[unit.name] =
None 242 unitEntryDict = self.
unitEntryCache[unit.name].get(unitPrimaryKeyTuple,
None)
243 if unitEntryDict
is None:
245 unitPrimaryKeyDict = {f: dataId[f]
for f
in unit.primaryKey}
246 unitEntryDict = self.
butler.registry.findDataUnitEntry(unit.name, unitPrimaryKeyDict)
247 if unitEntryDict
is None:
250 if unit.name ==
"Visit":
252 elif unit.name ==
"Exposure":
255 raise LookupError(
"{} with keys {} not found; must be present in Registry prior " 256 "to ingest.".format(unit.name, unitPrimaryKeyDict))
257 unitEntryDict = extractMethod(file, headers, dataId=dataId.copy(),
258 associated=associatedUnitEntries)
260 self.
butler.registry.addDataUnitEntry(unit.name, unitEntryDict)
262 self.
unitEntryCache[unit.name][unitPrimaryKeyTuple] = unitEntryDict
263 associatedUnitEntries[unit.name] = unitEntryDict
265 return headers, dataId
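
    # For illustration, after ingesting a single file the cache updated above
    # might look like this (hypothetical HSC-like values; each key tuple
    # follows the corresponding DataUnit's primaryKey ordering):
    #
    #     self.unitEntryCache == {
    #         "Camera": {("HSC",): {...}},
    #         "Sensor": {("HSC", 50): {...}},
    #         "PhysicalFilter": {("HSC", "HSC-R"): {...}},
    #         "Visit": {("HSC", 904024): {...}},
    #         "Exposure": {("HSC", 904024): {...}},
    #     }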
268 """Ingest a single raw file into the repository. 270 All necessary DataUnit entres must already be present. 272 This method is not transactional; it must be wrapped in a 273 ``with self.butler.transaction` block to make per-file ingest 278 file : `str` or path-like object 279 Absolute path to the file to be ingested. 280 headers : `list` of `~lsst.daf.base.PropertyList` 281 Result of calling `readHeaders`. 283 Data ID dictionary, as returned by `extractDataId`. 284 run : `~lsst.daf.butler.Run`, optional 285 Run to add the Dataset to; defaults to ``self.butler.run``. 299 transactional=
False, recursive=
True)
304 self.
butler.datastore.ingest(file, ref, formatter=self.
getFormatter(file, headers, dataId),
305 transfer=self.config.transfer)
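
    # `ingestFile` is deliberately not transactional; callers wrap it as in
    # this sketch (this is exactly what `processFile` below does):
    #
    #     with self.butler.transaction():
    #         self.ingestFile(file, headers, dataId)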
309 """Ingest a single raw data file after extacting metadata. 311 This creates any new Exposure or Visit DataUnit entries needed to 312 identify the ingest file, creates a new Dataset entry in the 313 Registry and finally ingests the file itself into the Datastore. 314 Any needed Camera, Sensor, and PhysicalFilter DataUnit entries must 315 exist in the Registry before `run` is called. 319 file : `str` or path-like object 320 Absolute path to the file to be ingested. 325 with self.
butler.transaction():
329 except IngestConflictError:
330 if self.config.conflict ==
"fail":
332 if self.config.conflict ==
"ignore":
336 self.log.infof(
"Conflict on {} ({}); ingesting to stash '{}' instead.",
337 dataId, file, self.config.stash)
338 with self.
butler.transaction():
341 self.log.infof(
"Conflict on {} ({}); ignoring.", dataId, file)
344 """Return the Data ID dictionary that should be used to label a file. 348 file : `str` or path-like object 349 Absolute path to the file being ingested (prior to any transfers). 350 headers : `list` of `~lsst.daf.base.PropertyList` 351 All headers returned by `readHeaders()`. 356 Must include "camera", "sensor", and "exposure" keys. If the 357 Exposure is associated with a PhysicalFilter and/or Visit, 358 "physical_filter" and "visit" keys should be provided as well 361 obsInfo = ObservationInfo(headers[0])
363 "camera": obsInfo.instrument,
364 "exposure": obsInfo.exposure_id,
365 "visit": obsInfo.visit_id,
366 "sensor": obsInfo.detector_num,
367 "physical_filter": obsInfo.physical_filter,
371 """Create a Visit DataUnit entry from raw file metadata. 375 file : `str` or path-like object 376 Absolute path to the file being ingested (prior to any transfers). 377 headers : `list` of `~lsst.daf.base.PropertyList` 378 All headers returned by `readHeaders()`. 380 The data ID for this file. Implementations are permitted to 381 modify this dictionary (generally by stripping off "sensor" and 382 "exposure" and adding new metadata key-value pairs) and return it. 384 A dictionary containing other associated DataUnit entries. 385 Guaranteed to have "Camera", "Sensor", and "PhysicalFilter" keys, 386 but the last may map to ``None`` if `extractDataId` either did not 387 contain a "physical_filter" key or mapped it to ``None``. 388 Also adds a "VisitInfo" key containing an `afw.image.VisitInfo` 389 object for use by `extractExposureEntry`. 394 Dictionary corresponding to an Visit database table row. 395 Must have all non-null columns in the Visit table as keys. 397 obsInfo = ObservationInfo(headers[0])
398 associated[
"ObsInfo"] = obsInfo
400 del dataId[
"exposure"]
401 return makeVisitEntryFromObsInfo(dataId, obsInfo)
404 """Create an Exposure DataUnit entry from raw file metadata. 408 file : `str` or path-like object 409 Absolute path to the file being ingested (prior to any transfers). 410 headers : `list` of `~lsst.daf.base.PropertyList` 411 All headers returned by `readHeaders()`. 413 The data ID for this file. Implementations are permitted to 414 modify this dictionary (generally by stripping off "sensor" and 415 adding new metadata key-value pairs) and return it. 417 A dictionary containing other associated DataUnit entries. 418 Guaranteed to have "Camera", "Sensor", "PhysicalFilter", and 419 "Visit" keys, but the latter two may map to ``None`` if 420 `extractDataId` did not contain keys for these or mapped them to 421 ``None``. May also contain additional keys added by 427 Dictionary corresponding to an Exposure database table row. 428 Must have all non-null columns in the Exposure table as keys. 431 obsInfo = associated[
"ObsInfo"]
433 obsInfo = ObservationInfo(headers[0])
435 return makeExposureEntryFromObsInfo(dataId, obsInfo)
438 """Return the Formatter that should be used to read this file after 441 The default implementation returns None, which uses the formatter 442 configured for this DatasetType/StorageClass in the Butler.