# Public API of this module.
__all__ = ("RawIngestTask", "RawIngestConfig", "VisitInfoRawIngestTask")
import os

from abc import ABCMeta, abstractmethod

from lsst.afw.image import readMetadata
from lsst.daf.butler import DatasetType, StorageClassFactory, Run
from lsst.daf.butler.instrument import makeExposureEntryFromVisitInfo, makeVisitEntryFromVisitInfo
from lsst.pex.config import Config, Field, ChoiceField
from lsst.pipe.base import Task
class RawIngestConfig(Config):
    """Configuration for `RawIngestTask`: how (or whether) to transfer files,
    and what to do on conflicts and errors.
    """

    transfer = ChoiceField(
        dtype=str,
        doc="How to transfer files (None for no transfer).",
        allowed={"move": "move",
                 "hardlink": "hard link",
                 "symlink": "symbolic (soft) link"},
        # None is a valid value ("no transfer"), so this field is optional.
        optional=True,
    )
    conflict = ChoiceField(
        dtype=str,
        doc=("What to do if a raw Dataset with the same data ID as an "
             "ingested file already exists in the Butler's Collection."),
        allowed={"ignore": ("Do not add the new file to the Collection.  If "
                            "'stash' is not None, the new file will be "
                            "ingested into the stash Collection instead."),
                 "fail": ("Raise RuntimeError if a conflict is encountered "
                          "(which may then be caught if onError == 'continue').")},
        optional=False,
        default="ignore",  # NOTE(review): default reconstructed -- confirm
    )
    stash = Field(
        dtype=str,
        # processFile() tests ``self.config.stash is not None``, so None
        # (no stash Collection) must be the default.
        default=None,
        doc="Name of an alternate Collection to hold Datasets that lose conflicts.",
    )
    onError = ChoiceField(
        dtype=str,
        doc="What to do if an error (including fatal conflicts) occurs.",
        allowed={"continue": "Warn and continue with the next file.",
                 "break": ("Stop processing immediately, but leave "
                           "already-ingested datasets in the repository."),
                 "rollback": ("Stop processing and attempt to remove already-"
                              "ingested datasets from the repository.")},
        optional=False,
        default="continue",  # NOTE(review): default reconstructed -- confirm
    )
class RawIngestTask(Task, metaclass=ABCMeta):
    """Driver Task for ingesting raw data into Gen3 Butler repositories.

    This Task is intended to be runnable from the command-line, but it doesn't
    meet the other requirements of CmdLineTask or PipelineTask, and wouldn't
    gain much from being one.  It also wouldn't really be appropriate as a
    subtask of a CmdLineTask or PipelineTask; it's a Task essentially just to
    leverage the logging and configurability functionality that provides.

    Each instance of `RawIngestTask` writes to the same Butler and maintains a
    cache of DataUnit entries that have already been added to or extracted
    from its Registry.  Each invocation of `RawIngestTask.run` ingests a list
    of files (possibly semi-atomically; see `RawIngestConfig.onError`).

    RawIngestTask should be subclassed to specialize ingest for the actual
    structure of raw data files produced by a particular camera.  Subclasses
    must either provide populated `MetadataReader` instances in the
    `dataIdReader`, `visitReader`, and `exposureReader` class attributes, or
    alternate implementations of the `extractDataId`, `extractVisit`, and
    `extractExposure` methods that do not use those attributes (each
    attribute-method pair may be handled differently).  Subclasses may also
    wish to override `getFormatter` and/or (rarely) `getDatasetType`.  We do
    not anticipate overriding `run`, `ensureDataUnits`, `ingestFile`, or
    `processFile` to ever be necessary.

    Parameters
    ----------
    config : `RawIngestConfig`
        Configuration for whether/how to transfer files and how to handle
        conflicts and errors.
    butler : `~lsst.daf.butler.Butler`
        Butler instance.  Ingested Datasets will be created as part of
        ``butler.run`` and associated with its Collection.

    Other keyword arguments are forwarded to the Task base class constructor.
    """

    ConfigClass = RawIngestConfig

    _DefaultName = "ingest"

    def getDatasetType(self):
        """Return the DatasetType of the Datasets ingested by this Task."""
        return DatasetType("raw", ("Camera", "Sensor", "Exposure"),
                           StorageClassFactory().getStorageClass("Exposure"))
129 def __init__(self, config=None, *, butler, **kwds):
133 self.
units = tuple(butler.registry.getDataUnitDefinition(k)
134 for k
in (
"Camera",
"Sensor",
"PhysicalFilter",
"Visit",
"Exposure", ))
146 self.
stashRun = Run(self.config.stash)
if self.config.stash
is not None else None 149 """Ingest files into a Butler data repository. 151 This creates any new Exposure or Visit DataUnit entries needed to 152 identify the ingested files, creates new Dataset entries in the 153 Registry and finally ingests the files themselves into the Datastore. 154 Any needed Camera, Sensor, and PhysicalFilter DataUnit entries must 155 exist in the Registry before `run` is called. 159 files : iterable over `str` or path-like objects 160 Paths to the files to be ingested. Will be made absolute 161 if they are not already. 164 if self.config.onError ==
"rollback":
165 with self.
butler.transaction():
168 elif self.config.onError ==
"break":
171 elif self.config.onError ==
"continue":
175 except Exception
as err:
176 self.log.warnf(
"Error processing '{}': {}", file, err)
179 """Read and return any relevant headers from the given file. 181 The default implementation simply reads the header of the first 182 non-empty HDU, so it always returns a single-element list. 186 file : `str` or path-like object 187 Absolute path to the file to be ingested. 191 headers : `list` of `~lsst.daf.base.PropertyList` 192 Single-element list containing the header of the first 195 return [readMetadata(file)]
198 """Extract metadata from a raw file and add Exposure and Visit 201 Any needed Camera, Sensor, and PhysicalFilter DataUnit entries must 202 exist in the Registry before `run` is called. 206 file : `str` or path-like object 207 Absolute path to the file to be ingested. 211 headers : `list` of `~lsst.daf.base.PropertyList` 212 Result of calling `readHeaders`. 214 Data ID dictionary, as returned by `extractDataId`. 223 dataId.setdefault(
"physical_filter",
None)
224 dataId.setdefault(
"visit",
None)
231 associatedUnitEntries = {}
232 for unit
in self.
units:
234 unitPrimaryKeyTuple = tuple(dataId[f]
for f
in unit.primaryKey)
235 if any(v
is None for v
in unitPrimaryKeyTuple):
239 associatedUnitEntries[unit.name] =
None 241 unitEntryDict = self.
unitEntryCache[unit.name].get(unitPrimaryKeyTuple,
None)
242 if unitEntryDict
is None:
244 unitPrimaryKeyDict = {f: dataId[f]
for f
in unit.primaryKey}
245 unitEntryDict = self.
butler.registry.findDataUnitEntry(unit.name, unitPrimaryKeyDict)
246 if unitEntryDict
is None:
249 if unit.name ==
"Visit":
251 elif unit.name ==
"Exposure":
254 raise LookupError(
"{} with keys {} not found; must be present in Registry prior " 255 "to ingest.".format(unit.name, unitPrimaryKeyDict))
256 unitEntryDict = extractMethod(file, headers, dataId=dataId.copy(),
257 associated=associatedUnitEntries)
259 self.
butler.registry.addDataUnitEntry(unit.name, unitEntryDict)
261 self.
unitEntryCache[unit.name][unitPrimaryKeyTuple] = unitEntryDict
262 associatedUnitEntries[unit.name] = unitEntryDict
264 return headers, dataId
267 """Ingest a single raw file into the repository. 269 All necessary DataUnit entres must already be present. 271 This method is not transactional; it must be wrapped in a 272 ``with self.butler.transaction` block to make per-file ingest 277 file : `str` or path-like object 278 Absolute path to the file to be ingested. 279 headers : `list` of `~lsst.daf.base.PropertyList` 280 Result of calling `readHeaders`. 282 Data ID dictionary, as returned by `extractDataId`. 283 run : `~lsst.daf.butler.Run`, optional 284 Run to add the Dataset to; defaults to ``self.butler.run``. 298 transactional=
False, recursive=
True)
303 self.
butler.datastore.ingest(file, ref, formatter=self.
getFormatter(file, headers, dataId),
304 transfer=self.config.transfer)
308 """Ingest a single raw data file after extacting metadata. 310 This creates any new Exposure or Visit DataUnit entries needed to 311 identify the ingest file, creates a new Dataset entry in the 312 Registry and finally ingests the file itself into the Datastore. 313 Any needed Camera, Sensor, and PhysicalFilter DataUnit entries must 314 exist in the Registry before `run` is called. 318 file : `str` or path-like object 319 Absolute path to the file to be ingested. 324 with self.
butler.transaction():
328 except IngestConflictError:
329 if self.config.conflict ==
"fail":
331 if self.config.conflict ==
"ignore":
335 self.log.infof(
"Conflict on {} ({}); ingesting to stash '{}' instead.",
336 dataId, file, self.config.stash)
337 with self.
butler.transaction():
340 self.log.infof(
"Conflict on {} ({}); ignoring.", dataId, file)
344 """Return the Data ID dictionary that should be used to label a file. 348 file : `str` or path-like object 349 Absolute path to the file being ingested (prior to any transfers). 350 headers : `list` of `~lsst.daf.base.PropertyList` 351 All headers returned by `readHeaders()`. 356 Must include "camera", "sensor", and "exposure" keys. If the 357 Exposure is associated with a PhysicalFilter and/or Visit, 358 "physical_filter" and "visit" keys should be provided as well 361 raise NotImplementedError(
"Must be implemented by subclasses.")
365 """Create a Visit DataUnit entry from raw file metadata. 369 file : `str` or path-like object 370 Absolute path to the file being ingested (prior to any transfers). 371 headers : `list` of `~lsst.daf.base.PropertyList` 372 All headers returned by `readHeaders()`. 374 The data ID for this file. Implementations are permitted to 375 modify this dictionary (generally by stripping off "sensor" and 376 "exposure" and adding new metadata key-value pairs) and return it. 378 A dictionary containing other associated DataUnit entries. 379 Guaranteed to have "Camera", "Sensor", and "PhysicalFilter" keys, 380 but the last may map to ``None`` if `extractDataId` either did not 381 contain a "physical_filter" key or mapped it to ``None``. 382 Subclasses may add new keys to this dict to pass arbitrary data to 383 `extractExposureEntry` (`extractVisitEntry` is always called 384 first), but note that when a Visit is comprised of multiple 385 Exposures, `extractVisitEntry` may not be called at all. 390 Dictionary corresponding to an Visit database table row. 391 Must have all non-null columns in the Visit table as keys. 393 raise NotImplementedError(
"Must be implemented by subclasses.")
397 """Create an Exposure DataUnit entry from raw file metadata. 401 file : `str` or path-like object 402 Absolute path to the file being ingested (prior to any transfers). 403 headers : `list` of `~lsst.daf.base.PropertyList` 404 All headers returned by `readHeaders()`. 406 The data ID for this file. Implementations are permitted to 407 modify this dictionary (generally by stripping off "sensor" and 408 adding new metadata key-value pairs) and return it. 410 A dictionary containing other associated DataUnit entries. 411 Guaranteed to have "Camera", "Sensor", "PhysicalFilter", and 412 "Visit" keys, but the latter two may map to ``None`` if 413 `extractDataId` did not contain keys for these or mapped them to 414 ``None``. May also contain additional keys added by 420 Dictionary corresponding to an Exposure database table row. 421 Must have all non-null columns in the Exposure table as keys. 423 raise NotImplementedError(
"Must be implemented by subclasses.")
426 """Return the Formatter that should be used to read this file after 429 The default implementation returns None, which uses the formatter 430 configured for this DatasetType/StorageClass in the Butler. 436 """An intermediate base class of RawIngestTask for cameras that already 437 implement constructing a `afw.image.VisitInfo` object from raw data. 439 Subclasses must provide (at least) implementations of `extractDataId` and 440 the new `makeVisitInfo` method; the latter is used to provide concrete 441 implementations of `extractVisitEntry` and `extractExposureEntry`. 446 """Return an `afw.image.VisitInfo` object from the given header and ID. 450 headers : `list` of `~lsst.daf.base.PropertyList` 451 All headers returned by `readHeaders()`. 453 Integer ID to pass to the `VisitInfo` constructor. 455 raise NotImplementedError(
"Must be implemented by subclasses.")
458 """Create a Visit DataUnit entry from raw file metadata. 462 file : `str` or path-like object 463 Absolute path to the file being ingested (prior to any transfers). 464 headers : `list` of `~lsst.daf.base.PropertyList` 465 All headers returned by `readHeaders()`. 467 The data ID for this file. Implementations are permitted to 468 modify this dictionary (generally by stripping off "sensor" and 469 "exposure" and adding new metadata key-value pairs) and return it. 471 A dictionary containing other associated DataUnit entries. 472 Guaranteed to have "Camera", "Sensor", and "PhysicalFilter" keys, 473 but the last may map to ``None`` if `extractDataId` either did not 474 contain a "physical_filter" key or mapped it to ``None``. 475 Also adds a "VisitInfo" key containing an `afw.image.VisitInfo` 476 object for use by `extractExposureEntry`. 481 Dictionary corresponding to an Visit database table row. 482 Must have all non-null columns in the Visit table as keys. 484 visitInfo = self.
makeVisitInfo(headers, exposureId=dataId[
"exposure"])
485 associated[
"VisitInfo"] = visitInfo
487 del dataId[
"exposure"]
488 return makeVisitEntryFromVisitInfo(dataId, visitInfo)
491 """Create an Exposure DataUnit entry from raw file metadata. 495 file : `str` or path-like object 496 Absolute path to the file being ingested (prior to any transfers). 497 headers : `list` of `~lsst.daf.base.PropertyList` 498 All headers returned by `readHeaders()`. 500 The data ID for this file. Implementations are permitted to 501 modify this dictionary (generally by stripping off "sensor" and 502 adding new metadata key-value pairs) and return it. 504 A dictionary containing other associated DataUnit entries. 505 Guaranteed to have "Camera", "Sensor", "PhysicalFilter", and 506 "Visit" keys, but the latter two may map to ``None`` if 507 `extractDataId` did not contain keys for these or mapped them to 508 ``None``. May also contain additional keys added by 514 Dictionary corresponding to an Exposure database table row. 515 Must have all non-null columns in the Exposure table as keys. 518 visitInfo = associated[
"VisitInfo"]
520 visitInfo = self.
makeVisitInfo(headers, exposureId=dataId[
"exposure"])
522 return makeExposureEntryFromVisitInfo(dataId, visitInfo)
def readHeaders(self, file)
def extractExposureEntry(self, file, headers, dataId, associated)
def extractDataId(self, file, headers)
def __init__(self, config=None, butler, kwds)
def extractVisitEntry(self, file, headers, dataId, associated)
def extractVisitEntry(self, file, headers, dataId, associated)
def makeVisitInfo(self, headers, exposureId)
def ingestFile(self, file, headers, dataId, run=None)
def ensureDataUnits(self, file)
def extractExposureEntry(self, file, headers, dataId, associated)
def getFormatter(self, file, headers, dataId)
def processFile(self, file)